import os
from selenium import webdriver
import time
from linkedin_scraper import actions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
# Scrape the "about us" paragraph from a LinkedIn company page using headless Chrome.
chrome_options = Options()
chrome_options.add_argument("--headless")
# NOTE(review): the first positional argument is presumably the chromedriver path - confirm for the installed Selenium version.
driver = webdriver.Chrome("driver/chromedriver", options=chrome_options)
# Credentials come from the environment; os.getenv returns None when a variable is unset.
email = os.getenv("LINKEDIN_USER")
password = os.getenv("LINKEDIN_PASSWORD")
actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal
driver.get('https://www.linkedin.com/company/biorasi-llc/about/')
# Wait up to 3 seconds for <section> elements to be present; the result is discarded.
_ = WebDriverWait(driver, 3).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'section')))
# Unconditional 3-second pause on top of the explicit wait above.
time.sleep(3)
# NOTE(review): takes the fourth <section> on the page; this index depends on the page layout.
grid = driver.find_elements_by_tag_name("section")[3]
# Text of the first <p> inside that section, with surrounding whitespace removed.
about_us = grid.find_elements_by_tag_name("p")[0].text.strip()
print(about_us)

这是我用来抓取公司 about_us 数据的代码。它可以工作,但有时我会收到如下错误:
TimeoutException                          Traceback (most recent call last)
     17 email = os.getenv("LINKEDIN_USER")
     18 password = os.getenv("LINKEDIN_PASSWORD")
---> 19 actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal
     20 driver.get('https://www.linkedin.com/company/biorasi-llc/about/')
     21 _ = WebDriverWait(driver, 3).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'section')))

~\Anaconda3\lib\site-packages\linkedin_scraper\actions.py in login(driver, email, password)
     28   password_elem.submit()
     29
---> 30   element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "profile-nav-item")))

~\Anaconda3\lib\site-packages\selenium\webdriver\support\wait.py in until(self, method, message)
     78             if time.time() > end_time:
     79                 break
---> 80         raise TimeoutException(message, screen, stacktrace)
     81
     82     def until_not(self, method, message=''):

TimeoutException: Message:
有人,请帮我解决这个问题。
发布于 2020-12-08 08:45:54
可能是因为超时时间太短(3秒),页面还没有完全加载就已经达到了超时阈值。试着把第21行的超时时间提高到5-10秒:
TIMEOUT = 10
_ = WebDriverWait(driver, TIMEOUT).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'section')))

下面是一些改进代码的技巧:
- 使用显式等待(WebDriverWait),如果可能的话尽量减少使用 time.sleep。WebDriverWait 会在找到元素后立即停止等待并返回该元素,因此可以节省时间。
- 通过标记名查找元素再按索引取值比较脆弱;下面的代码改用 XPath 直接定位段落。

import os
from selenium import webdriver
import time
from linkedin_scraper import actions
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
# Scrape the "about us" paragraph from a LinkedIn company page using headless Chrome.
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome("driver/chromedriver", options=chrome_options)
try:
    # Credentials come from the environment; os.getenv returns None when a variable is unset.
    email = os.getenv("LINKEDIN_USER")
    password = os.getenv("LINKEDIN_PASSWORD")
    actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal
    driver.get('https://www.linkedin.com/company/biorasi-llc/about/')
    # directly finds paragraph, removed time.sleep
    # Waits up to 15 seconds; raises TimeoutException if the paragraph never appears.
    paragraph_elem = WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.XPATH, '//section//h4/..//p')))
    about_us = paragraph_elem.text.strip()
finally:
    # Always end the browser session, even when login or the wait times out,
    # so no headless Chrome/chromedriver process is left running.
    driver.quit()
print(about_us)

https://stackoverflow.com/questions/65193776
复制相似问题