我想从网站中获取弹出数据。
如第一个图所示,我需要单击一个链接。
在此之后,将出现一个弹出窗口,如第二个图所示。
弹出的内容是我想要的。
我试着按照这个示例使用 PyQt5 获取数据。
然而,程序会一直运行下去,永远不会结束。
如何解决这个问题?
非常感谢。
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Web view that loads ``url`` and blocks inside the Qt event loop.

    The constructor starts the event loop itself and only returns once
    ``callable`` has stored the page HTML in ``self.html`` and asked the
    application to quit.

    NOTE(review): the second stage only runs if ``loadFinished`` is emitted
    a second time. The script executed in the first stage merely looks up an
    element, so presumably no further load is triggered and the event loop
    never exits -- this matches the reported symptom; confirm.
    """

    def __init__(self, url):
        """Create the application, start loading ``url`` and run the loop."""
        # The QApplication is created before the widget base class is set up.
        self.app = QApplication(sys.argv)
        super().__init__()
        self.html = None
        # True until the first ``loadFinished`` signal has been handled.
        self.first_pass = True
        self.loadFinished.connect(self._load_finished)
        self.load(QUrl(url))
        # Blocks here until ``self.app.quit()`` is called.
        self.app.exec_()

    def _load_finished(self, result):
        """Dispatch ``loadFinished`` to the first- or second-stage handler."""
        if not self.first_pass:
            self._second_finished()
            return
        self._first_finished()
        self.first_pass = False

    def _first_finished(self):
        """Run the JavaScript snippet on the freshly loaded page."""
        script = "document.getElementById('auto-header-citypop-citylist');"
        self.page().runJavaScript(script)

    def _second_finished(self):
        """Request the page HTML; it is delivered to ``callable``."""
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the HTML passed in by ``toHtml`` and stop the event loop."""
        self.html = data
        self.app.quit()
# Render the home page, then dump the captured HTML to disk.
url = r'https://www.autohome.com.cn'
web = Render(url)
# 'utf-8-sig' writes a BOM at the start of the file.
with open('data2.html', 'w', encoding='utf-8-sig') as out_file:
    out_file.write(web.html)


发布于 2019-02-13 00:04:16
页面上有一段 JavaScript 代码:
if (rf === "" || rf.toLocaleLowerCase().indexOf(".autohome.com.cn") === -1) {
if (screen == undefined || screen.width < 810) {
if (browser.versions.mobile == true || browser.versions.iPhone == true || browser.versions.ucweb == true || browser.versions.android == true || browser.versions.Symbian == true) {
window.location.href = "//m.autohome.com.cn/?from=pc";
return
}
}
}
它会把您重定向到 https://m.autohome.com.cn/?from=pc(可以通过打印 self.url() 来确认)。为了避免这种情况,我设置了如下的 Referer 请求头:
import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl, QByteArray
from PyQt5.QtWebEngineWidgets import QWebEngineView
from PyQt5.QtWebEngineCore import QWebEngineHttpRequest
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Web view that loads ``url``, clicks the city switcher and grabs the HTML.

    The request is sent with a ``Referer`` header. The constructor runs the
    Qt event loop itself and returns once ``callable`` has stored the page
    HTML in ``self.html`` and asked the application to quit.

    Args:
        url: Address of the page to load.
        referer: Value sent in the ``Referer`` request header.
    """

    def __init__(self, url, referer='https://www.autohome.com.cn/beijing/'):
        self.html = None
        # The QApplication is created before the widget base class is set up.
        self.app = QApplication(sys.argv)
        super().__init__()
        self.loadFinished.connect(self._load_finished)
        self.request = QWebEngineHttpRequest(QUrl(url))
        self.request.setHeader(
            QByteArray(b'Referer'),
            QByteArray(referer.encode('utf-8')),
        )
        self.load(self.request)
        # Blocks here until ``self.app.quit()`` is called.
        self.app.exec_()

    def _load_finished(self, result):
        """Click the area switcher once the page has finished loading.

        ``runJavaScript`` is asynchronous, so the HTML snapshot is requested
        from its completion callback rather than immediately afterwards; this
        way the click script has run before the document is serialised.
        """
        self.page().runJavaScript(
            "document.getElementById('auto-header-switcharea').click();",
            self._click_finished,
        )

    def _click_finished(self, _result):
        """Request the page HTML after the click script has completed."""
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the HTML passed in by ``toHtml`` and stop the event loop."""
        self.html = data
        self.app.quit()
# Render the Beijing page and list every city link found in the HTML.
url = 'https://www.autohome.com.cn/beijing/'
web = Render(url)
soup = BeautifulSoup(web.html, 'html.parser')
city_links = soup.find_all('a', attrs={'name': 'auto-header-citypop-city'})
for city in city_links:
    print(city)
# Output:
<a data-info="[110100, 646, '北京', 'beijing']" data-key="110100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">北京</a>
<a data-info="[440100, 62, '广州', 'guangzhou']" data-key="440100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">广州</a>
<a data-info="[440300, 670, '深圳', 'shenzhen']" data-key="440300" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">深圳</a>
<a data-info="[320100, 335, '南京', 'nanjing']" data-key="320100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">南京</a>
<a data-info="[310100, 649, '上海', 'shanghai']" data-key="310100" href="javascript:void(0);" name="auto-header-citypop-city" target="_self">上海</a>
....
单击事件之后不会再发生页面加载,因此不需要分两次处理 loadFinished 信号。
https://stackoverflow.com/questions/54589304
复制相似问题