import requests
from bs4 import BeautifulSoup

# Root of the site; the hrefs scraped from the index page are appended to it.
base_url = "http://www.harness.org.au"

# Fetch the index page that lists the tracks.  Only the URL is passed:
# the second positional argument of requests.get() is ``params`` (query
# string data), so passing "html.parser" there would send it to the server
# instead of selecting a parser.
webpage_response = requests.get('http://www.harness.org.au/racing/tracks/')
soup = BeautifulSoup(webpage_response.content, "html.parser")

# Content container of the index page; the track anchors are looked up in it.
tracks = soup.find(class_="col-lg-10 col-md-10 col-sm-10 col-xs-10 content")
links = tracks.find_all('a')

# Each href is an incomplete (site-relative) URL, so prefix it with base_url.
lists = [base_url + a["href"] for a in links]

# Visit every track page and print what is found in its barrier table(s).
for link in lists:
    webpage = requests.get(link)
    track = BeautifulSoup(webpage.content, "html.parser")
    Barriertable = track.select(".mBarrier")
    trackname = track.find(class_="pageTitle")

    # find() returns None when no element matches, so test for that
    # explicitly instead of hiding every possible error behind a bare except.
    if trackname is None:
        print('No class="pageTitle" found.')
        track1 = ''
    else:
        track1 = trackname.get_text()

    # NOTE: this walks the matched ".mBarrier" elements and collects only
    # their <th> cells, so one list of header texts is printed per table;
    # <td> data cells are never read here.
    for th in Barriertable:
        td = th.find_all('th')
        row = [i.text for i in td]
        print(track1, row)

# Asker's note: the print above outputs each row. There are a few things
# about the table that I am stuck on:
我尝试了几种不同的方法,但都没有成功。
发布于 2020-04-13 08:34:00
您需要遍历表格的每一行。
例如 `for tr in Barriertable.select('tr'):`。此外,您可以使用列表推导式简化代码(并使用 CSS 的“或”选择器语法 `th,td` 同时处理 th 和 td)。使用 Session 可以在多次请求之间复用 TCP 连接,从而提高效率。您还可以通过更改选择器,在一个列表推导式中直接获取各赛道的链接。
import requests
from bs4 import BeautifulSoup as bs

# Root of the site; the hrefs scraped from the index page are appended to it.
base_url = "http://www.harness.org.au"

# One Session is used for the index page and for every track page.
with requests.Session() as s:
    webpage_response = s.get('http://www.harness.org.au/racing/tracks/')
    soup = bs(webpage_response.content, "html.parser")

    # Anchors nested in <h4> elements are taken as the track links.
    links = [base_url + i['href'] for i in soup.select('h4 a')]

    for track in links:
        webpage = s.get(track)
        track_soup = bs(webpage.content, "html.parser")
        barrierTable = track_soup.select_one('.mBarrier')
        trackname = track_soup.select_one('.pageTitle')

        # select_one() returns None when nothing matches; check explicitly
        # rather than swallowing every exception with a bare except.
        if trackname is None:
            print('No class="pageTitle" found.')
            track1 = ''
        else:
            track1 = trackname.get_text()

        # Without this guard a page lacking the barrier table would raise
        # AttributeError on None and abort the whole run.
        if barrierTable is None:
            print('No class="mBarrier" found.')
            continue

        # Print one list per table row; the "th,td" selector picks up both
        # header and data cells.
        for tr in barrierTable.select('tr'):
            row = [i.text for i in tr.select('th,td')]
            print(track1, row)

# Source: https://stackoverflow.com/questions/61182747
复制相似问题