# Module-level placeholder for the search keyword (empty until one is entered).
keyword = ''
'''To obtain keyword'''
def test():`enter code here`
keywords = list()
while True:
print('what do you want to do?(a: add a key word for searching, q:quit adding words and start)')
command = input('command:')
if command == 'a':
word = input('keyword: ')
if word not in keywords:
keywords.append(word)
elif command == 'q':
break
else:
print('please input a valid command')
if len(keywords) == 0:
return
search_string = ''
for keyword in keywords:
search_string += keyword
search_string += '+'
search_string = search_string[:-1]
print(search_string)
search_url = 'http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-' \
'bool.html&r=0&f=S&l=50&TERM1=' + search_string + '&FIELD1=&co1=AND&TERM2=&FIELD2=&d=PTXT'
return search_url
class Uspto(scrapy.Spider):
    """Spider that starts crawling from the search URL returned by ``test()``.

    ``test()`` is called while the class body executes, so the keyword
    prompt runs when this class is defined, before the crawl starts.
    """

    name = 'uspto'
    # Scrapy's allowed_domains takes bare domain names, not URLs with a scheme.
    allowed_domains = ['patft.uspto.gov']
    # Search URL built from the user's keywords; None when none were entered.
    keyword = test()
    # Skip the start URL entirely when no keyword was entered, so the spider
    # is never asked to request None.
    start_urls = [keyword] if keyword else []
[图片:错误截图]
我想从键盘输入关键字,根据关键字构造搜索链接,然后启动爬虫。现在的问题是我获取关键字的方法有问题。
发布于 2017-05-14 14:08:18
根据您的错误消息NameError: name 'a' is not defined,您似乎使用python 2而不是python 3,如果是,请使用raw_input()而不是input()。
command = raw_input('command:')
raw_input() 返回用户输入的字符串;您可以参考这个答案来了解有关 input 和 raw_input 的更多细节。
https://stackoverflow.com/questions/43964534
复制相似问题