def concordance(string, search_term, width=80):
    """Return the surrounding context of every occurrence of ``search_term``.

    The search is case-insensitive and matches *substrings*: looking for
    ``'and'`` also reports the ``and`` inside ``'demand'``.  Occurrences are
    found left to right and do not overlap.

    Args:
        string: Text to search.
        search_term: Text to look for.  An empty term yields no results.
        width: Number of characters of context kept on each side of a match.

    Returns:
        A tuple with one string per occurrence: up to ``width`` characters
        before the match, the match itself (in its original casing), and up
        to ``width`` characters after it.
    """
    # An empty term "matches" everywhere and would never advance the scan.
    if not search_term:
        return ()

    # Lowercase once instead of re-slicing and re-lowercasing per iteration.
    # NOTE: assumes lowercasing does not change the length of the text, which
    # holds for ASCII; a few Unicode characters (e.g. 'İ') break that.
    haystack = string.lower()
    needle = search_term.lower()
    term_length = len(search_term)

    indexes = []
    # str.find signals "no match" with -1, so a hit at position 0 (start of
    # the text, or directly after the previous match) is still recorded.
    position = haystack.find(needle)
    while position != -1:
        indexes.append(position)
        position = haystack.find(needle, position + term_length)

    # Clamp the left edge: a negative slice start would wrap to the end of
    # the string and return the wrong (usually empty) context.
    return tuple(
        string[max(0, index - width):index + width + term_length]
        for index in indexes
    )
string = '''The relationship between beef and beef broth, is not like the
relationship between beef and beef-broth, a simpler extraction and
condensation; rather, as Einstein goes on, it is like the relationship
between our overcoat and the ticket given us when we check our overcoat. In
other words, human perception involves coding even more than crude
sensing.just as a map-maker colors a nation purple not because it is purple
but because his code demands it."'''
当我以 search_term 为 'and' 调用 concordance 函数时,结果不仅返回 'and',还返回其中包含 and 的单词,例如 'demand'。我该如何修改我的代码,使其只返回 'and'?
发布于 2018-08-28 19:38:04
正如评论中提到的:您可以尝试使用re module。在这里,我尝试匹配您的search_term,仅当它被不在[a-zA-Z0-9_]中的字符包围时(不确定这是否最适合您的意图...):
import re
def concordance(strg, search_term, width=5):
    """Return the context around each whole-word occurrence of ``search_term``.

    A match counts only when the term is not directly preceded or followed
    by a word character (``[a-zA-Z0-9_]`` plus other Unicode word
    characters), so ``'and'`` is found in ``'beef and beef'`` but not in
    ``'demand'``.  Matching is case-sensitive.

    Args:
        strg: Text to search.
        search_term: Literal text to look for; regex metacharacters in it
            are matched literally.
        width: Number of characters of context kept on each side of a match.

    Returns:
        A list with one string per match: up to ``width`` characters before
        the term, the term itself, and up to ``width`` characters after it.
    """
    # Lookarounds instead of a consuming \W on each side: they also succeed
    # at the very start/end of the text, and they leave the separator
    # available for an immediately following match ("and and").
    rgx = re.compile(r'(?<!\w){}(?!\w)'.format(re.escape(search_term)))
    return [
        # Clamp the left edge; a negative slice start would wrap around.
        strg[max(0, match.start() - width):match.end() + width]
        for match in rgx.finditer(strg)
    ]
# Demo call using the default width of 5.
# NOTE(review): `strg` is not defined in this snippet — presumably it holds
# the sample text from the question; confirm before running.
print(concordance(strg, 'and'))
# Output reported by the answer's author:
# ['beef and beef', 'beef and beef', 'tion and\ncond', 'coat and the ']
# Source: https://stackoverflow.com/questions/52056462
复制相似问题