def onepage(url, timeout=10):
    """Download one page and return the text of the paragraphs in its comment block.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error. Without a timeout the request can block forever.

    Returns:
        A list of whitespace-stripped strings, one per ``<p>`` element found
        inside the first ``<div>`` with CSS class ``mod-bd``. An empty list is
        returned when the page contains no such ``<div>``.
    """
    response = requests.get(url, timeout=timeout)
    # Force UTF-8 decoding of the body instead of relying on the guessed encoding.
    response.encoding = "utf-8"
    soup = BeautifulSoup(response.text, "html.parser")

    comments_sec = soup.find("div", "mod-bd")
    if comments_sec is None:
        # find() yields None when nothing matches; calling find_all on it
        # would raise AttributeError, so report "no comments" instead.
        return []

    # The empty-string second argument is kept exactly as in the original call.
    return [p.text.strip() for p in comments_sec.find_all("p", "")]
def parsepage(movie_id, page_num):
    """Gather the comment text from the first ``page_num`` comment pages of a movie.

    Args:
        movie_id: Identifier inserted into the Douban subject URL.
        page_num: Number of comment pages to fetch, 20 comments per page.

    Returns:
        A single string with every comment returned by ``onepage`` joined by
        one space. Note that the collected list is local to this function;
        callers only get the joined string through the return value.
    """
    collected = []
    for page_no in range(1, page_num + 1):
        # Each page holds 20 entries, so page k starts at offset 20 * (k - 1).
        offset = 20 * (page_no - 1)
        page_url = (
            "https://movie.douban.com/subject/"
            + str(movie_id)
            + "/comments?start="
            + str(offset)
            + "&limit=20"
        )
        collected.extend(onepage(page_url))
        print("parsing page %d" % page_no)
        # Fixed 3-second pause after every page (presumably to go easy on the server).
        time.sleep(3)
    return " ".join(collected)
程序如上。为什么运行到最后会提示 data 没有被定义呢？