# Language: python
import re
import os
import urllib.request
# Base URL of the novel's table-of-contents directory. "index.html" and the
# chapter links scraped from that page are appended to it.
home = "http://www.23zw.com/olread/9/9068/"

# Scraping patterns, unchanged from the original script: the text following
# the first "h1>" is used as the novel title, and only href values that are
# exactly 37 characters long are treated as chapter links.
_TITLE_RE = re.compile(r'h1>(.+?)<')
_CHAPTER_RE = re.compile(r'href="(.{37}?)"')


def _fetch_to(url, target):
    """Download *url* to *target* without ever leaving a truncated file.

    The data is written to a sibling ``.part`` file that is renamed onto
    *target* only after the transfer completed. Otherwise an interrupted
    download would leave a partial file behind, and the "already exists"
    check of a later run would skip that chapter forever.
    """
    partial = target + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, target)
    finally:
        # After a successful rename the partial file is gone and this is a
        # no-op; after a failure it removes the incomplete download.
        if os.path.exists(partial):
            os.remove(partial)


def download_novel(base_url, dest_root="."):
    """Save the index page of a novel and every chapter page it links to.

    Pages are stored in ``<dest_root>/<title>/``, where the title is scraped
    from the index page. Chapters that already exist on disk are skipped, so
    the function can be re-run to resume an interrupted mirror. A chapter
    that fails to download is reported and does not stop the run.

    Args:
        base_url: Directory URL of the novel, including the trailing ``/``.
        dest_root: Directory in which the per-novel folder is created.

    Returns:
        Path of the folder the pages were saved to.

    Raises:
        ValueError: If the index page contains no title to name the folder.
        OSError: If the index page cannot be fetched or saved.
    """
    index_url = base_url + "index.html"
    with urllib.request.urlopen(index_url) as response:
        raw = response.read()
    # The text is only used for pattern matching (the raw bytes are what gets
    # saved), so a stray undecodable byte should not abort the whole run.
    page = raw.decode("gbk", errors="replace")
    print(len(page))

    title_match = _TITLE_RE.search(page)
    if title_match is None:
        raise ValueError("no <h1> title found in " + index_url)
    title = title_match.group(1)
    print(title)

    # NOTE(review): the title comes straight from the remote page and is used
    # as a folder name unchanged, exactly as the original script did.
    out_dir = os.path.join(dest_root, title)
    os.makedirs(out_dir, exist_ok=True)
    # Reuse the bytes that were just fetched instead of requesting the index
    # page a second time.
    with open(os.path.join(out_dir, "index.html"), "wb") as index_file:
        index_file.write(raw)

    links = _CHAPTER_RE.findall(page)
    total = len(links)
    for position, link in enumerate(links, start=1):
        print("({}/{}) {}".format(position, total, link))
        # Links come from an untrusted page; anything that is not a plain
        # file name could write outside the novel folder.
        if "/" in link or "\\" in link:
            print("error! unsafe link skipped")
            continue
        target = os.path.join(out_dir, link)
        if os.path.exists(target):
            continue
        try:
            _fetch_to(base_url + link, target)
        except Exception as exc:
            # Best effort per chapter: report the reason and carry on.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print("error! {}".format(exc))
    print("finish!")
    return out_dir


if __name__ == '__main__':
    download_novel(home)
# Tags: python