Python crawler learning: crawling novel perfect world case source code sharing

Python crawler learning: crawling novel perfect world case source code sharing

Case source code

__author__ ='zouxiaoliang'
 
import urllib
import re
import os
import codecs
 
def getBookMemu(url_path):
    menu_patten ='<dd>.*?</dd>'
    url_chapter ='<dd><a href="(.*)">(.*)</a></dd>'
    thePage = urllib.urlopen(url_path)
    page = str(thePage.read()).decode('gbk')
    menu_list = re.findall(menu_patten, page)
    menu = dict() # map<url, chapter>
    for chapter in menu_list:
        g = re.match(url_chapter, chapter)
        if g:
            menu[url_path+g.group(1)] = g.group(2)
    return menu
    pass;
 
def getContent(url_path):
    cc = str()
    thePage = urllib.urlopen(url_path)
    page = str(thePage.read()).decode('gbk')
    # print(page)
    c_patten ='<div id="content">(.*)</div>'
    g = re.search(c_patten, page)
    if g:
        cc = g.group(1)
        # print(cc)
        cc = re.sub(' ','', cc)
        cc = re.sub('<br/><br/>','\n', cc)
        # print(cc)
    return cc
    pass
 
def writeFile(dirname, filename, content):
    w_handle = codecs.open(dirname+'//'+filename+".txt", mode='wb', encoding='utf8')
    w_handle.write(content)
    w_handle.close()
    pass
 
if __name__ =='__main__':
    m = getBookMemu('http://www.biquge.la/book/14/')
    for c in m.keys():
        url = c
        name = m[c]
        print('%s, %s' %(url, name))
        while True:
            try:
                cc = getContent(url)
                # print(cc)
                if not os.path.exists('biquge'):
                    os.mkdir('biquge')
                writeFile('biquge', name, cc)
                break
            except:
                continue
    print("get book over")

ps: Here I recommend my python zero-based system learning exchange deduction qun: 322795889, if you don’t understand python (learning method, learning route, how to learn efficiently), you can add it, there is a good learning tutorial in the group , Development tools, e-book sharing. Professional teacher answering questions

Alright! The article is shared with the readers here

Finally, if you find it helpful, remember to follow, forward, and favorite

·END·

Reference: https://cloud.tencent.com/developer/article/1475070 python crawler learning: crawling novel perfect world case source sharing-Cloud + Community-Tencent Cloud