用 Python 编写爬虫,从官网获取,可参考以下示例: # encoding:UTF-8 from pyquery import PyQuery as pyq import urllib.request url …
用 Python 编写爬虫,从官网获取,可参考以下示例:
# encoding: UTF-8
#
# Scrape administrative division codes from the National Bureau of Statistics
# page at URL and write them out as a single multi-row SQL INSERT statement.
#
# Each '.MsoNormal' paragraph on the page is expected to hold one division:
# a span with a numeric code and a span with its Chinese name. The parent
# code and the grade (0 = province, 1 = city, 2 = county) are derived from
# the code's trailing zeros, which assumes 6-digit codes.
import urllib.request

URL = 'http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201703/t20170310_1471429.html'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                         'Chrome/51.0.2704.63 Safari/537.36'}
OUTPUT_PATH = 'D:/adminvsion.sql'
INSERT_PREFIX = 'INSERT INTO ADMINDIVSION(ID,NAME,PARENT,GRADE) VALUES'


def contains_chinese(text):
    """Return True when *text* contains at least one character in U+4E00..U+9FA6.

    The check is per character, so leading whitespace or punctuation in a
    span does not hide the Chinese name that follows it.
    """
    return any('\u4e00' <= ch <= '\u9fa6' for ch in text)


def classify_code(code):
    """Return ``(parent, grade)`` for a division *code*.

    * code ends in ``0000`` -> province: parent ``'0'``, grade ``'0'``
    * code ends in ``00``   -> city: parent is the province code, grade ``'1'``
    * anything else         -> county: parent is the city code, grade ``'2'``
    """
    if code[2:] == '0000':
        return '0', '0'
    if code[4:] == '00':
        return code[:2] + '0000', '1'
    return code[:4] + '00', '2'


def format_row(code, name):
    """Return one ``(ID,'NAME',PARENT,GRADE)`` tuple for the VALUES clause."""
    parent, grade = classify_code(code)
    # Double any single quote so a name can never terminate the SQL literal.
    safe_name = name.replace("'", "''")
    return '(' + code + ",'" + safe_name + "'," + parent + ',' + grade + ')'


def build_insert_sql(rows):
    """Return the INSERT statement for *rows*, an iterable of ``(code, name)``."""
    return INSERT_PREFIX + ','.join(format_row(code, name) for code, name in rows)


def fetch_html(url=URL, headers=None):
    """Download *url* and return the raw response body as bytes."""
    req = urllib.request.Request(url=url, headers=HEADERS if headers is None else headers)
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(req) as res:
        return res.read()


def parse_rows(html):
    """Extract ``(code, name)`` pairs from the page markup *html*.

    Paragraphs that do not contain both a numeric span and a Chinese span
    are skipped instead of reusing values left over from a previous one.
    """
    # Third-party dependency, imported lazily so the pure helpers above stay
    # importable on machines where pyquery is not installed.
    from pyquery import PyQuery as pyq

    rows = []
    for paragraph in pyq(html)('.MsoNormal'):
        code = None
        name = None
        for span in pyq(paragraph).find('span'):
            text = pyq(span).text().strip()
            if text.isdigit():  # numeric span -> division code
                code = text
            elif contains_chinese(text):  # Chinese span -> division name
                name = text
        if code is None or name is None:
            continue
        rows.append((code, name))
    return rows


def main():
    """Fetch the page, build the INSERT statement and write it to OUTPUT_PATH."""
    rows = parse_rows(fetch_html())
    if not rows:
        # An INSERT with an empty VALUES list is invalid SQL; fail loudly.
        raise SystemExit('No administrative divisions were parsed from ' + URL)
    # Explicit UTF-8 so the Chinese names survive regardless of OS locale.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as sql_file:
        sql_file.write(build_insert_sql(rows))


if __name__ == '__main__':
    main()