1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
|
import xbot,pymysql from xbot import print, sleep from .import package from .package import variables as glv
# Flag-image URL -> country name (in Chinese). The file names are the Unicode
# regional-indicator code points of each flag emoji (e.g. 1f1e8-1f1f3 = "CN").
# Built once at import time instead of on every call.
_FLAG_COUNTRIES = {
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1e8-1f1f3.svg": "中国",
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1fa-1f1f8.svg": "美国",
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1ee-1f1f3.svg": "印度",
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1ef-1f1f5.svg": "日本",
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1ec-1f1e7.svg": "英国",
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1eb-1f1f7.svg": "法国",
    "https://s.w.org/images/core/emoji/12.0.0-1/svg/1f1eb-1f1ee.svg": "芬兰",
}


def recountry(c):
    """Translate a flag-image URL into a country name.

    Args:
        c: The ``src`` URL of a flag emoji image. The match is exact, so a
            URL with a different emoji version or host is not recognised.

    Returns:
        The country name (in Chinese) for a known flag URL, or ``None`` when
        the URL is not in the table (including when ``c`` is ``None``).
    """
    return _FLAG_COUNTRIES.get(c)
def _parse_row(row):
    """Convert one ``<tr>`` element of the box-office table into a value list.

    Args:
        row: An xbot element for a table row. Its cells are read by position:
            cell 0 holds the year text and a flag image, cell 1 holds
            "name(score)", cell 2 the director and cell 3 the box office.

    Returns:
        ``[name, year, country, score, director, box_office, "Tetuka"]``.
        ``country`` is ``None`` when the flag image is not recognised.

    Raises:
        IndexError: If the row lacks the expected cells, the flag image, or
            the "(" that separates the name from the score.
    """
    cells = row.children()  # queried once per row rather than once per field
    first_cell = cells[0]
    title_parts = cells[1].get_text().split("(")

    mname = title_parts[0]
    year = first_cell.get_text().replace(" ", "")
    country = recountry(first_cell.children()[0].get_attribute("src"))
    # A "-" score placeholder is stored as "0".
    score = title_parts[1].replace(")", "").replace(" ", "").replace("-", "0")
    director = cells[2].get_text()
    # NOTE(review): the second space replacement is redundant as written; it
    # may originally have targeted a non-breaking space -- confirm.
    mbo = (
        cells[3]
        .get_text()
        .replace(" ", "")
        .replace("&", "")
        .replace(" ", "")
        .replace("↗", "")
    )
    # The trailing "Tetuka" value is a constant added to every row; its
    # meaning is not evident from this file.
    return [mname, year, country, score, director, mbo, "Tetuka"]


def _scrape_movies(webpage):
    """Collect the rows of every table page by clicking through the paginator.

    Args:
        webpage: An open xbot browser page showing the box-office table.

    Returns:
        A list of value lists, one per table row, in page order.
    """
    rows = []
    for _ in range(9):  # the table is walked as 9 pages
        for row in webpage.find_all_by_xpath('//tbody/tr'):
            rows.append(_parse_row(row))
        nextpage = webpage.find_by_xpath("//div[@id='tablepress-4_paginate']/a[2]")
        nextpage.click()
    return rows


def _save_movies(rows):
    """Insert the scraped rows into the ``movie`` table.

    Values are passed as query parameters rather than being interpolated into
    the SQL text, so quotes in scraped titles cannot break or alter the
    statement. As a consequence an unknown country (``None``) is stored as
    SQL NULL instead of the literal text 'None'.

    Args:
        rows: Sequence of 7-item value lists matching the table's columns.
    """
    # NOTE(review): connection credentials are hard-coded here; consider
    # moving them to configuration.
    conn = pymysql.connect(
        host="192.168.48.129",
        user="root",
        password="root123456",
        database="test",
        charset="utf8",
    )
    try:
        with conn.cursor() as cursor:
            cursor.executemany(
                "insert into movie values(%s,%s,%s,%s,%s,%s,%s)",
                rows,
            )
        conn.commit()
    finally:
        # Always release the connection, even when an insert fails.
        conn.close()


def main(args):
    """Scrape the box-office table and store every row in MySQL.

    Opens the page, reads all table pages, prints the collected rows, closes
    the page and then writes the rows to the ``movie`` table.

    Args:
        args: Entry-point argument; not used by this function.
    """
    webpage = xbot.web.create(url="http://www.boxofficecn.com/the-red-box-office")
    try:
        res = _scrape_movies(webpage)
        print(res)
    finally:
        # Close the browser page even if scraping raised part-way through.
        webpage.close()

    _save_movies(res)
|