股票代码_公司介绍_单页信息 Version 0.1

# -*- coding:utf-8 -*-
#股票代码_公司介绍_单页信息 Version 0.1
#Author: NMTech
#Mail: NMTech@Aliyun.com
import requests,re
# Module-level accumulator: Addinfo appends one record dict to it per call.
Share_id = []
def Spider(url, timeout=10):
    """Download `url` and hand the decoded page text to txt().

    url     -- address of the page to fetch.
    timeout -- seconds to wait for the server; without a timeout
               requests.get can block indefinitely on a stalled connection.

    Raises requests.exceptions.HTTPError for a 4xx/5xx response, so an
    error page is never parsed as if it were real data.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Decode the body as UTF-8 explicitly instead of relying on the
    # encoding requests derives from the response.
    response.encoding = 'utf-8'
    txt(response.text)
# Regex-based filtering of the page HTML
def txt(text):
    """Pull the raw fields out of the page markup and forward them to Addinfo.

    text -- the page HTML as a string.

    Two patterns are applied to the whole document:
      * the <h5> heading inside <div class="nc">, passed on as `company`;
      * every pair of adjacent cells where the first is <td width="184">,
        passed on as a list of (first cell, second cell) tuples.
    Nothing is returned; the matches go straight to Addinfo.
    """
    heading_pattern = r'<div class="nc">\s*<h5>.*\s*(.*?)\s*</h5>'
    cell_pair_pattern = r'<td width="184">(.*?)</td>\s*<td>(.*?)</td>'
    Addinfo(re.findall(heading_pattern, text),
            re.findall(cell_pair_pattern, text))
# Build one record and append it to the Share_id list
def Addinfo(company,tmp_txt):
    """Assemble a record dict from the scraped values and store it in Share_id.

    company -- list of heading matches; the first entry is stored under the
               'company' key, or None when the list is empty.
    tmp_txt -- sequence of rows whose first two elements are a label and its
               value; each row becomes one key/value entry of the record.

    Each row is printed as "label<TAB>value" while it is stored, and the
    whole Share_id list is printed after the new record has been appended.
    """
    item = {}
    # An empty match list used to raise IndexError here; keep the table
    # fields and record the missing heading as None instead.
    item['company'] = company[0] if company else None
    for row in tmp_txt:
        label, value = row[0], row[1]
        item[label] = value
        # A single parenthesised argument prints identically on Python 2
        # and Python 3.
        print(label + "\t" + value)
    Share_id.append(item)
    print(Share_id)

# Script entry: fetch and parse the company page for stock code 000001.
Spider("http://www.962518.com/newscenter/gszb.htm?code=000001")

版权声明:未经允许不得随意转载,网络转载必须注明来自 www.0xc2.cn 的原创文章