import re

import pandas as pd
import requests
# Regular expression source for one data row of the scraped HTML table.
# A row starts at `<tr align="center" bgcolor="#fafdff">` and yields ten
# captured cells: one link text, three plain <td> cells, two <font> cells,
# two plain <td> cells and two more <font> cells, in that order.
model = (
    '<tr align="center" bgcolor="#fafdff">.*?target=_blank>(.*?)</a>'
    '.*?<td>(.*?)</td>'
    '.*?<td>(.*?)</td>'
    '.*?<td>(.*?)</td>'
    '.*?<font color=.*?>(.*?)</font>'
    '.*?<font color=.*?>(.*?)</font>'
    '.*?<td>(.*?)</td>'
    '.*?<td>(.*?)</td>'
    '.*?<font color=.*?>(.*?)</font>'
    '.*?<font color=.*?>(.*?)</font>'
)

# re.S makes "." match newlines too, so a row may span several source lines.
pattern = re.compile(model, re.S)

# Column names for the ten captured groups, in capture order. Literal glosses:
# commodity, spot price, near-month code, near-month price, basis 1_1,
# basis 1_2, main-contract code, main-contract price, basis 2_1, basis 2_2.
col = [
    '商品', '现货价格', '近月代码', '近月价格', '基差1_1', '基差1_2',
    '主力代码', '主力价格', '基差2_1', '基差2_2',
]
def _strip_padding(cell):
    """Return *cell* with padding characters removed.

    The original code removed a single whitespace character; it presumably
    targeted the HTML entity ``&nbsp;`` before the snippet was rendered on a
    web page (unconfirmed). All three spellings are stripped: the literal
    entity, the non-breaking space U+00A0 and the plain ASCII space.
    """
    for junk in ('&nbsp;', '\xa0', ' '):
        cell = cell.replace(junk, '')
    return cell


def getURLdata(date, excelpath=None):
    """Fetch one day's table from 100ppi.com and save it as an Excel file.

    The page ``http://www.100ppi.com/sf/day-<date>.html`` is downloaded and
    split at the headings of the three exchanges listed in ``markets``. Rows
    between one heading and the next are parsed with the module-level
    ``pattern`` and tagged with that exchange's name in the ``交易所`` column.

    Args:
        date: Day to fetch, formatted like ``'2017-09-14'``; it is inserted
            verbatim into the URL and into the default file name.
        excelpath: Destination ``.xlsx`` path. When ``None`` the file is
            written to ``D:\\<date>.xlsx``.

    Returns:
        The ``pandas.DataFrame`` that was written to the Excel file.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    url = r'http://www.100ppi.com/sf/day-{}.html'.format(date)
    # A timeout keeps a stalled server from blocking the call forever, and
    # raise_for_status stops an error page from being parsed as an empty table.
    html = requests.get(url, timeout=30)
    html.raise_for_status()
    text = html.text

    # Exchange headings used as section delimiters in the page.
    markets = ['上海期货交易所', '郑州商品交易所', '大连商品交易所']

    # Keep only the headings actually present (str.find returns -1 otherwise)
    # and order them by position so each section ends where the next starts.
    found = sorted(
        (pos, name)
        for pos, name in ((text.find(name), name) for name in markets)
        if pos != -1
    )
    starts = [pos for pos, _ in found]
    ends = starts[1:] + [len(text)]

    frames = []
    for (start, name), end in zip(found, ends):
        rows = pattern.findall(text[start:end])
        frame = pd.DataFrame(rows, columns=col)
        frame['交易所'] = name
        frames.append(frame)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    if frames:
        df = pd.concat(frames, ignore_index=True)
    else:
        df = pd.DataFrame(columns=col + ['交易所'])

    # Strip padding from the price and contract-code columns.
    for k in ['现货价格', '近月代码', '近月价格', '主力代码', '主力价格']:
        df[k] = df[k].map(_strip_padding)

    # Save to Excel.
    if excelpath is None:
        excelpath = r'D:\{}.xlsx'.format(date)
    df.to_excel(excelpath, index=False)

    return df
if __name__ == '__main__':
    # Set the date to fetch and the path of the Excel file to write.
    date = '2017-09-13'
    # None makes getURLdata use its built-in default path.
    excelpath = None

    getURLdata(date, excelpath)



# (non-code residue from the source web page) 雷达卡



# (non-code residue from the source web page) 京公网安备 11010802022788号







