import os

path = r'E:\CRM\phone\call'
path2 = r'E:\CRM\phone\result'

# Text that marks a call record of interest (runtime string, kept verbatim).
DM_MARKER = "打DM电话"


def filter_dm_calls(lines, marker=DM_MARKER, urs_col=0):
    """Yield ``'<urs>\\t<call>\\n'`` rows for the call records containing *marker*.

    Only lines containing the text ``'com'`` are considered, as in the
    original script.  Each such line is stripped and split on tabs; the first
    tab-separated field that contains *marker* is reported together with the
    field at position *urs_col*.

    NOTE(review): the original column subscripts were lost when this script
    was pasted (it called ``.find`` on the whole field list).  Taking the
    account from column 0 and searching every column for the marker is a
    reconstruction -- confirm against a real input file.

    :param lines: iterable of text lines (for example an open file).
    :param marker: substring identifying the wanted call records.
    :param urs_col: index of the account column.
    """
    for line in lines:
        if 'com' not in line:
            continue
        fields = line.strip().split('\t')
        if urs_col >= len(fields):
            continue
        for field in fields:
            if marker in field:
                yield '%s\t%s\n' % (fields[urs_col], field)
                break


# Walk the call-log tree and write one '<name>jieguo.txt' result per input file.
for root, dirs, files in os.walk(path):
    for fn in files:
        name = fn.split('.')[0]
        # Join with ``root`` (not ``path``) so files found in sub-directories
        # by os.walk can actually be opened.
        wroute1 = os.path.join(root, fn)
        wroute2 = os.path.join(path2, name + 'jieguo.txt')
        with open(wroute1, 'r') as file1, open(wroute2, 'w') as file2:
            file2.writelines(filter_dm_calls(file1))
# Taobao price watcher: once an hour, search Taobao for every keyword listed
# in the shopping-list file and log the offers that fall inside a price window.
#
# The declaration below is kept from the original snippet; Python only honours
# it when it sits on the first or second line of a file.
# -*- coding:cp936 -*-
import datetime
import json
import os
import re
import sys
import time
from operator import itemgetter

try:  # Python 2, the interpreter this script was written for
    import cookielib
    import urllib2
    import urllib
    from urllib import quote, urlopen
    # Make urllib ignore proxy settings stored in the Windows registry.
    urllib.getproxies_registry = lambda: {}
except ImportError:  # Python 3: the same helpers live in urllib.request/parse
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    import urllib
    from urllib.parse import quote
    from urllib.request import urlopen
    urllib2.getproxies_registry = lambda: {}


class NewBBS():
    """Download one search-result page and record the offers it lists.

    Constructing an instance does the whole job: fetch ``url``, extract the
    offers and append them to ``data.txt`` / ``taobao.txt``.
    """

    def __init__(self, name, url, minprice, maxprice):
        self.os_char = 'gb18030'
        self.dw = {}
        html = self.getHtml(url)
        datalist = self.getdata(html)
        self.makedata(datalist, name, minprice, maxprice)

    def getHtml(self, url):
        """Return the body of *url* as text."""
        page = urlopen(url)
        try:
            html = page.read()
        finally:
            page.close()
        if not isinstance(html, str):
            # Python 3 returns bytes.  NOTE(review): assumes the page uses
            # the encoding named in ``self.os_char`` -- confirm.
            html = html.decode(self.os_char, 'replace')
        return html

    def getdata(self, html):
        """Return ``(detail_url, view_price, view_fee)`` string tuples found in *html*."""
        # The decimal point is escaped (the original ``.`` matched any
        # character); integer prices without decimals are accepted too.
        reg = (r'"detail_url":"(.*?)","comment_url":"http:.*?",'
               r'"view_sales":".*?","view_price":"(\d+(?:\.\d+)?)",'
               r'"view_fee":"(\d+(?:\.\d+)?)","shopLink":"')
        return re.findall(reg, html)

    @staticmethod
    def _read_lines(filename):
        """Return the set of lines already stored in *filename*, newline removed."""
        if not os.path.exists(filename):
            return set()
        with open(filename, 'r') as handle:
            return set(line.rstrip('\r\n') for line in handle)

    @staticmethod
    def _rank_offers(datalist, name, nowday):
        """Return ``(key, total_price)`` pairs sorted by ascending total price.

        ``key`` is ``nowday<TAB>name<TAB>item-url``; the total is item price
        plus shipping fee.  When a key occurs more than once the first
        occurrence wins.

        NOTE(review): the tuple subscripts were lost in the original paste;
        url/price/fee are taken in the order of the capture groups of the
        pattern used by ``getdata``.
        """
        totals = {}
        for offer in datalist:
            # NOTE(review): this prefix is reproduced exactly as found
            # (leading space, double slash); it may be damaged -- confirm.
            dataurl = " http://item.taobao.com//item." + offer[0]
            key = nowday + '\t' + name + '\t' + dataurl
            if key not in totals:
                totals[key] = float(offer[1]) + float(offer[2])
        return sorted(totals.items(), key=itemgetter(1))

    def makedata(self, datalist, name, minprice, maxprice):
        """Append new offers to ``data.txt`` and in-budget ones to ``taobao.txt``.

        A row is skipped when an identical row is already present in the
        target file.  The original compared against stored lines that still
        carried their trailing newline, so that check never matched.

        NOTE(review): the comparison operators were lost in the original
        paste; a strict ``minprice < total < maxprice`` window is assumed.
        """
        minprice = float(minprice)
        maxprice = float(maxprice)
        nowday = time.strftime('%Y%m%d', time.localtime(time.time()))
        known_hits = self._read_lines('taobao.txt')
        known_data = self._read_lines('data.txt')
        with open('data.txt', 'a') as data_log, open('taobao.txt', 'a') as hit_log:
            for key, price in self._rank_offers(datalist, name, nowday):
                row = key + '\t' + str(price)
                if row not in known_data:
                    data_log.write('%s\n' % row)
                if minprice < price < maxprice and row not in known_hits:
                    hit_log.write('%s\n' % row)


def main():
    """Poll forever: one pass over the shopping list, then sleep for an hour."""
    while True:
        with open('taobao.txt', 'a+') as w:
            w.write('--------------%s------------------\n' % (
                time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))))
        nowday = time.strftime('%Y%m%d', time.localtime(time.time()))
        with open('购买.txt', 'r') as f:
            for i in f:
                if not i.strip():
                    continue  # tolerate blank lines in the shopping list
                name, minprice, maxprice = i.strip().split()
                keyword = name
                if isinstance(keyword, bytes):
                    # Python 2: turn the console-encoded byte string into text.
                    # NOTE(review): the gb18030 fallback for a missing stdin
                    # encoding is an assumption.
                    keyword = keyword.decode(sys.stdin.encoding or 'gb18030')
                name2 = quote(keyword.encode('utf8'))
                # The '&' separators between query parameters were missing
                # in the original text and are restored here.
                url = (" http://s.taobao.com/search?fs=1&initiative_id=tbindexz_"
                       + nowday + "&tab=all&q=" + name2
                       + "&suggest=history_1&source=suggest&filterFineness=1 ")
                NewBBS(name, url, minprice, maxprice)
        print('wait')
        time.sleep(3600)


if __name__ == '__main__':
    main()
# The sections below appear to be independent Python 2 one-off scripts that
# were pasted one after another; each has its own imports and file names.
# The section titles were written as /* ... */ (not valid Python) and are
# now ordinary comments.

# ===================== File merge tool =====================
# -*- coding: cp936 -*-
import glob

files = glob.glob('*_*')  # edit this pattern to choose the input files
with open('comb.txt', 'w') as w:  # edit this name to change the output file
    # Optional header row, disabled in the original:
    # w.write('推广员编号,姓名,申请ip,地址,玩家id,服务器id,昵称,等级,激活ip,地址,激活时间,消费\n')
    for wenjian in files:
        with open(wenjian) as f:
            for i in f:
                # Disabled in the original (skips repeated header rows):
                # if '推广员编号' in i:
                #     continue
                # Every output line is prefixed with the name of its source file.
                w.write('%s%s' % (wenjian, i))

# ========== Keep the lines that start with a date-time value ==========
#coding:gb2312
import time

filename = "system_ogoss.log_20131126.log"


def is_valid_date(str):
    """Return True when *str* parses as ``%Y-%m-%d %X``, otherwise False."""
    try:
        time.strptime(str, "%Y-%m-%d %X")
        return True
    except ValueError:
        return False


with open(filename) as f, open(filename + "_.result", "w") as r:
    for s in f:
        if not s.strip():
            continue
        # NOTE(review): the slice was lost in the original paste.  The first
        # 19 characters are assumed ('YYYY-MM-DD HH:MM:SS') -- confirm.
        if is_valid_date(s[:19]):
            r.write(s)
raw_input("Done")

# ========== Keep the data that follows a given field marker ==========
#coding:gb2312
file_name = "lj.instance.2013-12-31.log"
span = "log="
with open(file_name) as f, open(file_name + "_result.txt", "w") as r:
    for s in f:
        if not s.strip():
            continue
        if span not in s:
            continue  # nothing to extract from this line
        # NOTE(review): the subscript was lost in the original paste (it
        # wrote the whole split list).  The text after the first marker is
        # assumed -- confirm.
        r.write(s.split(span, 1)[1])

# ========== Drop the lines that are not in date format ==========
#coding:gb2312
import time
import re

regx = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')
filename = "2013-11-10-chat.log"
with open(filename, "rb") as f:
    lines = f.read().replace("\r", "").split("\n")
with open(filename + "_.result", "w") as r:
    for s in lines:
        if not s.strip():
            continue
        # NOTE(review): the slice applied to ``s`` was lost in the original
        # paste; matching the timestamp at the start of the line is assumed.
        if regx.match(s):
            r.write(s + "\n")
raw_input("Done")

# ===================== Field matching script =====================
# -*- coding: cp936 -*-
import glob
import gzip
import sys
import datetime
import time

ty = sys.getfilesystemencoding()

# NOTE(review): every column subscript of this script was lost in the
# original paste and cannot be recovered from the code.  Fill in the numbers
# below before running; the script refuses to start while any is None.
FN_COL_A = None  # log column holding the first half of the lookup key
FN_COL_B = None  # log column holding the second half of the lookup key
URS_COL = None   # log column holding the account
FWQ_COL = None   # spend-record column read into ``fwq``
YJS_COL = None   # spend-record column read into ``yjs``
MJS_COL = None   # spend-record column read into ``mjs``

if None in (FN_COL_A, FN_COL_B, URS_COL, FWQ_COL, YJS_COL, MJS_COL):
    raise ValueError('set the *_COL column numbers of the field matching '
                     'script before running it')


def to_local(text):
    """Re-encode UTF-8 *text* to the file-system encoding; on failure return it unchanged."""
    try:
        return text.decode('utf-8').encode(ty)
    except (UnicodeError, TypeError):
        return text


def field_value(field):
    """Return the stripped text after the first ':' of *field*.

    NOTE(review): the position taken after the split was lost as well;
    index 1 (the value of a 'key:value' pair) is assumed -- confirm.
    """
    return field.split(':')[1].strip()


# Build the lookup table (key pair -> account) from every *.log file.
s = {}
logs = glob.glob('*.log')
for z in logs:
    print(z)
    with open(z) as log:
        for r in log:
            sr = to_local(r).split(',', 15)
            fn = field_value(sr[FN_COL_A]), field_value(sr[FN_COL_B])
            urs = field_value(sr[URS_COL])
            if "@" not in sr[URS_COL]:
                urs += "@163.com"  # bare account names get the default domain
            if fn not in s:
                s[fn] = urs  # the first account seen for a key wins

# Prefix every 23-column spend record with the two matching accounts.
with open('再流失样本流通记录(加帐号).txt', 'w') as w:
    with open('11.1-11.30道具消费.txt') as records:
        for ii in records:
            i = to_local(ii)
            si = i.split(',')
            if len(si) != 23:
                continue
            fwq = si[FWQ_COL].strip()
            yjs = si[YJS_COL].strip()
            mjs = si[MJS_COL].strip()
            yurs = s.get((fwq, yjs), '未匹配')
            murs = s.get((fwq, mjs), '未匹配')
            w.write('%s,%s,%s\n' % (yurs, murs, i.strip()))

# ===================== Automatic data download script =====================
#---coding:cp936---#
import urllib
import urllib2
import cookielib
import socket
import time
import os
import re
import datetime
import random
import sys
import base64
import module


def save_lines(request, target):
    """Fetch *request* with urllib2 and write its lines, stripped, to *target*."""
    response = urllib2.urlopen(request)
    try:
        with open(target, 'w') as out:
            for line in response:
                out.write(line.strip() + '\n')
    finally:
        response.close()


if '__main__' == __name__:
    self_module = module.self_module
    date = '2014-01-23'
    log_out_reqh = self_module().down_out(date)
    save_lines(log_out_reqh, '%s_out.log' % date)
    log_in_reqh = self_module().down_in(date)
    save_lines(log_in_reqh, '%s_in.log' % date)
/*
 * Notes on the match-merge below.
 *
 * The result is NOT (this first table is garbled in the original notes and
 * is reproduced as found):
 *
 *   jo 186 . ja 2121 a joan 4695 . . 3567 fi joan 4698 m John 5463 accouting
 *
 * but rather:
 *
 *   Obs  Name  empid  department
 *    1   Jill  1864
 *    2   Jack  2121   accouting
 *    3         3567   finance
 *    4   Joan  4698   marketing
 *    5   John  5463   accouting
 *
 * because the pointer advances in the sort order of the BY variable.
 */
proc import dbms=excel out=one datafile='F:\raw material\_52 of 70_1.xls' replace;
run;

proc import dbms=excel out=two datafile='F:\raw material\_52 of 70_2.xls' replace;
run;

/* A match-merge with BY requires both inputs to be ordered by the BY
   variable; imported worksheets carry no such guarantee, so sort first. */
proc sort data=one;
    by empid;
run;

proc sort data=two;
    by empid;
run;

data all;
    merge one(in=o) two(in=d);
    by empid;
    /* Keep an observation only when exactly one of the two data sets
       contributed to it; note that the pointer moves down in empid order. */
    if (o and not d) or (d and not o);
run;

proc print data=all;
run;