python 处理大数据,有需要的朋友可以参考下。
最近大数据竞赛很火,本人 Python 没学多久,想试着写一下。这里只实现了数据的处理部分,主要用到了 dict、list 和文件读写的知识。
还有一点要说:我也用 MATLAB 实现过同样的处理,运行完差不多要两分钟,而 Python 几秒钟就处理完了,足见 Python 处理文本的功能之强大。
文件里的数据格式:
clientid shopingid num date
1111000 3873 2 4月5日
# Select, for every client in a behaviour log, the items whose accumulated
# "vote" reaches a threshold, and append one line per client to a result file.
#
# Input: one record per line, whitespace separated:
#     clientid shopingid num date        e.g.  1111000 3873 2 4月5日
# Rows belonging to one client are expected to be contiguous; every run of
# consecutive rows with the same client id is processed as one group.
#
# Output: one line per client that has at least one selected item:
#     <clientid>\t<item>,<item>,...
from itertools import groupby
from operator import itemgetter

INPUT_PATH = 'f:/s.txt'
OUTPUT_PATH = 'f:/a.txt'

MIN_MONTH = 6          # rows dated before this month are ignored
MIN_VOTE = 3           # an item is selected once its vote reaches this value
VETO_ACTION = '1'      # a single occurrence of this code rejects the item
ACTION_WEIGHTS = {'0': 1, '2': 2}
DEFAULT_WEIGHT = 3     # weight of every code not listed above


def parse_record(line, line_no=0):
    """Split one log line into ``(client_id, item_id, action, month)``.

    Returns ``None`` for a blank line so that callers can skip it.

    Raises:
        ValueError: if the line has fewer than four fields or its date field
            does not look like ``<month>月<day>日``. ``line_no`` is only used
            to make the message traceable.
    """
    fields = line.split()
    if not fields:
        return None
    if len(fields) < 4:
        raise ValueError(
            'line {}: expected 4 fields, got {!r}'.format(line_no, line))
    client_id, item_id, action, date_text = fields[:4]
    month_text, separator, _day = date_text.partition('月')
    try:
        if not separator:
            raise ValueError(date_text)
        month = int(month_text)
    except ValueError:
        raise ValueError(
            'line {}: bad date {!r}'.format(line_no, date_text)) from None
    return client_id, item_id, action, month


def score_actions(actions):
    """Return the vote for one item given all action codes recorded for it.

    Any ``VETO_ACTION`` forces the vote to 0; otherwise every occurrence adds
    its weight from ``ACTION_WEIGHTS`` (``DEFAULT_WEIGHT`` for unknown codes).
    """
    if VETO_ACTION in actions:
        return 0
    return sum(ACTION_WEIGHTS.get(code, DEFAULT_WEIGHT) for code in actions)


def recommend(lines, min_month=MIN_MONTH, min_vote=MIN_VOTE):
    """Yield ``(client_id, [item_id, ...])`` for every client with selections.

    ``lines`` is any iterable of raw text lines (an open file works).  Items
    keep the order in which they were first seen for the client.  Clients
    without a qualifying item are not yielded at all.
    """
    parsed = (parse_record(line, number)
              for number, line in enumerate(lines, 1))
    records = (record for record in parsed if record is not None)

    # Group first, filter by month second: a row that is too old must still
    # mark the boundary between two clients.
    for client_id, rows in groupby(records, key=itemgetter(0)):
        actions_by_item = {}
        for _client, item_id, action, month in rows:
            # The filter applies to every row, including the first row of a
            # client (the earlier version let that row through unfiltered).
            if month >= min_month:
                actions_by_item.setdefault(item_id, []).append(action)
        selected = [item_id
                    for item_id, actions in actions_by_item.items()
                    if score_actions(actions) >= min_vote]
        if selected:
            yield client_id, selected


def format_row(client_id, item_ids):
    """Render one output line: client id, a tab, comma-joined item ids."""
    # Plain '\n': the text-mode file object already maps it to the platform
    # line ending, so writing '\r\n' would produce '\r\r\n' on Windows.
    return client_id + '\t' + ','.join(item_ids) + '\n'


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH, encoding=None):
    """Read ``input_path`` and append the selections to ``output_path``.

    ``encoding=None`` keeps the platform default used by ``open``.  Every
    row, including the last one, is newline-terminated so that repeated runs
    appending to the same file never merge two rows.
    """
    with open(input_path, 'r', encoding=encoding) as data_file, \
            open(output_path, 'a', encoding=encoding) as output:
        output.writelines(format_row(client_id, item_ids)
                          for client_id, item_ids in recommend(data_file))


if __name__ == '__main__':
    main()
猜您喜欢: