import numpy as np import pandas as pd data=np.random.standard_normal((1000000,5)).round(5) filename=path+'numbs' import sqlite3 as sq3 query='CREATE TABLE numbers (No1 real,No2 real,No3 real,No4 real,No5 real)' con=sq3.Connection(filename+'.db') con.execute(query) %%timecon.executemany('INSERT INTO numbers VALUES (?,?,?,?,?)',data)con.commit() %%timetemp=con.execute('SELECT * FROM numbers').fetchall()print temp[:2]temp=0.0 %%timequery='SELECT * FROM numbers WHERE No1>0 AND No2>0'res=np.array(con.execute(query).fetchall()).round(3) res=res[::100]import matplotlib.pyplot as plt%matplotlib inlineplt.plot(res[:,0],res[:,1],'ro')plt.grid(True);plt.xlim(-0.5,4.5);plt.ylim(-4.5,0.5) import pandas.io.sql as pds %time data=pds.read_sql('SELECT * FROM numbers',con) data.head() %time data[(data['No1']>0) &(data['No2']<0)].head() %%time\res=data[['No1','No2'][((data['No1']>0.5)|(data['No1']<-0.5))&((data['No2']<-1)|(data['No2']<1))]] %%timeres=data[['No1','No2'][((data['No1']>0.5)|(data['No1']<-0.5))&((data['No2']<-1)|(data['No2']<1))]] %%timeres=data[['No1','No2']][((data['No1']>0.5)|(data['No1']<-0.5))&((data['No2']<-1)|(data['No2']<1))] plt.plot(res.No1,res.No2,'ro')plt.grid(True);plt.axis('tight') h5s=pd.HDFStore(filename+'.h5s','w') %time h5s['data']=data h5s h5s.close() %%timeh5s=pd.HDFStore(filename+'.h5s','r')temp=h5s('data')h5s.close() %%timeh5s=pd.HDFStore(filename+'.h5s','r')temp=h5s['data']h5s.close() np.allclose(np.array(temp),np.array(data)) temp=0.0 %time data.to_csv(filename+'.csv') %%timepd.read_csv(filename+'.csv')[['No1','No2','No3','No4']].hist(bins=20) %time data[:100000].to_excel(filename+'.xlsx') %time pd.read_excel(filename+'.xlsx','Sheet1').cumsum().plot() import numpy as npimport tables as tbimport datetime as dtimport matplotlib.pyplot as plt%matplotlib inline filename=path+'tab.h5'h5=tb.open_file(filename,'w') rows=2000000 row_des={ ' row_des={ 'Date':tb.StringCol(26,pos=1), 'No1':tb.IntCol(pos=2), 'No2':tb.IntCol(pos=3), 'No3':tb.FloatCol(pos=4), 'No4':tb.FloatCol(pos=5)} filters=tb.Filters(complevel=0)tab=h5.create_table('/','ints_floats',row_des,title='Integers and Floats',expectedrows=rows,filters=filters) tab pointer=tab.row ran_int=np.random.randint(0,10000,size=(rows,2))ran_flo=np.random.standard_normal((rows,2)).round(5) %%timefor i in range(rows): pointer['Date']=dt.datetime.now() pointer['No1']=ran_int[i,0] pointer['No2']=ran_int[i,1] pointer['No3']=ran_float[i,0] pointer['No4']=ran_float[i,1] pointer.append()tab.flush() %%timefor i in range(rows): pointer['Date']=dt.datetime.now() pointer['No1']=ran_int[i,0] pointer['No2']=ran_int[i,1] pointer['No3']=ran_flo[i,0] pointer['No4']=ran_flo[i,1] pointer.append()tab.flush() tab dty=np.dtype([('Date','S26'),('No1','0.5)) &((No4<-1) |(No4>1))')])[::100] %%timeres=np.array([(row['No3'],row['No4']) for row in tab where (((No3<-0.5)|(No3>0.5)) & ((No4<-1) |(No4>1)))])[::100] %%timeres=np.array([(row['No3'],row['No4']) for row in tab.where ('((No3<-0.5)|(No3>0.5)) & ((No4<-1) |(No4>1)))'])[::100] %%timeres=np.array([(row['No3'],row['No4']) for row in tab.where ('((No3<-0.5)|(No3>0.5)) & ((No4<-1) |(No4>1))')])[::100] plt.plot(res.T[0],res.T[1],'ro')plt.grid(True) %%timevalues=tab.cols.No3[:]print "max %18.3f" % values.max()print "Ave %18.3f" % values.mean()print "MIn %18.3f" % values.min()print "Std %18.3f" % values.std() %%timeresults=[(row['No1'],row['No2']) for row in tab.where('((No1>9800) |(No1<200)) & ((No2>4500) |(No2<5500))')]for res in results[:4] print res %%timeresults=[(row['No1'],row['No2']) for row in tab.where('((No1>9800) |(No1<200)) & ((No2>4500) |(No2<5500))')]for res in results[:4]: print res %%timeresults=[(row['No1'],row['No2']) for row in tab.where('(No1==1234) & (No2>9776)')]for res in results: print res filename=path+'tab.h5c'h5c=tb.open_file(filename,'w') filters=tb.Filters(complevel=4,complib='blosc') tabc=h5c.create_table('/','ints_floats',sarray,title='Intergers and Floats',expectedrows=rows,filter=filters) tabc=h5c.create_table('/','ints_floats',sarray,title='Intergers and Floats',expectedrows=rows,filters=filters) %%timeres=np.array([(row['No3'],row['No4']) for row in tabc.where ('((No3<-0.5)|(No3>0.5)) & ((No4<-1) |(No4>1))')])[::100] %time arr_non=tab.read() %time arr_com=tabc.read() h5c.close() %%timearr_int=h5.create_array('/','integer',ran_int)arr_flo=h5.create_array('/','floats',ran_flo) h5 filename=path+'array.h5'h5=tb.open_file(filename,'w') n=1000\ear=h5.createEArray(h5.root,'ear',atom=tb.Float64Atom(),shape(0,n)) n=1000ear=h5.createEArray(h5.root,'ear',atom=tb.Float64Atom(),shape(0,n)) ear=h5.createEArray(h5.root,'ear',atom=tb.Float64Atom(),shape(0,1000)) ear=h5.createEArray(h5.root(),'ear',atom=tb.Float64Atom(),shape(0,1000)) ear=h5.createEArray(h5.root,'ear',atom=tb.Float64Atom(),shape=(0,1000)) %%timerand=np.random.standard_normal((1000,1000))for i in range(750): ear.append(rand)ear.flush() ear ear.size_on_disk out=h5.createEArray(h5.root,'out',atom=tb.Float64Atom(),shape=(0,1000)) expr=tb.Expr('3*sin(ear)+sqrt(abs(ear))')expr.setOutput(out,append_mode=True) %time expr.eval() out[0,:10] %time imarray=ear.read()