我这 对测试 mysql infobright 压缩 和 查询速度 进行测试
测试结果我会尽快登出的
#!/usr/bin/python
import MySQLdb
#conn = MySQLdb.Connection('127.0.0.1', 'root', '', 'dmspi')
conn=MySQLdb.connect(host="127.0.0.1",port=3307,user="root",passwd="",db="test")
cur = conn.cursor()
st = "create table testtime4 ( "
try :
for cc in xrange(1000):
if cc % 2 == 0 :
st += 'a'+str(cc)+' varchar(20),\n'
else :
st += 'a'+str(cc)+' int(20),\n'
st += 'a int(20)'
st = st + ");"
cur.execute(st)
# import sys
# sys.exit(1)
import random
ccs = lambda : random.choice(['apple', 'pear', 'peach', 'orange', 'lemon',''])
ccn = lambda : random.randint(0,10000)
fd = open('/data/logs/dataFormat/test/t4.data','w')
for cc in xrange(10000000):
st = ''
ss = ccs()
nn = str(ccn())
for cc in xrange(1000):
if cc < 15 :
if cc % 2 == 0 :
st += ss+'\t'
else :
st += nn+'\t'
else :
st += '\t'
st += nn
print >>fd,st
fd.close()
# cur.execute('load data infile \'/data/logs/dataFormat/test/t4.data\' into table testtime4 fields terminated by "\t";')
finally :
cur.close()
conn.close()
mysql infobright 测试结果报告:
一千万条数据导入花费时间:
mysql> load data infile '/data/logs/dataFormat/test/t4.data' into table testtime4 fields terminated by "\t";
Query OK, 10000000 rows affected (36 min 47.00 sec)
测试一 :
1. 表属性 有 500 列
2. 属性列都有值, 无 Null 数据
3. 原始文件大小 26G ,导入数据仓库 5G
部分测试时间:
select count(*) from testtime where a0="pear" and a2="orange";
1 row in set (3.63 sec)
select a6,count(*) from testtime group by a6 order by a6 desc ;
5 rows in set (2.24 sec)
mysql> select count(*) from testtime where a0="apple" ;
1 row in set (5.68 sec)
测试二 :
1. 表属性 有 1000 列
2. 属性列前 15 列有值 , 其余后面都为 Null
3. 原始文件大小 10G ,导入数据仓库 215M
mysql> select a0,count(*) from testtime4 group by a0 ;
+--------+----------+
| a0 | count(*) |
+--------+----------+
| lemon | 1665543 |
| peach | 1666276 |
| orange | 1667740 |
| pear | 1665910 |
| apple | 1665678 |
| NULL | 1668863 |
+--------+----------+
6 rows in set (4.55 sec)
select * from testtime4 order by a6 desc limit 2000000,1 ;
1 row in set (3.30 sec)
整理 www.blogjava.net/Good-Game