vlad777
23.1.2010, 18:26
Ne prica se o najmanjoj brzini memorije racunara tj.
o worst case scenario adresiranju.
Kada sam napisao ovaj program za testiranje brzine memorije
sa nesekvencijalnim pristupom i pokrenuo ga
dobio sam iznenadjujuce male rezultate za najmanju brzinu.
(napomena: ne koristim mmx instrukcije)
Ako pogledate gif iz attachmenta bice vam odmah jasno
kako program radi.
http://img140.imageshack.us/img140/9334/memm.gif
(Nadam se da ce ovo nekom biti inspiracija za clanak.)
Neki rezultati:
-------------------------------------------------
comp od mog drugara
nemam podatke samo da je
u pitanju neki core 2 duo
speed=5563.28 MB/s jump= 1 *4 bytes
speed=3012.05 MB/s jump= 2 *4 bytes
speed=1580.40 MB/s jump= 4 *4 bytes
speed=802.41 MB/s jump= 8 *4 bytes
speed=386.14 MB/s jump= 16 *4 bytes
speed=345.93 MB/s jump= 32 *4 bytes
speed=320.00 MB/s jump= 64 *4 bytes
speed=282.57 MB/s jump= 128 *4 bytes
speed=260.43 MB/s jump= 256 *4 bytes
speed=230.00 MB/s jump= 512 *4 bytes
speed=198.60 MB/s jump= 1024 *4 bytes
speed=197.23 MB/s jump= 2048 *4 bytes
speed=193.94 MB/s jump= 4096 *4 bytes
speed=192.20 MB/s jump= 8192 *4 bytes
speed=390.24 MB/s jump= 16384 *4 bytes
speed=398.13 MB/s jump= 32768 *4 bytes
speed=395.06 MB/s jump= 65536 *4 bytes
speed=386.70 MB/s jump= 131072 *4 bytes
speed=373.17 MB/s jump= 262144 *4 bytes
speed=397.54 MB/s jump= 524288 *4 bytes
speed=804.99 MB/s jump= 1048576 *4 bytes
best=5563.28 MB/s worst=192.20 MB/s
--------------------------------------------------
cpu: intel Conroe E1200 @ 1.6 Ghz
cache: L1 32 d/i L2 512 KB
memory: 1GB FSB:800MHz
speed=3466.20 MB/s jump= 1 *4 bytes
speed=1927.71 MB/s jump= 2 *4 bytes
speed=1072.96 MB/s jump= 4 *4 bytes
speed=566.01 MB/s jump= 8 *4 bytes
speed=281.45 MB/s jump= 16 *4 bytes
speed=248.46 MB/s jump= 32 *4 bytes
speed=243.72 MB/s jump= 64 *4 bytes
speed=232.05 MB/s jump= 128 *4 bytes
speed=210.52 MB/s jump= 256 *4 bytes
speed=188.26 MB/s jump= 512 *4 bytes
speed=167.48 MB/s jump= 1024 *4 bytes
speed=153.45 MB/s jump= 2048 *4 bytes
speed=134.03 MB/s jump= 4096 *4 bytes
speed=115.19 MB/s jump= 8192 *4 bytes
speed=88.42 MB/s jump= 16384 *4 bytes
speed=84.71 MB/s jump= 32768 *4 bytes
speed=88.39 MB/s jump= 65536 *4 bytes
speed=98.81 MB/s jump= 131072 *4 bytes
speed=107.33 MB/s jump= 262144 *4 bytes
speed=155.59 MB/s jump= 524288 *4 bytes
speed=490.26 MB/s jump= 1048576 *4 bytes
best=3466.20 MB/s worst=84.71 MB/s
---------------------------------------------
Cpu: AMD Barton 2.2 GHz (socket A)
Cache: L1 64KB D/I L2 512 KB full speed
Memory: DDR1 200/400 MHz FSB capacity: 1.2 GB speed: 3200 MB/s
Chipset: VIA KT600
speed=748.64 MB/s jump= 1 *4 bytes
speed=521.38 MB/s jump= 2 *4 bytes
speed=321.21 MB/s jump= 4 *4 bytes
speed=219.74 MB/s jump= 8 *4 bytes
speed=127.49 MB/s jump= 16 *4 bytes
speed=141.20 MB/s jump= 32 *4 bytes
speed=134.31 MB/s jump= 64 *4 bytes
speed=118.85 MB/s jump= 128 *4 bytes
speed=104.06 MB/s jump= 256 *4 bytes
speed=91.82 MB/s jump= 512 *4 bytes
speed=80.25 MB/s jump= 1024 *4 bytes
speed=77.81 MB/s jump= 2048 *4 bytes
speed=46.80 MB/s jump= 4096 *4 bytes
speed=24.21 MB/s jump= 8192 *4 bytes
speed=23.93 MB/s jump= 16384 *4 bytes
speed=24.12 MB/s jump= 32768 *4 bytes
speed=45.80 MB/s jump= 65536 *4 bytes
speed=79.04 MB/s jump= 131072 *4 bytes
speed=139.05 MB/s jump= 262144 *4 bytes
speed=178.64 MB/s jump= 524288 *4 bytes
speed=218.25 MB/s jump= 1048576 *4 bytes
best=748.64 MB/s worst=23.93 MB/s
-------------------------------------------------
Negde oko 32kB za jump su najlosiji rezultati.
Program:
---------------------------------------------------
// Compiled as a Win32 Console Application in Visual C++ 6.0
#include<windows.h>
#include<stdlib.h>
#include<stdio.h>
/*
 * Strided read test, plain C version.
 *
 * m      - buffer to read from
 * jump   - stride between consecutive reads, in elements of type long
 * sizemy - size of the buffer in bytes
 *
 * For every starting index 0 .. jump-1 the buffer is walked with a step of
 * `jump` elements, so one full call reads each element exactly once, just in
 * a non-sequential order. Nothing is done for jump <= 0 or an empty buffer.
 */
void test2(long* m,long jump,long sizemy){
    /* volatile: the value is never used, so without it the optimizer is
       free to delete the very load we want to time */
    volatile long sink;
    /* element count derived from the real element size instead of a
       hard-coded 4, so the walk stays inside the buffer on every platform */
    const long count=sizemy/(long)sizeof(long);
    long pass;
    long idx;

    for(pass=0;pass<jump && pass<count;pass++){
        idx=pass;
        while(idx<count){
            sink=m[idx]; // comment this line out to time the loop itself (not the memory)
            /* stop before idx+jump could pass the end; this also keeps the
               addition from overflowing for very large strides */
            if(jump>=count-idx)
                break;
            idx+=jump;
        }
    }
}
/*
 * Strided read test written with MSVC inline assembly (32-bit x86 registers).
 *
 * m      - start of the buffer
 * jump   - stride in 4-byte elements (converted to bytes with shl 2)
 * sizemy - added to the buffer address to form the end address, i.e. it is
 *          used as the buffer size in bytes
 *
 * For each starting byte offset 0, 4, 8, ... below jump*4, one dword is
 * loaded every jump*4 bytes until the address reaches m+sizemy.
 *
 * NOTE(review): the inner loop tests at the bottom, so at least one load is
 * done per pass even when sizemy is 0. With jump == 0 the stride is zero and
 * the inner loop would not terminate for sizemy > 0; the visible caller only
 * passes jump > 0.
 */
void test3(long* m,long jump,long sizemy/*,long* tp*/){
_asm{
//mov tpl,0;
mov edi,[m] // edi = buffer start
mov esi,edi
add esi,sizemy // esi = end address (start + sizemy)
mov ecx,edi // ecx = buffer start, kept for the whole run
mov ebx,0 // ebx = byte offset of the current pass
mov edx,jump
shl edx,2 // edx = stride in bytes (jump * 4)
}
// outer loop: one pass per starting offset
ll:
_asm{
mov edi,ecx //mov edi,[m]
add edi,ebx // edi = start + offset of this pass
}
// inner loop: walk the buffer with the byte stride in edx
l:
_asm{
mov eax,[edi] // comment this line out to time the loop itself (not the memory)
add edi,edx // advance by one stride
cmp edi,esi
jb l // keep going while the address is below the end (unsigned compare)
add ebx,4 // next starting offset, one dword further
cmp ebx,edx
jae k // all offsets below the stride are done
jmp ll
}
k: ;
}
/*
 * Benchmark driver.
 *
 * Allocates an 80 MB buffer and, for every power-of-two stride from 1 to
 * 1048576 elements, runs test3() `times` times over the whole buffer. The
 * throughput in MB/s is computed from the GetTickCount() difference and
 * printed to the console and to "test2.txt", followed by the best and worst
 * values seen.
 *
 * Returns 0 on success, 1 if the test buffer cannot be allocated. If the
 * report file cannot be opened the results are still printed to the console.
 */
int main(void){
    double cur,best=0,worst=500000;
    int j;
    long i;
    long k;
    long size=1024*1024*80; // 80 MB
    long times=50;          // passes over the buffer per stride
    unsigned long start;    // GetTickCount() value before the run, in ms
    unsigned long elapsed;  // duration of the run, in ms
    FILE *f;
    long *m=(long *)malloc(size); // allocates the 80 MB test buffer

    if(m==NULL){
        fprintf(stderr,"cannot allocate %ld bytes for the test buffer\n",size);
        return 1;
    }
    /* write the whole buffer once so the timed loops read initialized
       memory and do not include the cost of the very first touch */
    for(k=0;k<size/(long)sizeof(long);k++)
        m[k]=0;

    f=fopen("test2.txt","w");
    if(f==NULL)
        perror("test2.txt"); // keep going, console output still works

    for(i=1;i<=524288*2;i<<=1){
        start=GetTickCount();
        if(i<size && i>0){
            for(j=0;j<times;j++)
                test3(m,i,size);
        }
        /* unsigned subtraction stays correct across tick counter wrap */
        elapsed=GetTickCount()-start;
        if(elapsed==0)
            elapsed=1; // below timer resolution: avoid dividing by zero
        cur=((double)size*times/((double)elapsed/1000.0))/(double)(1024*1024);
        if(cur>best)best=cur;
        if(cur<worst)worst=cur;
        printf("\n speed=%5.2f MB/s jump= %ld *4 bytes ",cur,i);
        if(f!=NULL)
            fprintf(f,"\n speed=%5.2f MB/s jump= %ld *4 bytes",cur,i);
    }
    printf("\n\n best=%5.2f MB/s worst=%5.2f MB/s \n",best,worst);
    if(f!=NULL){
        fprintf(f,"\n\n best=%5.2f MB/s worst=%5.2f MB/s \n",best,worst);
        fclose(f);
    }
    free(m);
    return 0;
}
o worst case scenario adresiranju.
Kada sam napisao ovaj program za testiranje brzine memorije
sa nesekvencijalnim pristupom i pokrenuo ga
dobio sam iznenadjujuce male rezultate za najmanju brzinu.
(napomena: ne koristim mmx instrukcije)
Ako pogledate gif iz attachmenta bice vam odmah jasno
kako program radi.
http://img140.imageshack.us/img140/9334/memm.gif
(Nadam se da ce ovo nekom biti inspiracija za clanak.)
Neki rezultati:
-------------------------------------------------
comp od mog drugara
nemam podatke samo da je
u pitanju neki core 2 duo
speed=5563.28 MB/s jump= 1 *4 bytes
speed=3012.05 MB/s jump= 2 *4 bytes
speed=1580.40 MB/s jump= 4 *4 bytes
speed=802.41 MB/s jump= 8 *4 bytes
speed=386.14 MB/s jump= 16 *4 bytes
speed=345.93 MB/s jump= 32 *4 bytes
speed=320.00 MB/s jump= 64 *4 bytes
speed=282.57 MB/s jump= 128 *4 bytes
speed=260.43 MB/s jump= 256 *4 bytes
speed=230.00 MB/s jump= 512 *4 bytes
speed=198.60 MB/s jump= 1024 *4 bytes
speed=197.23 MB/s jump= 2048 *4 bytes
speed=193.94 MB/s jump= 4096 *4 bytes
speed=192.20 MB/s jump= 8192 *4 bytes
speed=390.24 MB/s jump= 16384 *4 bytes
speed=398.13 MB/s jump= 32768 *4 bytes
speed=395.06 MB/s jump= 65536 *4 bytes
speed=386.70 MB/s jump= 131072 *4 bytes
speed=373.17 MB/s jump= 262144 *4 bytes
speed=397.54 MB/s jump= 524288 *4 bytes
speed=804.99 MB/s jump= 1048576 *4 bytes
best=5563.28 MB/s worst=192.20 MB/s
--------------------------------------------------
cpu: intel Conroe E1200 @ 1.6 Ghz
cache: L1 32 d/i L2 512 KB
memory: 1GB FSB:800MHz
speed=3466.20 MB/s jump= 1 *4 bytes
speed=1927.71 MB/s jump= 2 *4 bytes
speed=1072.96 MB/s jump= 4 *4 bytes
speed=566.01 MB/s jump= 8 *4 bytes
speed=281.45 MB/s jump= 16 *4 bytes
speed=248.46 MB/s jump= 32 *4 bytes
speed=243.72 MB/s jump= 64 *4 bytes
speed=232.05 MB/s jump= 128 *4 bytes
speed=210.52 MB/s jump= 256 *4 bytes
speed=188.26 MB/s jump= 512 *4 bytes
speed=167.48 MB/s jump= 1024 *4 bytes
speed=153.45 MB/s jump= 2048 *4 bytes
speed=134.03 MB/s jump= 4096 *4 bytes
speed=115.19 MB/s jump= 8192 *4 bytes
speed=88.42 MB/s jump= 16384 *4 bytes
speed=84.71 MB/s jump= 32768 *4 bytes
speed=88.39 MB/s jump= 65536 *4 bytes
speed=98.81 MB/s jump= 131072 *4 bytes
speed=107.33 MB/s jump= 262144 *4 bytes
speed=155.59 MB/s jump= 524288 *4 bytes
speed=490.26 MB/s jump= 1048576 *4 bytes
best=3466.20 MB/s worst=84.71 MB/s
---------------------------------------------
Cpu: AMD Barton 2.2 GHz (socket A)
Cache: L1 64KB D/I L2 512 KB full speed
Memory: DDR1 200/400 MHz FSB capacity: 1.2 GB speed: 3200 MB/s
Chipset: VIA KT600
speed=748.64 MB/s jump= 1 *4 bytes
speed=521.38 MB/s jump= 2 *4 bytes
speed=321.21 MB/s jump= 4 *4 bytes
speed=219.74 MB/s jump= 8 *4 bytes
speed=127.49 MB/s jump= 16 *4 bytes
speed=141.20 MB/s jump= 32 *4 bytes
speed=134.31 MB/s jump= 64 *4 bytes
speed=118.85 MB/s jump= 128 *4 bytes
speed=104.06 MB/s jump= 256 *4 bytes
speed=91.82 MB/s jump= 512 *4 bytes
speed=80.25 MB/s jump= 1024 *4 bytes
speed=77.81 MB/s jump= 2048 *4 bytes
speed=46.80 MB/s jump= 4096 *4 bytes
speed=24.21 MB/s jump= 8192 *4 bytes
speed=23.93 MB/s jump= 16384 *4 bytes
speed=24.12 MB/s jump= 32768 *4 bytes
speed=45.80 MB/s jump= 65536 *4 bytes
speed=79.04 MB/s jump= 131072 *4 bytes
speed=139.05 MB/s jump= 262144 *4 bytes
speed=178.64 MB/s jump= 524288 *4 bytes
speed=218.25 MB/s jump= 1048576 *4 bytes
best=748.64 MB/s worst=23.93 MB/s
-------------------------------------------------
Negde oko 32kB za jump su najlosiji rezultati.
Program:
---------------------------------------------------
// Compiled as a Win32 Console Application in Visual C++ 6.0
#include<windows.h>
#include<stdlib.h>
#include<stdio.h>
/*
 * Strided read test, plain C version.
 *
 * m      - buffer to read from
 * jump   - stride between consecutive reads, in elements of type long
 * sizemy - size of the buffer in bytes
 *
 * For every starting index 0 .. jump-1 the buffer is walked with a step of
 * `jump` elements, so one full call reads each element exactly once, just in
 * a non-sequential order. Nothing is done for jump <= 0 or an empty buffer.
 */
void test2(long* m,long jump,long sizemy){
    /* volatile: the value is never used, so without it the optimizer is
       free to delete the very load we want to time */
    volatile long sink;
    /* element count derived from the real element size instead of a
       hard-coded 4, so the walk stays inside the buffer on every platform */
    const long count=sizemy/(long)sizeof(long);
    long pass;
    long idx;

    for(pass=0;pass<jump && pass<count;pass++){
        idx=pass;
        while(idx<count){
            sink=m[idx]; // comment this line out to time the loop itself (not the memory)
            /* stop before idx+jump could pass the end; this also keeps the
               addition from overflowing for very large strides */
            if(jump>=count-idx)
                break;
            idx+=jump;
        }
    }
}
/*
 * Strided read test written with MSVC inline assembly (32-bit x86 registers).
 *
 * m      - start of the buffer
 * jump   - stride in 4-byte elements (converted to bytes with shl 2)
 * sizemy - added to the buffer address to form the end address, i.e. it is
 *          used as the buffer size in bytes
 *
 * For each starting byte offset 0, 4, 8, ... below jump*4, one dword is
 * loaded every jump*4 bytes until the address reaches m+sizemy.
 *
 * NOTE(review): the inner loop tests at the bottom, so at least one load is
 * done per pass even when sizemy is 0. With jump == 0 the stride is zero and
 * the inner loop would not terminate for sizemy > 0; the visible caller only
 * passes jump > 0.
 */
void test3(long* m,long jump,long sizemy/*,long* tp*/){
_asm{
//mov tpl,0;
mov edi,[m] // edi = buffer start
mov esi,edi
add esi,sizemy // esi = end address (start + sizemy)
mov ecx,edi // ecx = buffer start, kept for the whole run
mov ebx,0 // ebx = byte offset of the current pass
mov edx,jump
shl edx,2 // edx = stride in bytes (jump * 4)
}
// outer loop: one pass per starting offset
ll:
_asm{
mov edi,ecx //mov edi,[m]
add edi,ebx // edi = start + offset of this pass
}
// inner loop: walk the buffer with the byte stride in edx
l:
_asm{
mov eax,[edi] // comment this line out to time the loop itself (not the memory)
add edi,edx // advance by one stride
cmp edi,esi
jb l // keep going while the address is below the end (unsigned compare)
add ebx,4 // next starting offset, one dword further
cmp ebx,edx
jae k // all offsets below the stride are done
jmp ll
}
k: ;
}
/*
 * Benchmark driver.
 *
 * Allocates an 80 MB buffer and, for every power-of-two stride from 1 to
 * 1048576 elements, runs test3() `times` times over the whole buffer. The
 * throughput in MB/s is computed from the GetTickCount() difference and
 * printed to the console and to "test2.txt", followed by the best and worst
 * values seen.
 *
 * Returns 0 on success, 1 if the test buffer cannot be allocated. If the
 * report file cannot be opened the results are still printed to the console.
 */
int main(void){
    double cur,best=0,worst=500000;
    int j;
    long i;
    long k;
    long size=1024*1024*80; // 80 MB
    long times=50;          // passes over the buffer per stride
    unsigned long start;    // GetTickCount() value before the run, in ms
    unsigned long elapsed;  // duration of the run, in ms
    FILE *f;
    long *m=(long *)malloc(size); // allocates the 80 MB test buffer

    if(m==NULL){
        fprintf(stderr,"cannot allocate %ld bytes for the test buffer\n",size);
        return 1;
    }
    /* write the whole buffer once so the timed loops read initialized
       memory and do not include the cost of the very first touch */
    for(k=0;k<size/(long)sizeof(long);k++)
        m[k]=0;

    f=fopen("test2.txt","w");
    if(f==NULL)
        perror("test2.txt"); // keep going, console output still works

    for(i=1;i<=524288*2;i<<=1){
        start=GetTickCount();
        if(i<size && i>0){
            for(j=0;j<times;j++)
                test3(m,i,size);
        }
        /* unsigned subtraction stays correct across tick counter wrap */
        elapsed=GetTickCount()-start;
        if(elapsed==0)
            elapsed=1; // below timer resolution: avoid dividing by zero
        cur=((double)size*times/((double)elapsed/1000.0))/(double)(1024*1024);
        if(cur>best)best=cur;
        if(cur<worst)worst=cur;
        printf("\n speed=%5.2f MB/s jump= %ld *4 bytes ",cur,i);
        if(f!=NULL)
            fprintf(f,"\n speed=%5.2f MB/s jump= %ld *4 bytes",cur,i);
    }
    printf("\n\n best=%5.2f MB/s worst=%5.2f MB/s \n",best,worst);
    if(f!=NULL){
        fprintf(f,"\n\n best=%5.2f MB/s worst=%5.2f MB/s \n",best,worst);
        fclose(f);
    }
    free(m);
    return 0;
}