GCC:
#include <stdio.h>
/*
 * Benchmark kernel: walk n from 1 up to 1e9, counting the iterations in a
 * and accumulating the running count into the double b, then print b / a.
 *
 * The loop bound 1e9 is a double constant, so the comparison against n is
 * carried out in floating point.
 */
int main() {
    long long n = 1;   /* loop counter */
    long long a = 0;   /* number of terms added so far (64 bit machine) */
    double b = 0;      /* running sum of a */

    while (n <= 1e9) {
        n++;
        a++;
        b += a;
    }

    printf("%f\n", b / a);
    return 0;
}
$ gcc -O t.c
$ time ./a.out
500000000.067109
./a.out 3.63s user 0.00s system 100% cpu 3.614 total
# main -- GCC output for the summation loop with every variable kept in the
# stack frame (x86-64, AT&T syntax, System V calling convention).
#
# Frame slots, all BELOW the frame pointer (negative displacements):
#   -20(%rbp)   n   32-bit loop counter, starts at 1
#    -4(%rbp)   a   32-bit term count,   starts at 0
#   -16(%rbp)   b   double running sum,  starts at 0.0
#
# NOTE(review): the counters are handled as 32-bit values (movl / incl)
# although the C listing declares them long long -- presumably this dump was
# produced from an int variant of the program; confirm.
# .LC1 and .LC2 are defined outside this listing (presumably the 1e9 loop
# bound and the "%f\n" format string).
main:
.LFB19:
        pushq   %rbp
.LCFI0:
        movq    %rsp, %rbp
.LCFI1:
        subq    $32, %rsp               # reserve the local-variable area
.LCFI2:
        movl    $1, -20(%rbp)           # n = 1
        movl    $0, -4(%rbp)            # a = 0
        xorpd   %xmm0, %xmm0
        movsd   %xmm0, -16(%rbp)        # b = 0.0

.L16:                                   # loop test: continue while bound >= n
        cvtsi2sd -20(%rbp), %xmm1       # xmm1 = (double) n
        movsd   .LC1(%rip), %xmm0       # xmm0 = loop bound
        ucomisd %xmm1, %xmm0            # flags <- compare bound with n
        jae     .L18                    # bound >= n  -> run the loop step
        jmp     .L17                    # otherwise   -> leave the loop

.L18:                                   # loop step: n++, a++, b += a
        leaq    -20(%rbp), %rax
        incl    (%rax)                  # n++
        leaq    -4(%rbp), %rax
        incl    (%rax)                  # a++
        cvtsi2sd -4(%rbp), %xmm1        # xmm1 = (double) a
        movsd   -16(%rbp), %xmm0        # xmm0 = b
        addsd   %xmm1, %xmm0
        movsd   %xmm0, -16(%rbp)        # b = b + a
        jmp     .L16

.L17:                                   # after the loop: print b / a
        cvtsi2sd -4(%rbp), %xmm1        # xmm1 = (double) a
        movsd   -16(%rbp), %xmm0        # xmm0 = b
        divsd   %xmm1, %xmm0            # xmm0 = b / a, first FP argument
        movl    $.LC2, %edi             # first integer argument: format string
        movl    $1, %eax                # variadic call: one vector register used
        call    printf
        movl    $0, %eax                # return 0
        leave
        ret
GHC:
import Data.Array.Vector
import Text.Printf
-- | Arithmetic mean of an unboxed array of doubles, computed in one pass.
--
-- A single left fold carries a pair of (element count, running sum); the
-- result is the sum divided by the count.  For an array with no elements the
-- fold leaves the seed @0 :*: 0@ untouched, so the result is @0 / 0@ (NaN).
mean :: UArr Double -> Double
mean arr = total / fromIntegral count
  where
    -- One fold step: bump the count and add the element to the sum.
    step (n :*: s) x = (n + 1) :*: (s + x)

    count :*: total = foldlU step (0 :*: 0) arr :: (Int :*: Double)
-- | Print the mean of the array produced by @enumFromToFracU 1 1e9@,
-- formatted with "%f" and followed by a newline.
main :: IO ()
main = printf "%f\n" (mean (enumFromToFracU 1 1e9))
$ ghc -O2 -fvia-C A.hs --make
$ time ./A
500000000.067109
./A 3.68s user 0.00s system 99% cpu 3.683 total
-- Worker loop in GHC Core: three unboxed accumulators, no intermediate array.
--
--   sc_s11e   current value being enumerated (Double#), advanced by 1.0
--   sc1_s11f  running sum (Double#), gains the current value each step
--   sc2_s11g  element count (Int#), advanced by 1
--
-- The loop continues while the current value is <= 1.0000000005e9
-- (i.e. 1e9 + 0.5); once it is not, the count and sum are boxed and
-- returned together as an unboxed tuple.
$s$wfold :: Double# -> Double# -> Int# -> (# Int, Double #)
$s$wfold =
  \ (sc_s11e :: Double#)
    (sc1_s11f :: Double#)
    (sc2_s11g :: Int#) ->
    case <=## sc_s11e 1.0000000005e9 of wild_aWQ {
      False -> (# I# sc2_s11g, D# sc1_s11f #);
      True ->
        $s$wfold
          (+## sc_s11e 1.0)
          (+## sc1_s11f sc_s11e)
          (+# sc2_s11g 1)
    }
# Main_zdszdwfold_info -- the fold worker as compiled by GHC through its C
# back end (x86-64, AT&T syntax).
#
# Live values, matching the three arguments of the Core worker:
#   %xmm5  current enumerated value      %xmm6  running sum
#   %rsi   element count
# Registers that appear to belong to the GHC runtime (presumably: %r12 heap
# pointer, %r15 heap limit, %rbp Haskell stack pointer, %r13 runtime register
# table -- confirm against the GHC commentary).
# .LC0 and .LC2 are defined outside this listing (presumably the loop bound
# and the constant 1.0).
        .text
        .align 8
        .type Main_zdszdwfold_info, @function
Main_zdszdwfold_info:
        leaq    32(%r12), %rax          # rax = heap pointer advanced by 32 bytes
        cmpq    %r15, %rax
        movq    %rax, %r12              # commit the advanced heap pointer
        ja      .L10                    # past the limit -> heap-check failure path
        movsd   .LC0(%rip), %xmm0       # xmm0 = loop bound
        ucomisd %xmm5, %xmm0
        jae     .L12                    # bound >= current value -> keep looping

        # Loop finished: build the two result boxes inside the 32 bytes just
        # claimed, i.e. at NEGATIVE offsets from the advanced heap pointer.
        movq    %rsi, (%rax)            # count payload
        movq    $base_GHCziFloat_Dzh_con_info, -24(%rax)   # D# header
        movsd   %xmm6, -16(%rax)        # sum payload
        movq    $base_GHCziBase_Izh_con_info, -8(%rax)     # I# header
        leaq    -7(%rax), %rbx          # rbx -> I# box (header at -8, plus 1)
        leaq    -23(%rax), %rsi         # rsi -> D# box (header at -24, plus 1)
        jmp     *(%rbp)                 # jump through the word at the stack top
        .p2align 6,,7
.L12:
        # One loop iteration; no allocation is needed, so the 32 bytes are
        # handed back before jumping to the top again.
        movapd  %xmm6, %xmm0
        incq    %rsi                    # count++
        subq    $32, %r12               # undo the heap-pointer advance
        addsd   %xmm5, %xmm0            # xmm0 = sum + current value
        addsd   .LC2(%rip), %xmm5       # advance the current value
        movapd  %xmm0, %xmm6            # sum = xmm0
        jmp     Main_zdszdwfold_info
        .p2align 6,,7
.L10:
        # Heap check failed: spill the three live values to 24 bytes of new
        # stack space and jump through the runtime's register table
        # (presumably to the garbage-collector entry -- confirm).
        leaq    -24(%rbp), %rax         # rax = stack pointer lowered by 24
        movq    $32, 184(%r13)          # record the 32-byte request
        movl    $Main_zdszdwfold_closure, %ebx
        movq    %rax, %rbp
        movsd   %xmm5, (%rax)
        movsd   %xmm6, 8(%rax)
        movq    %rsi, 16(%rax)
        jmp     *-8(%r13)
Using the native code generator:
# Main_zdszdwfold_info -- the fold worker as emitted by GHC's native code
# generator (x86-64, AT&T syntax).
#
# Live values, matching the three arguments of the Core worker:
#   %xmm5  current enumerated value      %xmm6  running sum
#   %rsi   element count
# Registers that appear to belong to the GHC runtime (presumably: %r12 heap
# pointer, %r15 heap limit, %rbp Haskell stack pointer, %r13 runtime register
# table -- confirm against the GHC commentary).
# .Ln13O and .Ln13P are defined outside this listing (presumably the loop
# bound and the constant 1.0).
Main_zdszdwfold_info:
        addq    $32,%r12                # claim 32 bytes of heap up front
        cmpq    %r15,%r12
        ja      .Lc13J                  # past the limit -> heap-check failure path
        ucomisd .Ln13O(%rip),%xmm5      # compare current value with the bound
        jp      .Ln13H                  # unordered (NaN) -> treat as finished
        jbe     .Lc13M                  # current value <= bound -> keep looping
.Ln13H:
        # Loop finished: build the two result boxes inside the 32 bytes just
        # claimed, i.e. at NEGATIVE offsets from the advanced heap pointer.
        movq    $base_GHCziFloat_Dzh_con_info,-24(%r12)    # D# header
        movsd   %xmm6,-16(%r12)         # sum payload
        movq    $base_GHCziBase_Izh_con_info,-8(%r12)      # I# header
        movq    %rsi,(%r12)             # count payload
        leaq    -7(%r12),%rbx           # rbx -> I# box (header at -8, plus 1)
        leaq    -23(%r12),%rsi          # rsi -> D# box (header at -24, plus 1)
        jmp     *(%rbp)                 # jump through the word at the stack top
.Lc13J:
        # Heap check failed: spill the three live values to 24 bytes of new
        # stack space and jump through the runtime's register table
        # (presumably to the garbage-collector entry -- confirm).
        movq    $32,184(%r13)           # record the 32-byte request
        movl    $Main_zdszdwfold_closure,%ebx
        addq    $-24,%rbp
        movsd   %xmm5,(%rbp)
        movsd   %xmm6,8(%rbp)
        movq    %rsi,16(%rbp)
        jmp     *-8(%r13)
.Lc13M:
        # One loop iteration; no allocation is needed, so the 32 bytes are
        # handed back before jumping to the top again.
        leaq    1(%rsi),%rax            # rax = count + 1
        movsd   %xmm6,%xmm0
        addsd   %xmm5,%xmm0             # xmm0 = sum + current value
        addsd   .Ln13P(%rip),%xmm5      # advance the current value
        movsd   %xmm0,%xmm6             # sum = xmm0
        movq    %rax,%rsi               # count = rax
        addq    $-32,%r12               # undo the heap-pointer advance
        jmp     Main_zdszdwfold_info
$ time ./A
500000000.067109
./A 3.62s user 0.03s system 99% cpu 3.671 total
GCC -O2:
# main -- the summation loop with all variables kept in registers
# (x86-64, AT&T syntax, System V calling convention).
#
#   %rdx   n, loop counter (starts at 1)     %rax   a, term count (starts at 0)
#   %xmm2  b, running sum (starts at 0.0)    %xmm3  loop bound loaded from .LC5
#   %xmm0  (double) n, scratch               %xmm1  (double) a, scratch
#
# .LC5 and .LC2 are defined outside this listing (presumably the 1e9 bound
# and the "%f\n" format string).
main:
.LFB11:
subq $8, %rsp # keep the stack 16-byte aligned at the call to printf
.LCFI0:
movl $1, %edx # n = 1
xorl %eax, %eax # a = 0
xorpd %xmm2, %xmm2 # b = 0.0
movsd .LC5(%rip), %xmm3 # loop bound, loaded once outside the loop
.p2align 4,,7
# Loop body. The test sits at the bottom, so the body runs once before the
# first comparison.
.L6:
incq %rdx # n++
incq %rax # a++
cvtsi2sdq %rdx, %xmm0 # xmm0 = (double) n
cvtsi2sdq %rax, %xmm1 # xmm1 = (double) a
ucomisd %xmm0, %xmm3 # flags <- compare bound with n
addsd %xmm1, %xmm2 # b += a; leaves the comparison flags untouched
jae .L6 # bound >= n -> next iteration
# After the loop %xmm1 still holds (double) a from the last iteration.
divsd %xmm1, %xmm2 # b / a
movl $.LC2, %edi # first integer argument: format string
movl $1, %eax # variadic call: one vector register used
movapd %xmm2, %xmm0 # first FP argument: b / a
call printf
addq $8, %rsp
xorl %eax, %eax # return 0
ret