hpaste

recent | annotate | new

GCC:

#include <stdio.h>

/*
 * Benchmark kernel: walk i from 1 up to 1e9, counting the iterations in
 * `count` and accumulating the running count into the double `sum`, then
 * print sum / count (the mean of 1..count) with "%f".
 *
 * The loop bound is the floating-point literal 1e9, so the comparison is
 * carried out in double precision on every iteration.
 */
int main() {

    long long count = 0;   /* 64-bit integer counters */
    long long i = 1;
    double sum = 0;

    while (i <= 1e9) {
        i++;
        count++;
        sum += count;      /* count is converted to double before the add */
    }

    printf("%f\n", sum / count);

    return 0;

}

------------------------------------------------------------------------

$ gcc -O t.c
$ time ./a.out 
500000000.067109
./a.out  3.63s user 0.00s system 100% cpu 3.614 total

------------------------------------------------------------------------

# main -- compiler output for the C summation loop, from the `gcc -O t.c`
# run shown in the transcript. AT&T syntax, x86-64.
#
# Stack slots used by this listing:
#   -20(%rbp)  32-bit integer, starts at 1, compared against .LC1
#              (corresponds to `n` in the C source)
#   -4(%rbp)   32-bit integer, starts at 0, added into the accumulator
#              (corresponds to `a`)
#   -16(%rbp)  double accumulator, starts at 0.0 (corresponds to `b`)
#
# NOTE(review): both integer slots are accessed with 32-bit movl/incl and
# every value is kept in memory, whereas the C listing declares the
# counters as `long long`. This dump may come from a slightly different
# source or flag set than the one shown -- confirm before comparing timings.
main:
.LFB19:
    # Frame setup: save the caller's %rbp and reserve 32 bytes of locals.
    pushq   %rbp
.LCFI0:
    movq    %rsp, %rbp
.LCFI1:
    subq    $32, %rsp
.LCFI2:
    # Initialise the three locals: n = 1, a = 0, b = 0.0.
    movl    $1, -20(%rbp)
    movl    $0, -4(%rbp)
    xorpd   %xmm0, %xmm0
    movsd   %xmm0, -16(%rbp)
.L16:
    # Loop test: convert n to double and compare the constant at .LC1
    # against it. .LC1 is not shown here; presumably 1e9 -- confirm.
    cvtsi2sd    -20(%rbp), %xmm1
    movsd   .LC1(%rip), %xmm0
    ucomisd %xmm1, %xmm0
    # Continue while .LC1 >= (double)n; otherwise leave the loop.
    jae .L18
    jmp .L17
.L18:
    # Loop step: n++, a++ (32-bit increments through a pointer in %rax).
    leaq    -20(%rbp), %rax
    incl    (%rax)
    leaq    -4(%rbp), %rax
    incl    (%rax)
    # b += (double)a, reloading and storing b through its stack slot.
    cvtsi2sd    -4(%rbp), %xmm1
    movsd   -16(%rbp), %xmm0
    addsd   %xmm1, %xmm0
    movsd   %xmm0, -16(%rbp)
    jmp .L16
.L17:
    # After the loop: %xmm0 = b / (double)a, the value passed to printf.
    cvtsi2sd    -4(%rbp), %xmm1
    movsd   -16(%rbp), %xmm0
    divsd   %xmm1, %xmm0
    # First argument: address of .LC2 (not shown; presumably the "%f\n"
    # format string from the C source).
    movl    $.LC2, %edi
    # Variadic call: %al holds the number of vector registers used (one).
    movl    $1, %eax
    call    printf
    # return 0
    movl    $0, %eax
    leave
    ret



------------------------------------------------------------------------
------------------------------------------------------------------------

GHC:


{-# LANGUAGE TypeOperators #-}

import Data.Array.Vector
import Text.Printf

-- | Arithmetic mean of an unboxed array of doubles.
--
-- A single left fold carries a pair of (element count, running total);
-- the mean is the total divided by the count.
mean :: UArr Double -> Double
mean xs = total / fromIntegral len
  where
    len :*: total = foldlU step (0 :*: 0) xs :: (Int :*: Double)

    -- Bump the count and add the current element to the total.
    step (cnt :*: acc) x = (cnt + 1) :*: (acc + x)

-- Print the mean of the doubles enumerated from 1 to 1e9.
main = printf "%f\n" (mean (enumFromToFracU 1 1e9))

------------------------------------------------------------------------

$ ghc -O2 -fvia-C A.hs --make
$ time ./A
500000000.067109
./A  3.68s user 0.00s system 99% cpu 3.683 total

------------------------------------------------------------------------

-- Worker loop in GHC Core, operating entirely on unboxed values.
-- Arguments, in order:
--   sc_s11e  :: Double#  current value; tested against the bound and
--                        advanced by 1.0 on each step
--   sc1_s11f :: Double#  running sum of the values seen so far
--   sc2_s11g :: Int#     number of values seen so far
-- The result is an unboxed pair of the boxed count and the boxed sum.
$s$wfold :: Double# -> Double# -> Int# -> (# Int, Double #)

$s$wfold =
  \ (sc_s11e :: Double#)
    (sc1_s11f :: Double#)
    (sc2_s11g :: Int#) ->
    -- Keep looping while the current value is <= 1.0000000005e9.
    case <=## sc_s11e 1.0000000005e9 of wild_aWQ {
      -- Past the bound: box the count (I#) and the sum (D#) and return.
      False -> (# I# sc2_s11g, D# sc1_s11f #);
      -- Within the bound: recurse with value + 1.0, sum + value, count + 1.
      True ->
        $s$wfold
          (+## sc_s11e 1.0)
          (+## sc1_s11f sc_s11e)
          (+# sc2_s11g 1)

------------------------------------------------------------------------

# Main_zdszdwfold_info -- machine code for the $s$wfold worker loop, from
# the `ghc -O2 -fvia-C` build. AT&T syntax, x86-64.
#
# Register roles as used in this listing (matched against the Core for
# $s$wfold shown earlier in the paste):
#   %xmm5  current Double value; compared with .LC0 and advanced by .LC2
#   %xmm6  Double running sum
#   %rsi   integer count
#   %r12   bump pointer, advanced by 32 and checked against %r15
#          (presumably GHC's heap pointer and heap limit -- confirm)
#   %rbp   base of the slots used for spills and the return jump
#          (presumably the STG stack pointer -- confirm)
#   %r13   base for the 184(%r13) store and the -8(%r13) jump
#          (presumably the STG register table -- confirm)
Main_zdszdwfold_info:
.text
  .align 8
  .type     Main_zdszdwfold_info, @function
  # Reserve 32 bytes: %r12 += 32, and branch to .L10 if the new value is
  # above the limit in %r15 (unsigned compare).
  leaq        32(%r12), %rax
  cmpq        %r15, %rax
  movq        %rax, %r12
  ja  .L10
  # Loop test: continue at .L12 while the constant at .LC0 >= %xmm5.
  # .LC0 is not shown; presumably the 1.0000000005e9 bound from the Core.
  movsd       .LC0(%rip), %xmm0
  ucomisd     %xmm5, %xmm0
  jae .L12
  # Exit path: fill the 32 reserved bytes with two two-word objects,
  #   [%rax-24] = D# info pointer, [%rax-16] = sum   (%xmm6)
  #   [%rax-8]  = I# info pointer, [%rax]    = count (%rsi)
  movq        %rsi, (%rax)
  movq        $base_GHCziFloat_Dzh_con_info, -24(%rax)
  movsd       %xmm6, -16(%rax)
  movq        $base_GHCziBase_Izh_con_info, -8(%rax)
  # %rbx = address of the I# object + 1, %rsi = address of the D# object + 1
  # (the +1 is presumably a pointer tag -- confirm).
  leaq        -7(%rax), %rbx
  leaq        -23(%rax), %rsi
  # Jump to the code address stored at (%rbp); presumably the return
  # continuation.
  jmp *(%rbp)
  .p2align 6,,7
.L12:
  # Loop body: sum += value; count++; value += .LC2 (not shown; presumably
  # the 1.0 step from the Core).
  movapd      %xmm6, %xmm0
  incq        %rsi
  # Undo the 32-byte reservation, since the loop head reserves it again.
  subq        $32, %r12
  addsd       %xmm5, %xmm0
  addsd       .LC2(%rip), %xmm5
  movapd      %xmm0, %xmm6
  jmp Main_zdszdwfold_info
  .p2align 6,,7
.L10:
  # Reservation failed: store the request size (32) at 184(%r13), load this
  # function's closure address into %rbx, save the three live arguments in
  # 24 fresh bytes below %rbp, then jump through the pointer at -8(%r13)
  # (presumably the garbage-collector entry -- confirm).
  leaq        -24(%rbp), %rax
  movq        $32, 184(%r13)
  movl        $Main_zdszdwfold_closure, %ebx
  movq        %rax, %rbp
  movsd       %xmm5, (%rax)
  movsd       %xmm6, 8(%rax)
  movq        %rsi, 16(%rax)
  jmp *-8(%r13)

Using the native code generator:

# Main_zdszdwfold_info -- the same $s$wfold worker loop as emitted by GHC's
# native code generator. AT&T syntax, x86-64.
#
# Register roles as used in this listing:
#   %xmm5  current Double value; compared with .Ln13O, advanced by .Ln13P
#   %xmm6  Double running sum
#   %rsi   integer count
#   %r12   bump pointer checked against %r15 (presumably heap pointer and
#          heap limit -- confirm)
#   %rbp   presumably the STG stack pointer; %r13 presumably the STG
#          register table -- confirm
Main_zdszdwfold_info:
  # Reserve 32 bytes; branch to .Lc13J if %r12 is now above %r15.
  addq $32,%r12
  cmpq %r15,%r12
  ja .Lc13J
  # Loop test: compare %xmm5 with the constant at .Ln13O (not shown;
  # presumably the 1.0000000005e9 bound from the Core).
  ucomisd .Ln13O(%rip),%xmm5
  # Parity flag set means the operands were unordered (NaN): take the
  # exit path.
  jp .Ln13H
  # %xmm5 <= bound: run another iteration.
  jbe .Lc13M
.Ln13H:
  # Exit path: build two two-word objects in the reserved space,
  #   [%r12-24] = D# info pointer, [%r12-16] = sum   (%xmm6)
  #   [%r12-8]  = I# info pointer, [%r12]    = count (%rsi)
  movq $base_GHCziFloat_Dzh_con_info,-24(%r12)
  movsd %xmm6,-16(%r12)
  movq $base_GHCziBase_Izh_con_info,-8(%r12)
  movq %rsi,(%r12)
  # %rbx = I# object address + 1, %rsi = D# object address + 1
  # (the +1 is presumably a pointer tag -- confirm).
  leaq -7(%r12),%rbx
  leaq -23(%r12),%rsi
  # Jump to the code address stored at (%rbp); presumably the return
  # continuation.
  jmp *(%rbp)
.Lc13J:
  # Reservation failed: store the request size (32) at 184(%r13), load this
  # function's closure address into %rbx, save the three live arguments in
  # 24 fresh bytes below %rbp, then jump through the pointer at -8(%r13)
  # (presumably the garbage-collector entry -- confirm).
  movq $32,184(%r13)
  movl $Main_zdszdwfold_closure,%ebx
  addq $-24,%rbp
  movsd %xmm5,(%rbp)
  movsd %xmm6,8(%rbp)
  movq %rsi,16(%rbp)
  jmp *-8(%r13)
.Lc13M:
  # Loop body: %rax = count + 1; sum += value; value += .Ln13P (not shown;
  # presumably the 1.0 step from the Core); then commit the new sum and count.
  leaq 1(%rsi),%rax
  movsd %xmm6,%xmm0
  addsd %xmm5,%xmm0
  addsd .Ln13P(%rip),%xmm5
  movsd %xmm0,%xmm6
  movq %rax,%rsi
  # Undo the 32-byte reservation, since the loop head reserves it again.
  addq $-32,%r12
  jmp Main_zdszdwfold_info

$ time ./A 
500000000.067109
./A  3.62s user 0.03s system 99% cpu 3.671 total


GCC -O2 -- radically optimised.

# main -- GCC -O2 output for the C summation loop. AT&T syntax, x86-64.
#
# All loop state lives in registers in this listing:
#   %rdx   64-bit counter, starts at 1 (corresponds to `n` in the C source)
#   %rax   64-bit counter, starts at 0 (corresponds to `a`)
#   %xmm2  double accumulator, starts at 0.0 (corresponds to `b`)
#   %xmm3  loop bound loaded once from .LC5 (not shown; presumably 1e9)
#   %xmm0, %xmm1  per-iteration double conversions of %rdx and %rax
main:
.LFB11:
	# Adjust %rsp by 8 for the duration of the function (presumably to keep
	# the stack 16-byte aligned at the printf call).
	subq	$8, %rsp
.LCFI0:
	# n = 1, a = 0, b = 0.0; load the loop bound once, outside the loop.
	movl	$1, %edx
	xorl	%eax, %eax
	xorpd	%xmm2, %xmm2
	movsd	.LC5(%rip), %xmm3
	.p2align 4,,7

.L6:
	# Bottom-tested loop: the step runs first, then the bound is checked
	# against the already-incremented n.
	incq	%rdx
	incq	%rax
	cvtsi2sdq	%rdx, %xmm0
	cvtsi2sdq	%rax, %xmm1
	# Compare bound with (double)n. The addsd that follows does not touch
	# the flags, so jae still acts on this comparison.
	ucomisd	%xmm0, %xmm3
	# b += (double)a
	addsd	%xmm1, %xmm2
	# Loop while bound >= (double)n.
	jae	.L6

	# %xmm2 = b / (double)a, reusing the conversion from the last iteration.
	divsd	%xmm1, %xmm2
	# First argument: address of .LC2 (not shown; presumably the "%f\n"
	# format string from the C source).
	movl	$.LC2, %edi
	# Variadic call: %al holds the number of vector registers used (one).
	movl	$1, %eax
	# The quotient is passed in %xmm0.
	movapd	%xmm2, %xmm0
	call	printf
	addq	$8, %rsp
	# return 0
	xorl	%eax, %eax
	ret