mode(0)
clearglobal();
stacksize('max'); 
// ---------
// Preamble
// ---------
// The following document is a personal reminder for vectorization issue.
// Rather few documents / tutorials exist in such item, and the main goal of the present document is to initiate and share (recent) experiences on such powerfull way of coding ...
// ... furthermore is a way to thanks some people of the community that helped me ...
// 
// Each time, the "traditional" code using loops is compared to a vectorized code.
//
// NB : My experience in vectorization is quite new ; thus the above codes must be intended as one way among others to code fast
// 
// ---------
// Release
// ---------
// V0.1 : initial release - initiated and created by Paul Carrico (paul_dat_carrico[at]free_dot_fr] the nov-19th 2012
// Please share any improvement and /or correction to the community
// Hope that other examples will be added by skilled people on that issue
//
// -----------------
// Table of content
// -----------------
// case 1 : basic summation as found in some Scilab documentation
// case 2 : basic products
// case 3 : partial (or complete) product in a matrix
// case 4 : more complex product using a function 
// case 5 : use of ".*." i.e. Kronecker symbol
// case 6 : ise of "."
// ....
// case n : 


current_release = 0.0;
printf("\n------------------------------------------------------------------------------------------------------------------------------\n");
printf("The current release is %2.1f\n",current_release);
printf("NB : in some cases, a division by zero error may appears => increase the number of loops by a factor  1000 or 10 000, etc. ...\n");
printf("------------------------------------------------------------------------------------------------------------------------------\n\n");
choice = 4;


select choice
case 1 then
    // case 1 : basic summation as found in some Scilab documentation
    printf("###################################################################\n");
    printf("Case 1 : basic summation as found in some Scilab documentation\n");
    Numb_loop = 1E5;
    A = rand(Numb_loop,1);
    scalar_trad = 0;

    // traditional loop
    tic()
    for i = 1 : Numb_loop 
        scalar_trad = scalar_trad + A(i);
    end
    time_traditional = toc();
    printf("Using loops, the duration is %g\n\n",time_traditional);

    // way 1 : sum function
    tic()
    scalar_way1 = sum(A(:,1));
    time_way1 = toc();
    ratio_way1 = (time_traditional / time_way1);
    printf("Using vectorization - way 1, the duration is %g\n",time_way1);
    printf("For the way1, the ratio duration is %g\n",ratio_way1);
    printf("The results difference is %e\n\n",(max(abs(scalar_trad - scalar_way1))));

    // way2 : sum function + index use (see. case X for further information's)
    tic()
    scalar_way2 = sum(A([1:Numb_loop],1));
    time_way2 = toc();
    ratio_way2 = (time_traditional / time_way2);
    printf("Using vectorization - way 2, the duration is %g\n",time_way2);
    printf("For the way1, the ratio duration is %g\n",ratio_way2);
    printf("The results difference is %e\n",(max(abs(scalar_trad - scalar_way2))));
    printf("\n\n");


case 2 then
    // case 2 : basic products of 2 matrix
    printf("###################################################################\n");
    printf("Case 2 : basic products\n");
    Numb_loop = 1E5;
    A = rand(Numb_loop,Numb_loop);
    B = rand(Numb_loop,Numb_loop);
    C = zeros(Numb_loop,Numb_loop);

    // using a loop
    tic()
    for i = 1 : Numb_loop
        for j = 1 : Numb_loop
            cst = 0;
            for k = 1 : Numb_loop
                cst = cst +  A(i,k) * B(k,j);
            end
            C(i,j) = cst;
        end

    end
    time_traditional = toc();
    printf("Using loops, the duration is %g\n\n",time_traditional);

    // way 1 : vectorization
    C1 = zeros(Numb_loop,Numb_loop);
    tic()
    C1 = A*B;
    time_way1 = toc();
    ratio_way1 = (time_traditional / time_way1);    
    printf("Using vectorization - way 1, the duration is %g\n",time_way1);
    printf("For the way1, the ratio duration is %g\n",ratio_way1);
    printf("The results difference is %e\n\n",(max(abs(C - C1))));


case 3 then
    // case 3 : partial (or complete product in a matrix)
    printf("###################################################################\n");
    printf("Case 2 : partial products\n");
    A = rand(Numb_loop,(Numb_loop - modulo(Numb_loop,2))/2);
    start_ = 5;
    end_ = (Numb_loop - modulo(Numb_loop,3))/3;

    // in a first stage, we want to make a product between the (start)th row and the (end_)th one
    // a new matrix is created
    // NB : note that for a matrix A = [ 1 2 3 ; 4 5 6 ; 7 8 9] :
    //      -  A([2 3],:) gives the lines 2 3
    //      -  A([2 3],[2 3]) gives B = [ 5 6 ; 8 9]
    tic()
    B = zeros((end_ - start_),(Numb_loop - modulo(Numb_loop,2))/2);
    k = 1;
    for i = start_ : end_
        B(k,:) = 20*A(i,:);
        k = k + 1;
    end
    time_traditional = toc();
    printf("Using loops, the duration is %g\n\n",time_traditional);

    // way 1 : creating an index
    tic()
    B1 = zeros((end_ - start_ +1),(Numb_loop - modulo(Numb_loop,2))/2);
    index = [start_ : end_];
    B1([1:(end_ - start_ +1)],:) = 20* A([start_ : end_],:);
    time_way1 = toc();
    ratio_way1 = (time_traditional / time_way1);
    printf("Using vectorization - way 1, the duration is %g\n",time_way1);
    printf("For the way1, the ratio duration is %g\n",ratio_way1);
    printf("The results difference is %e\n\n",(max(abs(B - B1))));


    // way 2 : 
    tic()
    B2 = zeros((end_ - start_ +1),0.5*Numb_loop);
    index = [start_ : end_];
    B2([1:(end_ - start_ +1)],:) = 20*A([index],:);
    time_way2 = toc();
    ratio_way2 = (time_traditional / time_way2);
    printf("Using vectorization - way 2, the duration is %g\n",time_way2);
    printf("For the way1, the ratio duration is %g\n",ratio_way2);
    printf("The results difference is %e\n\n",(max(abs(B - B2))));


    // of course the same can be done on the columns
    tic()
    start_c_ = 1;
    end_c_ = (Numb_loop - modulo(Numb_loop,4))/4;
    B3 = zeros(Numb_loop, (end_c_ - start_c_ +1));
    B3(:,[1:(end_c_ - start_c_ +1)]) = 20* A(:,[start_c_ : end_c_]);
    time_add = toc();
    printf("Using vectorization, the duration is %g\n",time_add);
    printf("\n\n");

    // and so on, a partial or complete tables/matrix ....


case 4
    // case 4 : more complex product using a function
    printf("###################################################################\n");
    printf("Case 4 : more complex product using a function\n");
    function [D_cst1,D_cst2] = myfool(vect1,vect2,theta,fi)
        D_cst1 = vect1*cos(theta)*sin(fi);
        D_cst2 = vect2*cos(2*theta)*tan(fi);
    endfunction

    Numb_loop = 1E5;
    // traditionally
    A = rand(Numb_loop,2);
    theta = %pi/3;
    fi = %pi/6;

    tic()
    B = zeros(Numb_loop,2);
    for i = 1 : Numb_loop
        [B(i,1),B(i,2)] = myfool(A(i,1), A(i,2),theta,fi);
    end
    time_traditional = toc();
    printf("Using loops, the duration is %g\n\n",time_traditional);

    // way 1:
    B1 = zeros(Numb_loop,2);
    tic()
    [B1([1:Numb_loop],1),B1([1:Numb_loop],2)] = myfool(A([1:Numb_loop],1),A([1:Numb_loop],2),theta,fi);
    time_way1 = toc();
    ratio_way1 = (time_traditional / time_way1);
    printf("Using vectorization - way 1, the duration is %g\n",time_way1);
    printf("For the way1, the ratio duration is %g\n",ratio_way1);
    printf("The results difference is %e\n\n",(max(abs(B - B1))));


case 5
    // case 5 : in the current study, for a given theta, we want to vary fi from 0 to 360 degrees
    // and theta vary from 0 to 360 degrees as well
    // thus the dimension of the final matrix is (360 +1)^2 = 130321 rows
    printf("###################################################################\n");
    printf("Case 5 : more complex product using a function\n");
    function vect_fct = fct_angles(i,j)
        vect_fct = [cos(i)+sin(j) -sin(j) i j];
    endfunction

    Numb_loop = 1E5;

    // tradictionally
    tic()
    B = zeros(130321,4);
    //
    k = 1;
    for i = 0 : 360
        for j = 0 : 360
            B(k,:) = fct_angles(i,j);
            k = k +1;
        end
    end
    time_traditional = toc();
    printf("Using loops, the duration is %g\n\n",time_traditional);

    // way 1 : NB : a better way should exist
    // the idea is to create 2 vectors : one for theta and another one for fi
    // only one loop is used to create these vectors
    tic()
    B1 = zeros(130321,4);
    theta_vect = (0:360)'.*.ones(361,1); // BE CAREFUL : ".*." -> Kronecker symbol !!!!
    fi_vect = (ones(361,1)'.*.[0:360])';
    B1 = fct_angles(theta_vect([1:130321],1),fi_vect([1:130321],1));
    time_way1 = toc();
    ratio_way1 = (time_traditional / time_way1);
    printf("Using vectorization - way 1, the duration is %g\n",time_way1);
    printf("For the way1, the ratio duration is %g\n",ratio_way1);
    printf("The results difference is %e\n\n",(max(abs(B - B1))));


case 6
    // case 6 : use of "." i.e. "_dot_" ... typically on a vector
    printf("###################################################################\n");
    printf("Case 6 : use of ""."" \n");

    Numb_loop = 1E6;
    x = [1 : Numb_loop]'; // vector created

    // way1
    tic()
    x1_square = x.^2;
    time_way1 = toc();
    printf("For the way1, the duration is %g\n\n",time_way1);


    // way 2
    tic()
    x2_square = x([1:Numb_loop],:)^2;
    time_way2 = toc();
    printf("For the way1, the duration is %g\n\n",time_way2);
    printf("The ratio duration is %g\n",time_way1 / time_way2);


end