23 #if (AE_COMPILER==AE_MSVC) 24 #pragma warning(disable:4100) 25 #pragma warning(disable:4127) 26 #pragma warning(disable:4702) 27 #pragma warning(disable:4996) 53 static void tsort_tagsortfastirec(
ae_vector*
a,
60 static void tsort_tagsortfastrrec(
ae_vector* a,
67 static void tsort_tagsortfastrec(
ae_vector* a,
92 static void hsschur_internalauxschur(
ae_bool wantt,
109 static void hsschur_aux2x2schur(
double* a,
120 static double hsschur_extschursign(
double a,
double b,
ae_state *_state);
155 static double linmin_ftol = 0.001;
158 static double linmin_stpmin = 1.0E-50;
159 static double linmin_defstpmax = 1.0E+50;
160 static double linmin_armijofactor = 1.3;
161 static void linmin_mcstep(
double* stx,
189 static ae_int_t ftbase_coloperandscnt = 1;
190 static ae_int_t ftbase_coloperandsize = 2;
191 static ae_int_t ftbase_colmicrovectorsize = 3;
192 static ae_int_t ftbase_colparam0 = 4;
193 static ae_int_t ftbase_colparam1 = 5;
194 static ae_int_t ftbase_colparam2 = 6;
195 static ae_int_t ftbase_colparam3 = 7;
198 static ae_int_t ftbase_opcomplexreffft = 1;
199 static ae_int_t ftbase_opbluesteinsfft = 2;
200 static ae_int_t ftbase_opcomplexcodeletfft = 3;
201 static ae_int_t ftbase_opcomplexcodelettwfft = 4;
202 static ae_int_t ftbase_opradersfft = 5;
203 static ae_int_t ftbase_opcomplextranspose = -1;
204 static ae_int_t ftbase_opcomplexfftfactors = -2;
205 static ae_int_t ftbase_opstart = -3;
207 static ae_int_t ftbase_opparallelcall = -5;
208 static ae_int_t ftbase_maxradix = 6;
209 static ae_int_t ftbase_updatetw = 16;
210 static ae_int_t ftbase_recursivethreshold = 1024;
211 static ae_int_t ftbase_raderthreshold = 19;
212 static ae_int_t ftbase_ftbasecodeletrecommended = 5;
213 static double ftbase_ftbaseinefficiencyfactor = 1.3;
214 static ae_int_t ftbase_ftbasemaxsmoothfactor = 5;
215 static void ftbase_ftdeterminespacerequirements(
ae_int_t n,
219 static void ftbase_ftcomplexfftplanrec(
ae_int_t n,
265 static void ftbase_ftapplycomplexreffft(
ae_vector* a,
272 static void ftbase_ftapplycomplexcodeletfft(
ae_vector* a,
278 static void ftbase_ftapplycomplexcodelettwfft(
ae_vector* a,
284 static void ftbase_ftprecomputebluesteinsfft(
ae_int_t n,
303 static void ftbase_ftprecomputeradersfft(
ae_int_t n,
321 static void ftbase_ftfactorize(
ae_int_t n,
327 static void ftbase_ffttwcalc(
ae_vector* a,
332 static void ftbase_internalcomplexlintranspose(
ae_vector* a,
338 static void ftbase_ffticltrec(
ae_vector* a,
347 static void ftbase_fftirltrec(
ae_vector* a,
356 static void ftbase_ftbasefindsmoothrec(
ae_int_t n,
524 ae_assert(n>=1,
"TaskGenInterpolationEqdist1D: N<1!", _state);
532 for(i=1; i<=n-1; i++)
575 ae_assert(n>=1,
"TaskGenInterpolationEqdist1D: N<1!", _state);
583 for(i=1; i<=n-1; i++)
618 ae_assert(n>=1,
"TaskGenInterpolation1DCheb1: N<1!", _state);
623 for(i=0; i<=n-1; i++)
665 ae_assert(n>=1,
"TaskGenInterpolation1DCheb2: N<1!", _state);
670 for(i=0; i<=n-1; i++)
718 ae_assert(n>=1,
"APSERVAreDistinct: internal error (N<1)", _state);
732 for(i=1; i<=n-1; i++)
738 ae_assert(!nonsorted,
"APSERVAreDistinct: internal error (not sorted)", _state);
739 for(i=1; i<=n-1; i++)
764 result = (v1&&v2)||(!v1&&!v2);
876 for(i=0; i<=m-1; i++)
878 for(j=0; j<=n-1; j++)
921 for(i=0; i<=m-1; i++)
923 for(j=0; j<=n-1; j++)
954 ae_assert(n>=0,
"APSERVIsFiniteVector: internal error (N<0)", _state);
965 for(i=0; i<=n-1; i++)
992 ae_assert(n>=0,
"APSERVIsFiniteCVector: internal error (N<0)", _state);
993 for(i=0; i<=n-1; i++)
1023 ae_assert(n>=0,
"APSERVIsFiniteMatrix: internal error (N<0)", _state);
1024 ae_assert(m>=0,
"APSERVIsFiniteMatrix: internal error (M<0)", _state);
1035 for(i=0; i<=m-1; i++)
1037 for(j=0; j<=n-1; j++)
1067 ae_assert(n>=0,
"APSERVIsFiniteCMatrix: internal error (N<0)", _state);
1068 ae_assert(m>=0,
"APSERVIsFiniteCMatrix: internal error (M<0)", _state);
1069 for(i=0; i<=m-1; i++)
1071 for(j=0; j<=n-1; j++)
1104 ae_assert(n>=0,
"APSERVIsFiniteRTRMatrix: internal error (N<0)", _state);
1115 for(i=0; i<=n-1; i++)
1127 for(j=j1; j<=j2; j++)
1160 ae_assert(n>=0,
"APSERVIsFiniteCTRMatrix: internal error (N<0)", _state);
1161 for(i=0; i<=n-1; i++)
1173 for(j=j1; j<=j2; j++)
1204 ae_assert(n>=0,
"APSERVIsFiniteOrNaNMatrix: internal error (N<0)", _state);
1205 ae_assert(m>=0,
"APSERVIsFiniteOrNaNMatrix: internal error (M<0)", _state);
1206 for(i=0; i<=m-1; i++)
1208 for(j=0; j<=n-1; j++)
1521 ae_assert(n>0,
"RandomUnit: N<=0", _state);
1529 for(i=0; i<=n-1; i++)
1538 for(i=0; i<=n-1; i++)
1668 for(i=0; i<=n-1; i++)
1691 for(i=0; i<=n-1; i++)
1717 for(i=0; i<=n-1; i++)
1741 for(i=0; i<=n-1; i++)
1764 for(i=0; i<=n-1; i++)
1790 for(i=0; i<=n-1; i++)
1821 for(i=0; i<=n0-1; i++)
1823 for(j=0; j<=n1-1; j++)
1854 for(i=0; i<=n0-1; i++)
1856 for(j=0; j<=n1-1; j++)
1886 for(i=0; i<=n0-1; i++)
1888 for(j=0; j<=n1-1; j++)
1911 for(i=0; i<=src->
cnt-1; i++)
1933 for(i=0; i<=src->
cnt-1; i++)
1956 for(i=0; i<=src->
rows-1; i++)
1958 for(j=0; j<=src->
cols-1; j++)
2006 for(k=0; k<=nheader-1; k++)
2060 ae_assert(tasksize>=2,
"SplitLengthEven: TaskSize<2", _state);
2073 *task0 = tasksize/2;
2074 *task1 = tasksize/2;
2087 *task0 = tasksize-1;
2090 ae_assert(*task0>=1,
"SplitLengthEven: internal error", _state);
2091 ae_assert(*task1>=1,
"SplitLengthEven: internal error", _state);
2119 ae_assert(chunksize>=2,
"SplitLength: ChunkSize<2", _state);
2120 ae_assert(tasksize>=2,
"SplitLength: TaskSize<2", _state);
2121 *task0 = tasksize/2;
2122 if( *task0>chunksize&&*task0%chunksize!=0 )
2124 *task0 = *task0-*task0%chunksize;
2126 *task1 = tasksize-(*task0);
2127 ae_assert(*task0>=1,
"SplitLength: internal error", _state);
2128 ae_assert(*task1>=1,
"SplitLength: internal error", _state);
2627 for(i=0; i<=n-1; i++)
2654 for(i=0; i<=n-1; i++)
2659 for(i=0; i<=n-1; i++)
2731 for(i=1; i<=n-1; i++)
2742 for(i=0; i<=n-1; i++)
2770 tsort_tagsortfastirec(a, b, bufa, bufb, 0, n-1, _state);
2818 for(i=1; i<=n-1; i++)
2829 for(i=0; i<=n-1; i++)
2857 tsort_tagsortfastrrec(a, b, bufa, bufb, 0, n-1, _state);
2903 for(i=1; i<=n-1; i++)
2914 for(i=0; i<=n-1; i++)
2935 tsort_tagsortfastrec(a, bufa, 0, n-1, _state);
3333 middle = first+half;
3378 middle = first+half;
3401 static void tsort_tagsortfastirec(
ae_vector* a,
3437 for(j=i1+1; j<=i2; j++)
3448 for(k=j-1; k>=i1; k--)
3465 for(i=j-1; i>=
k; i--)
3515 for(i=i1; i<=i2; i++)
3530 cntless = cntless+1;
3552 cntgreater = cntgreater+1;
3554 for(i=0; i<=cnteq-1; i++)
3556 j = i1+cntless+cnteq-1-
i;
3561 for(i=0; i<=cntgreater-1; i++)
3563 j = i1+cntless+cnteq+
i;
3572 tsort_tagsortfastirec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
3573 tsort_tagsortfastirec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
3584 static void tsort_tagsortfastrrec(
ae_vector* a,
3621 for(j=i1+1; j<=i2; j++)
3632 for(k=j-1; k>=i1; k--)
3649 for(i=j-1; i>=
k; i--)
3699 for(i=i1; i<=i2; i++)
3714 cntless = cntless+1;
3736 cntgreater = cntgreater+1;
3738 for(i=0; i<=cnteq-1; i++)
3740 j = i1+cntless+cnteq-1-
i;
3745 for(i=0; i<=cntgreater-1; i++)
3747 j = i1+cntless+cnteq+
i;
3756 tsort_tagsortfastrrec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
3757 tsort_tagsortfastrrec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
3768 static void tsort_tagsortfastrec(
ae_vector* a,
3802 for(j=i1+1; j<=i2; j++)
3813 for(k=j-1; k>=i1; k--)
3829 for(i=j-1; i>=
k; i--)
3877 for(i=i1; i<=i2; i++)
3891 cntless = cntless+1;
3911 cntgreater = cntgreater+1;
3913 for(i=0; i<=cnteq-1; i++)
3915 j = i1+cntless+cnteq-1-
i;
3919 for(i=0; i<=cntgreater-1; i++)
3921 j = i1+cntless+cnteq+
i;
3929 tsort_tagsortfastrec(a, bufa, i1, i1+cntless-1, _state);
3930 tsort_tagsortfastrec(a, bufa, i1+cntless+cnteq, i2, _state);
3986 for(i=0; i<=n-1; i++)
4004 tmp = (double)(n-1)/(double)2;
4006 for(i=0; i<=n-1; i++)
4028 for(k=i; k<=j-1; k++)
4040 voffs = (double)(n-1)/(double)2;
4046 for(i=0; i<=n-1; i++)
4073 #ifndef ALGLIB_INTERCEPTS_ABLAS 4103 #ifndef ALGLIB_INTERCEPTS_ABLAS 4189 #ifndef ALGLIB_INTERCEPTS_ABLAS 4196 return _ialglib_i_cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4221 #ifndef ALGLIB_INTERCEPTS_ABLAS 4228 return _ialglib_i_cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4253 #ifndef ALGLIB_INTERCEPTS_ABLAS 4260 return _ialglib_i_rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4285 #ifndef ALGLIB_INTERCEPTS_ABLAS 4292 return _ialglib_i_rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4318 #ifndef ALGLIB_INTERCEPTS_ABLAS 4325 return _ialglib_i_cmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
4351 #ifndef ALGLIB_INTERCEPTS_ABLAS 4358 return _ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
4388 #ifndef ALGLIB_INTERCEPTS_ABLAS 4395 return _ialglib_i_rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
4425 #ifndef ALGLIB_INTERCEPTS_ABLAS 4432 return _ialglib_i_cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
4554 if(
cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
4568 for(i=0; i<=m-1; i++)
4570 for(j=0; j<=n-1; j++)
4578 for(i=0; i<=m-1; i++)
4580 for(j=0; j<=n-1; j++)
4616 if( i+2<=m&&j+2<=n )
4658 for(t=0; t<=k-1; t++)
4702 v00x = v00x+a0x*b0x-a0y*b0y;
4703 v00y = v00y+a0x*b0y+a0y*b0x;
4704 v01x = v01x+a0x*b1x-a0y*b1y;
4705 v01y = v01y+a0x*b1y+a0y*b1x;
4706 v10x = v10x+a1x*b0x-a1y*b0y;
4707 v10y = v10y+a1x*b0y+a1y*b0x;
4708 v11x = v11x+a1x*b1x-a1y*b1y;
4709 v11y = v11y+a1x*b1y+a1y*b1x;
4750 for(ik=i0; ik<=i1; ik++)
4752 for(jk=j0; jk<=j1; jk++)
4761 if( optypea==0&&optypeb==0 )
4765 if( optypea==0&&optypeb==1 )
4769 if( optypea==0&&optypeb==2 )
4773 if( optypea==1&&optypeb==0 )
4777 if( optypea==1&&optypeb==1 )
4781 if( optypea==1&&optypeb==2 )
4785 if( optypea==2&&optypeb==0 )
4789 if( optypea==2&&optypeb==1 )
4793 if( optypea==2&&optypeb==2 )
4897 if(
rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
4911 for(i=0; i<=m-1; i++)
4913 for(j=0; j<=n-1; j++)
4921 for(i=0; i<=m-1; i++)
4923 for(j=0; j<=n-1; j++)
4940 if( optypea==0&&optypeb==0 )
4942 rmatrixgemmk44v00(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4944 if( optypea==0&&optypeb!=0 )
4946 rmatrixgemmk44v01(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4948 if( optypea!=0&&optypeb==0 )
4950 rmatrixgemmk44v10(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4952 if( optypea!=0&&optypeb!=0 )
4954 rmatrixgemmk44v11(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
5048 ae_assert(
ae_fp_neq(alpha,0),
"RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
5071 if( i+4<=m&&j+4<=n )
5111 for(t=0; t<=k-1; t++)
5193 for(ik=i0; ik<=i1; ik++)
5195 for(jk=j0; jk<=j1; jk++)
5312 ae_assert(
ae_fp_neq(alpha,0),
"RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
5335 if( i+4<=m&&j+4<=n )
5371 for(t=0; t<=k-1; t++)
5453 for(ik=i0; ik<=i1; ik++)
5455 for(jk=j0; jk<=j1; jk++)
5572 ae_assert(
ae_fp_neq(alpha,0),
"RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
5595 if( i+4<=m&&j+4<=n )
5631 for(t=0; t<=k-1; t++)
5713 for(ik=i0; ik<=i1; ik++)
5715 for(jk=j0; jk<=j1; jk++)
5833 ae_assert(
ae_fp_neq(alpha,0),
"RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
5856 if( i+4<=m&&j+4<=n )
5892 for(t=0; t<=k-1; t++)
5974 for(ik=i0; ik<=i1; ik++)
5976 for(jk=j0; jk<=j1; jk++)
6028 #ifndef ALGLIB_INTERCEPTS_MKL 6035 return _ialglib_i_rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
6065 #ifndef ALGLIB_INTERCEPTS_MKL 6072 return _ialglib_i_rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
6105 for(ix=i1; ix<=i2; ix++)
6112 ssq = 1+ssq*
ae_sqr(scl/absxi, _state);
6117 ssq = ssq+
ae_sqr(absxi/scl, _state);
6121 result = scl*
ae_sqrt(ssq, _state);
6136 for(i=i1+1; i<=i2; i++)
6158 for(i=i1+1; i<=i2; i++)
6180 for(j=j1+1; j<=j2; j++)
6204 ae_assert(i2-i1==j2-j1,
"UpperHessenberg1Norm: I2-I1<>J2-J1!", _state);
6205 for(j=j1; j<=j2; j++)
6209 for(i=i1; i<=i2; i++)
6211 for(j=
ae_maxint(j1, j1+i-i1-1, _state); j<=j2; j++)
6217 for(j=j1; j<=j2; j++)
6241 if( is1>is2||js1>js2 )
6245 ae_assert(is2-is1==id2-id1,
"CopyMatrix: different sizes!", _state);
6246 ae_assert(js2-js1==jd2-jd1,
"CopyMatrix: different sizes!", _state);
6247 for(isrc=is1; isrc<=is2; isrc++)
6249 idst = isrc-is1+id1;
6274 ae_assert(i1-i2==j1-j2,
"InplaceTranspose error: incorrect array size!", _state);
6275 for(i=i1; i<=i2-1; i++)
6304 if( is1>is2||js1>js2 )
6308 ae_assert(is2-is1==jd2-jd1,
"CopyAndTranspose: different sizes!", _state);
6309 ae_assert(js2-js1==id2-id1,
"CopyAndTranspose: different sizes!", _state);
6310 for(isrc=is1; isrc<=is2; isrc++)
6312 jdst = isrc-is1+jd1;
6348 ae_assert(j2-j1==ix2-ix1,
"MatrixVectorMultiply: A and X dont match!", _state);
6349 ae_assert(i2-i1==iy2-iy1,
"MatrixVectorMultiply: A and Y dont match!", _state);
6356 for(i=iy1; i<=iy2; i++)
6369 for(i=i1; i<=i2; i++)
6385 ae_assert(i2-i1==ix2-ix1,
"MatrixVectorMultiply: A and X do not match!", _state);
6386 ae_assert(j2-j1==iy2-iy1,
"MatrixVectorMultiply: A and Y do not match!", _state);
6393 for(i=iy1; i<=iy2; i++)
6406 for(i=i1; i<=i2; i++)
6499 ae_assert(acols==brows,
"MatrixMatrixMultiply: incorrect matrix sizes!", _state);
6500 if( ((arows<=0||acols<=0)||brows<=0)||bcols<=0 )
6520 for(i=ci1; i<=ci2; i++)
6522 for(j=cj1; j<=cj2; j++)
6530 for(i=ci1; i<=ci2; i++)
6539 if( !transa&&!transb )
6541 for(l=ai1; l<=ai2; l++)
6543 for(r=bi1; r<=bi2; r++)
6556 if( !transa&&transb )
6558 if( arows*acols<brows*bcols )
6560 for(r=bi1; r<=bi2; r++)
6562 for(l=ai1; l<=ai2; l++)
6572 for(l=ai1; l<=ai2; l++)
6574 for(r=bi1; r<=bi2; r++)
6587 if( transa&&!transb )
6589 for(l=aj1; l<=aj2; l++)
6591 for(r=bi1; r<=bi2; r++)
6604 if( transa&&transb )
6606 if( arows*acols<brows*bcols )
6608 for(r=bi1; r<=bi2; r++)
6611 for(i=1; i<=crows; i++)
6615 for(l=ai1; l<=ai2; l++)
6626 for(l=aj1; l<=aj2; l++)
6630 for(r=bi1; r<=bi2; r++)
6679 for(i=i1; i<=i2; i++)
6689 for(i=i1; i<=i2-1; i++)
6713 for(i=i1+1; i<=i2; i++)
6757 for(i=i1; i<=i2; i++)
6770 for(i=i1; i<=i2; i++)
6887 xnorm =
ae_sqrt(xnorm, _state)*mx;
6909 *tau = (beta-alpha)/beta;
6963 if( (
ae_fp_eq(tau,0)||n1>n2)||m1>m2 )
6971 for(i=n1; i<=n2; i++)
6975 for(i=m1; i<=m2; i++)
6984 for(i=m1; i<=m2; i++)
7035 if( (
ae_fp_eq(tau,0)||n1>n2)||m1>m2 )
7040 for(i=m1; i<=m2; i++)
7160 xnorm =
ae_sqrt(xnorm, _state)*mx;
7177 tau->
x = (beta-alphr)/beta;
7178 tau->
y = -alphi/
beta;
7247 for(i=n1; i<=n2; i++)
7251 for(i=m1; i<=m2; i++)
7260 for(i=m1; i<=m2; i++)
7320 for(i=m1; i<=m2; i++)
7330 for(i=m1; i<=m2; i++)
7377 for(i=i1; i<=i2; i++)
7387 for(i=i1; i<=i2-1; i++)
7413 for(i=i1+1; i<=i2; i++)
7460 for(i=i1; i<=i2; i++)
7474 for(i=i1; i<=i2; i++)
7550 for(j=m1; j<=m2-1; j++)
7571 for(j=m1; j<=m2-1; j++)
7592 for(j=m2-1; j>=m1; j--)
7613 for(j=m2-1; j>=m1; j--)
7684 for(j=n1; j<=n2-1; j++)
7705 for(j=n1; j<=n2-1; j++)
7726 for(j=n2-1; j>=n1; j--)
7747 for(j=n2-1; j>=n1; j--)
7986 ae_assert(n>=0,
"InternalSchurDecomposition: incorrect N!", _state);
7987 ae_assert(tneeded==0||tneeded==1,
"InternalSchurDecomposition: incorrect TNeeded!", _state);
7988 ae_assert((zneeded==0||zneeded==1)||zneeded==2,
"InternalSchurDecomposition: incorrect ZNeeded!", _state);
8036 for(j=1; j<=n-2; j++)
8038 for(i=j+2; i<=
n; i++)
8047 if( (ns<=2||ns>n)||maxb>=n )
8053 hsschur_internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
8065 for(i=j+1; i<=
n; i++)
8073 for(i=j+2; i<=
n; i++)
8087 smlnum = unfl*(n/ulp);
8126 for(i=j+1; i<=
n; i++)
8134 for(i=j+2; i<=
n; i++)
8157 for(its=0; its<=itn; its++)
8163 for(k=i; k>=l+1; k--)
8199 if( its==20||its==30 )
8205 for(ii=i-ns+1; ii<=
i; ii++)
8217 copymatrix(h, i-ns+1, i, i-ns+1, i, &s, 1, ns, 1, ns, _state);
8218 hsschur_internalauxschur(
ae_false,
ae_false, ns, 1, ns, &s, &tmpwr, &tmpwi, 1, ns, z, &work, &workv3, &workc1, &works1, &ierr, _state);
8219 for(p1=1; p1<=ns; p1++)
8231 for(ii=1; ii<=ierr; ii++)
8246 for(ii=2; ii<=ns+1; ii++)
8251 for(j=i-ns+1; j<=
i; j++)
8263 matrixvectormultiply(h, l, l+nv, l, l+nv-1,
ae_false, &vv, 1, nv, 1.0, &v, 1, nv+1, -wr->
ptr.
p_double[j], _state);
8276 matrixvectormultiply(h, l, l+nv, l, l+nv-1,
ae_false, &v, 1, nv, 1.0, &vv, 1, nv+1, -2*wr->
ptr.
p_double[j], _state);
8282 temp = temp*absw*absw;
8283 matrixvectormultiply(h, l, l+nv+1, l, l+nv,
ae_false, &vv, 1, nv+1, 1.0, &v, 1, nv+2, temp, _state);
8297 for(ii=2; ii<=nv; ii++)
8314 for(k=l; k<=i-1; k++)
8339 for(ii=k+1; ii<=
i; ii++)
8356 applyreflectionfromtheright(h, tau, &v, i1,
ae_minint(k+nr, i, _state), k, k+nr-1, &work, _state);
8382 hsschur_internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
8400 static void hsschur_internalauxschur(
ae_bool wantt,
8498 smlnum = unfl*(nh/ulp);
8535 for(its=0; its<=itn; its++)
8541 for(k=i; k>=l+1; k--)
8577 if( its==10||its==20 )
8599 disc = (h33-h44)*0.5;
8600 disc = disc*disc+h43h34;
8608 ave = 0.5*(h33+h44);
8611 h33 = h33*h44-h43h34;
8612 h44 = h33/(hsschur_extschursign(disc, ave, _state)+ave);
8616 h44 = hsschur_extschursign(disc, ave, _state)+ave;
8626 for(m=i-2; m>=l; m--)
8640 v1 = (h33s*h44s-h43h34)/h21+h12;
8641 v2 = h22-h11-h33s-h44s;
8666 for(k=m; k<=i-1; k++)
8682 for(p1=1; p1<=nr; p1++)
8715 for(j=k; j<=i2; j++)
8727 for(j=i1; j<=
ae_minint(k+3, i, _state); j++)
8740 for(j=iloz; j<=ihiz; j++)
8758 for(j=k; j<=i2; j++)
8769 for(j=i1; j<=
i; j++)
8781 for(j=iloz; j<=ihiz; j++)
8825 hsschur_aux2x2schur(&him1im1, &him1i, &hiim1, &hii, &wrim1, &wiim1, &wri, &wii, &cs, &sn, _state);
8873 static void hsschur_aux2x2schur(
double* a,
8936 if(
ae_fp_eq(*a-(*d),0)&&hsschur_extschursigntoone(*b, _state)!=hsschur_extschursigntoone(*c, _state) )
8946 bcmis =
ae_minreal(
ae_fabs(*b, _state),
ae_fabs(*c, _state), _state)*hsschur_extschursigntoone(*b, _state)*hsschur_extschursigntoone(*c, _state);
8948 z = p/scl*p+bcmax/scl*bcmis;
8960 z = p+hsschur_extschursign(
ae_sqrt(scl, _state)*
ae_sqrt(z, _state), p, _state);
8962 *d = *d-bcmax/z*bcmis;
8981 tau =
pythag2(sigma, temp, _state);
8983 *sn = -p/(tau*(*cs))*hsschur_extschursign(1, sigma, _state);
8989 aa = *a*(*cs)+*b*(*sn);
8990 bb = -*a*(*sn)+*b*(*cs);
8991 cc = *c*(*cs)+*d*(*sn);
8992 dd = -*c*(*sn)+*d*(*cs);
8998 *a = aa*(*cs)+cc*(*sn);
8999 *b = bb*(*cs)+dd*(*sn);
9000 *c = -aa*(*sn)+cc*(*cs);
9001 *d = -bb*(*sn)+dd*(*cs);
9002 temp = 0.5*(*a+(*d));
9009 if( hsschur_extschursigntoone(*b, _state)==hsschur_extschursigntoone(*c, _state) )
9017 p = hsschur_extschursign(sab*sac, *c, _state);
9025 temp = *cs*cs1-*sn*sn1;
9026 *sn = *cs*sn1+*sn*cs1;
9062 static double hsschur_extschursign(
double a,
double b,
ae_state *_state)
9275 for(k=1; k<=j-1; k++)
9288 for(j=1; j<=n-1; j++)
9291 for(k=j+1; k<=
n; k++)
9320 tscal = 1/(smlnum*tmax);
9374 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9423 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9479 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9502 xbnd = xbnd*(tjj/xj);
9521 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9549 if( (upper&¬ran)||(!upper&&!notran) )
9560 for(i=n-1; i>=1; i--)
9640 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9701 rec = tjj*bignum/xj;
9780 for(k=2; k<=j-1; k++)
9803 for(k=j+2; k<=
n; k++)
9823 while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9902 for(i=1; i<=j-1; i++)
9912 for(i=j+1; i<=
n; i++)
9985 rec = tjj*bignum/xj;
10071 ae_assert(n>0,
"RMatrixTRSafeSolve: incorrect N!", _state);
10072 ae_assert(trans==0||trans==1,
"RMatrixTRSafeSolve: incorrect Trans!", _state);
10089 for(i=0; i<=n-1; i++)
10100 if( isupper&&trans==0 )
10106 for(i=n-1; i>=0; i--)
10134 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10145 if( !isupper&&trans==0 )
10151 for(i=0; i<=n-1; i++)
10179 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10190 if( isupper&&trans==1 )
10196 for(i=0; i<=n-1; i++)
10215 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10236 if( !isupper&&trans==1 )
10242 for(i=n-1; i>=0; i--)
10261 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10330 ae_assert(n>0,
"CMatrixTRSafeSolve: incorrect N!", _state);
10331 ae_assert((trans==0||trans==1)||trans==2,
"CMatrixTRSafeSolve: incorrect Trans!", _state);
10348 for(i=0; i<=n-1; i++)
10359 if( isupper&&trans==0 )
10365 for(i=n-1; i>=0; i--)
10393 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10404 if( !isupper&&trans==0 )
10410 for(i=0; i<=n-1; i++)
10438 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10449 if( isupper&&trans==1 )
10455 for(i=0; i<=n-1; i++)
10474 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10494 if( !isupper&&trans==1 )
10500 for(i=n-1; i>=0; i--)
10519 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10539 if( isupper&&trans==2 )
10545 for(i=0; i<=n-1; i++)
10564 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10584 if( !isupper&&trans==2 )
10590 for(i=n-1; i>=0; i--)
10609 result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10741 batch4size = 3*chunksize*ntotal+chunksize*(2*nout+1);
10754 if( buf->
x.
cnt<nin )
10758 if( buf->
y.
cnt<nout )
10774 if( buf->
g.
cnt<wcount )
10778 if( !hpccores_hpcpreparechunkedgradientx(weights, wcount, &buf->
hpcbuf, _state) )
10780 for(i=0; i<=wcount-1; i++)
10813 if( !hpccores_hpcfinalizechunkedgradientx(&buf->
hpcbuf, buf->
wcount, grad, _state) )
10815 for(i=0; i<=buf->
wcount-1; i++)
10840 #ifndef ALGLIB_INTERCEPTS_SSE2 10847 return _ialglib_i_hpcchunkedgradient(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, e, naturalerrorfunc);
10867 #ifndef ALGLIB_INTERCEPTS_SSE2 10874 return _ialglib_i_hpcchunkedprocess(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf);
10891 #ifndef ALGLIB_INTERCEPTS_SSE2 10898 return _ialglib_i_hpcpreparechunkedgradientx(weights, wcount, hpcbuf);
10915 #ifndef ALGLIB_INTERCEPTS_SSE2 10922 return _ialglib_i_hpcfinalizechunkedgradientx(buf, wcount, grad);
11073 for(i=0; i<=n-1; i++)
11085 xblas_xsum(temp, mx, n, r, rerr, _state);
11144 for(i=0; i<=n-1; i++)
11160 xblas_xsum(temp, mx, 2*n, &r->
x, &rerrx, _state);
11167 for(i=0; i<=n-1; i++)
11183 xblas_xsum(temp, mx, 2*n, &r->
y, &rerry, _state);
11254 ae_assert(n<536870912,
"XDot: N is too large!", _state);
11259 ln2 =
ae_log(2, _state);
11268 s = xblas_xfastpow(2, -k, _state);
11287 k =
ae_trunc(
ae_log((
double)536870912/(
double)n, _state)/ln2, _state);
11288 chunk = xblas_xfastpow(2, k, _state);
11293 invchunk = 1/chunk;
11305 for(i=0; i<=n-1; i++)
11318 if( allzeros||
ae_fp_eq(s*n+mx,mx) )
11347 result =
ae_sqr(xblas_xfastpow(r, n/2, _state), _state);
11351 result = r*xblas_xfastpow(r, n-1, _state);
11361 result = xblas_xfastpow(1/r, -n, _state);
11391 for(i=0; i<=n-1; i++)
11549 stpmax = linmin_defstpmax;
11553 *stp = linmin_stpmin;
11614 state->
width = stpmax-linmin_stpmin;
11677 *stp = linmin_stpmin;
11725 if( *nfev>=linmin_maxfev )
11769 state->
fm = *f-*stp*state->
dgtest;
11780 linmin_mcstep(&state->
stx, &state->
fxm, &state->
dgxm, &state->
sty, &state->
fym, &state->
dgym, stp, state->
fm, state->
dgm, &state->
brackt, state->
stmin, state->
stmax, &state->
infoc, _state);
11797 linmin_mcstep(&state->
stx, &state->
fx, &state->
dgx, &state->
sty, &state->
fy, &state->
dgy, stp, *f, state->
dg, &state->
brackt, state->
stmin, state->
stmax, &state->
infoc, _state);
11808 *stp = state->
stx+p5*(state->
sty-state->
stx);
11861 if( state->
x.
cnt<n )
11869 if( state->
s.
cnt<n )
11971 v = state->
stplen*linmin_armijofactor;
11987 state->
fcur = state->
f;
12013 v = state->
stplen*linmin_armijofactor;
12031 state->
fcur = state->
f;
12046 v = state->
stplen/linmin_armijofactor;
12058 state->
fcur = state->
f;
12084 v = state->
stplen/linmin_armijofactor;
12098 state->
fcur = state->
f;
12153 *info = state->
info;
12159 static void linmin_mcstep(
double* stx,
12200 sgnd = dp*(*dx/
ae_fabs(*dx, _state));
12212 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
12214 gamma = s*
ae_sqrt(
ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
12219 p = gamma-(*dx)+
theta;
12220 q = gamma-(*dx)+gamma+dp;
12222 stpc = *stx+r*(*stp-(*stx));
12223 stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
12230 stpf = stpc+(stpq-stpc)/2;
12247 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
12249 gamma = s*
ae_sqrt(
ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
12254 p = gamma-dp+
theta;
12255 q = gamma-dp+gamma+(*dx);
12257 stpc = *stp+r*(*stx-(*stp));
12258 stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
12286 theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
12298 p = gamma-dp+
theta;
12299 q = gamma+(*dx-dp)+gamma;
12303 stpc = *stp+r*(*stx-(*stp));
12316 stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
12353 theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
12355 gamma = s*
ae_sqrt(
ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
12360 p = gamma-dp+
theta;
12361 q = gamma-dp+gamma+(*dy);
12363 stpc = *stp+r*(*sty-(*stp));
12410 if( *brackt&&bound )
12414 *stp =
ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
12418 *stp =
ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
12566 ae_assert(n>=3,
"FindPrimitiveRootAndInverse: N<3", _state);
12573 ae_assert(ntheory_isprime(n, _state),
"FindPrimitiveRoot: N is not prime", _state);
12592 for(candroot=2; candroot<=n-1; candroot++)
12612 t = ntheory_modexp(candroot, phin/f, n, _state);
12631 ae_assert(*proot>=2,
"FindPrimitiveRoot: internal error (root not found)", _state);
12666 ae_assert(n2/(n-1)==n-1,
"FindPrimitiveRoot: internal error", _state);
12667 ae_assert(*proot*(*invproot)/(*proot)==(*invproot),
"FindPrimitiveRoot: internal error", _state);
12668 ae_assert(*proot*(*invproot)/(*invproot)==(*proot),
"FindPrimitiveRoot: internal error", _state);
12669 ae_assert(*proot*(*invproot)%n==1,
"FindPrimitiveRoot: internal error", _state);
12705 ae_assert(a>=0&&a<n,
"ModMul: A<0 or A>=N", _state);
12706 ae_assert(b>=0&&b<n,
"ModMul: B<0 or B>=N", _state);
12746 t = ntheory_modmul(a, b/2, n, _state);
12768 t = ntheory_modmul(a, b/2, n, _state);
12795 ae_assert(a>=0&&a<n,
"ModExp: A<0 or A>=N", _state);
12796 ae_assert(b>=0,
"ModExp: B<0", _state);
12817 t = ntheory_modmul(a, a, n, _state);
12818 result = ntheory_modexp(t, b/2, n, _state);
12822 t = ntheory_modmul(a, a, n, _state);
12823 result = ntheory_modexp(t, b/2, n, _state);
12824 result = ntheory_modmul(result, a, n, _state);
12867 ae_assert(n>0,
"FTComplexFFTPlan: N<=0", _state);
12868 ae_assert(k>0,
"FTComplexFFTPlan: K<=0", _state);
12887 ftbase_ftdeterminespacerequirements(n, &precrsize, &precisize, _state);
12905 ftbase_ftcomplexfftplanrec(n, k,
ae_true,
ae_true, &rowptr, &bluesteinsize, &precrptr, &preciptr, plan, _state);
12913 ae_assert(precrptr==precrsize,
"FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
12914 ae_assert(preciptr==precisize,
"FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
12948 for(i=0; i<=repcnt-1; i++)
12950 ftbase_ftapplysubplan(plan, 0, a, offsa+plansize*i, 0, &plan->
buffer, 1, _state);
12985 for(j=ftbase_ftbasecodeletrecommended; j>=2; j--)
13001 for(j=ftbase_ftbasecodeletrecommended+1; j<=n-1; j++)
13024 if( *n2==1&&*n1!=1 )
13044 for(i=2; i<=ftbase_ftbasemaxsmoothfactor; i++)
13074 ftbase_ftbasefindsmoothrec(n, 1, 2, &best, _state);
13098 ftbase_ftbasefindsmoothrec(n, 2, 2, &best, _state);
13120 result = ftbase_ftbaseinefficiencyfactor*(4*n*
ae_log(n, _state)/
ae_log(2, _state)-6*n+8);
13144 static void ftbase_ftdeterminespacerequirements(
ae_int_t n,
13171 for(i=2; i<=ftbase_maxradix; i++)
13183 if( f>ftbase_raderthreshold )
13189 *precrsize = *precrsize+2*(f-1);
13190 ftbase_ftdeterminespacerequirements(f-1, precrsize, precisize, _state);
13242 static void ftbase_ftcomplexfftplanrec(
ae_int_t n,
13268 ae_assert(n>0,
"FTComplexFFTPlan: N<=0", _state);
13269 ae_assert(k>0,
"FTComplexFFTPlan: K<=0", _state);
13270 ae_assert(!topmostplan||childplan,
"FTComplexFFTPlan: ChildPlan is inconsistent with TopmostPlan", _state);
13275 if( topmostplan&&n>ftbase_recursivethreshold )
13277 ftbase_ftfactorize(n,
ae_false, &n1, &n2, _state);
13286 *bluesteinsize =
ae_maxint(2*m, *bluesteinsize, _state);
13291 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13292 ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
13294 ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
13295 ftbase_ftcomplexfftplanrec(m, 1,
ae_true,
ae_true, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13298 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13303 ftbase_ftprecomputebluesteinsfft(n, m, &plan->
precr, *precrptr, _state);
13308 *precrptr = *precrptr+4*
m;
13317 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13318 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13320 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13321 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
13322 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
13324 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13325 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13326 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13328 ftbase_ftcomplexfftplanrec(n1, 1,
ae_true,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13331 ftbase_ftcomplexfftplanrec(n2, 1,
ae_true,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13346 ftbase_ftfactorize(n,
ae_false, &n1, &n2, _state);
13353 if( n<=ftbase_maxradix )
13361 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13363 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodeletfft, k, n, 2, 0, _state);
13366 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13371 if( n<=ftbase_raderthreshold )
13380 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13383 ftbase_ftpushentry4(plan, rowptr, ftbase_opradersfft, k, n, 2, 2, gq, giq, *precrptr, _state);
13384 ftbase_ftprecomputeradersfft(n, gq, giq, &plan->
precr, *precrptr, _state);
13385 *precrptr = *precrptr+2*(n-1);
13387 ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
13388 ftbase_ftcomplexfftplanrec(m, 1,
ae_true,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13393 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13403 *bluesteinsize =
ae_maxint(2*m, *bluesteinsize, _state);
13406 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13408 ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
13409 ftbase_ftprecomputebluesteinsfft(n, m, &plan->
precr, *precrptr, _state);
13410 *precrptr = *precrptr+4*
m;
13412 ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
13413 ftbase_ftcomplexfftplanrec(m, 1,
ae_true,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13418 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13428 if( n1<=ftbase_maxradix )
13439 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13441 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodelettwfft, k, n1, 2*n2, 0, _state);
13442 ftbase_ftcomplexfftplanrec(n2, k*n1,
ae_false,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13443 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13446 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13455 if( n<=ftbase_recursivethreshold )
13464 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13466 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13467 ftbase_ftcomplexfftplanrec(n1, k*n2,
ae_false,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13468 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
13469 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
13470 ftbase_ftcomplexfftplanrec(n2, k*n1,
ae_false,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13471 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13474 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13488 ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13490 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13492 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13493 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
13494 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
13496 ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13497 ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13500 ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13507 ftbase_ftcomplexfftplanrec(n1, 1,
ae_true,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13510 ftbase_ftcomplexfftplanrec(n2, 1,
ae_true,
ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13550 ftbase_ftpushentry2(plan, rowptr, etype, eopcnt, eopsize, emcvsize, eparam0, -1, _state);
13600 *rowptr = *rowptr+1;
13654 *rowptr = *rowptr+1;
13723 ae_assert(plan->
entries.
ptr.
pp_int[subplan][ftbase_coltype]==ftbase_opstart,
"FTApplySubPlan: incorrect subplan header", _state);
13724 rowidx = subplan+1;
13728 operandscnt = repcnt*plan->
entries.
ptr.
pp_int[rowidx][ftbase_coloperandscnt];
13730 microvectorsize = plan->
entries.
ptr.
pp_int[rowidx][ftbase_colmicrovectorsize];
13738 if( operation==ftbase_opjmp )
13750 if( operation==ftbase_opparallelcall )
13752 parentsize = operandsize*microvectorsize;
13754 ae_assert(plan->
entries.
ptr.
pp_int[rowidx+param0][ftbase_coltype]==ftbase_opstart,
"FTApplySubPlan: incorrect child subplan header", _state);
13755 ae_assert(parentsize==childsize,
"FTApplySubPlan: incorrect child subplan header", _state);
13756 chunksize =
ae_maxint(ftbase_recursivethreshold/childsize, 1, _state);
13757 lastchunksize = operandscnt%chunksize;
13758 if( lastchunksize==0 )
13760 lastchunksize = chunksize;
13763 while(i<operandscnt)
13765 chunksize =
ae_minint(chunksize, operandscnt-i, _state);
13766 ftbase_ftapplysubplan(plan, rowidx+param0, a, abase, aoffset+i*childsize, buf, chunksize, _state);
13776 if( operation==ftbase_opcomplexreffft )
13778 ftbase_ftapplycomplexreffft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, buf, _state);
13786 if( operation==ftbase_opcomplexcodeletfft )
13788 ftbase_ftapplycomplexcodeletfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
13796 if( operation==ftbase_opcomplexcodelettwfft )
13798 ftbase_ftapplycomplexcodelettwfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
13806 if( operation==ftbase_opbluesteinsfft )
13808 ae_assert(microvectorsize==2,
"FTApplySubPlan: microvectorsize!=2 for Bluesteins FFT", _state);
13813 ftbase_ftbluesteinsfft(plan, a, abase, aoffset, operandscnt, operandsize, plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam0], plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam2], rowidx+plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam1], &bufa->val, &bufb->val, &bufc->val, &bufd->val, _state);
13825 if( operation==ftbase_opradersfft )
13827 ftbase_ftradersfft(plan, a, abase, aoffset, operandscnt, operandsize, rowidx+plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam0], plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam1], plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam2], plan->
entries.
ptr.
pp_int[rowidx][ftbase_colparam3], buf, _state);
13835 if( operation==ftbase_opcomplexfftfactors )
13837 ae_assert(microvectorsize==2,
"FTApplySubPlan: MicrovectorSize<>1", _state);
13839 n2 = operandsize/n1;
13840 for(i=0; i<=operandscnt-1; i++)
13842 ftbase_ffttwcalc(a, abase+aoffset+i*operandsize*2, n1, n2, _state);
13851 if( operation==ftbase_opcomplextranspose )
13853 ae_assert(microvectorsize==2,
"FTApplySubPlan: MicrovectorSize<>1", _state);
13855 n2 = operandsize/n1;
13856 for(i=0; i<=operandscnt-1; i++)
13858 ftbase_internalcomplexlintranspose(a, n1, n2, abase+aoffset+i*operandsize*2, buf, _state);
13892 static void ftbase_ftapplycomplexreffft(
ae_vector* a,
13912 ae_assert(operandscnt>=1,
"FTApplyComplexRefFFT: OperandsCnt<1", _state);
13913 ae_assert(operandsize>=1,
"FTApplyComplexRefFFT: OperandSize<1", _state);
13914 ae_assert(microvectorsize==2,
"FTApplyComplexRefFFT: MicrovectorSize<>2", _state);
13916 for(opidx=0; opidx<=operandscnt-1; opidx++)
13918 for(i=0; i<=n-1; i++)
13922 for(k=0; k<=n-1; k++)
13924 re = a->
ptr.
p_double[offs+opidx*operandsize*2+2*k+0];
13925 im = a->
ptr.
p_double[offs+opidx*operandsize*2+2*k+1];
13928 hre = hre+c*re-s*im;
13929 him = him+c*im+s*re;
13934 for(i=0; i<=operandsize*2-1; i++)
13958 static void ftbase_ftapplycomplexcodeletfft(
ae_vector* a,
14022 ae_assert(operandscnt>=1,
"FTApplyComplexCodeletFFT: OperandsCnt<1", _state);
14023 ae_assert(operandsize>=1,
"FTApplyComplexCodeletFFT: OperandSize<1", _state);
14024 ae_assert(microvectorsize==2,
"FTApplyComplexCodeletFFT: MicrovectorSize<>2", _state);
14030 ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletFFT: N>MaxRadix
", _state); 14033 for(opidx=0; opidx<=operandscnt-1; opidx++) 14035 aoffset = offs+opidx*operandsize*2; 14036 a0x = a->ptr.p_double[aoffset+0]; 14037 a0y = a->ptr.p_double[aoffset+1]; 14038 a1x = a->ptr.p_double[aoffset+2]; 14039 a1y = a->ptr.p_double[aoffset+3]; 14044 a->ptr.p_double[aoffset+0] = v0; 14045 a->ptr.p_double[aoffset+1] = v1; 14046 a->ptr.p_double[aoffset+2] = v2; 14047 a->ptr.p_double[aoffset+3] = v3; 14053 c1 = ae_cos(2*ae_pi/3, _state)-1; 14054 c2 = ae_sin(2*ae_pi/3, _state); 14055 for(opidx=0; opidx<=operandscnt-1; opidx++) 14057 aoffset = offs+opidx*operandsize*2; 14058 a0x = a->ptr.p_double[aoffset+0]; 14059 a0y = a->ptr.p_double[aoffset+1]; 14060 a1x = a->ptr.p_double[aoffset+2]; 14061 a1y = a->ptr.p_double[aoffset+3]; 14062 a2x = a->ptr.p_double[aoffset+4]; 14063 a2y = a->ptr.p_double[aoffset+5]; 14070 m2x = c2*(a1y-a2y); 14071 m2y = c2*(a2x-a1x); 14078 a->ptr.p_double[aoffset+0] = a0x; 14079 a->ptr.p_double[aoffset+1] = a0y; 14080 a->ptr.p_double[aoffset+2] = a1x; 14081 a->ptr.p_double[aoffset+3] = a1y; 14082 a->ptr.p_double[aoffset+4] = a2x; 14083 a->ptr.p_double[aoffset+5] = a2y; 14089 for(opidx=0; opidx<=operandscnt-1; opidx++) 14091 aoffset = offs+opidx*operandsize*2; 14092 a0x = a->ptr.p_double[aoffset+0]; 14093 a0y = a->ptr.p_double[aoffset+1]; 14094 a1x = a->ptr.p_double[aoffset+2]; 14095 a1y = a->ptr.p_double[aoffset+3]; 14096 a2x = a->ptr.p_double[aoffset+4]; 14097 a2y = a->ptr.p_double[aoffset+5]; 14098 a3x = a->ptr.p_double[aoffset+6]; 14099 a3y = a->ptr.p_double[aoffset+7]; 14108 a->ptr.p_double[aoffset+0] = t1x+t2x; 14109 a->ptr.p_double[aoffset+1] = t1y+t2y; 14110 a->ptr.p_double[aoffset+4] = t1x-t2x; 14111 a->ptr.p_double[aoffset+5] = t1y-t2y; 14112 a->ptr.p_double[aoffset+2] = m2x+m3x; 14113 a->ptr.p_double[aoffset+3] = m2y+m3y; 14114 a->ptr.p_double[aoffset+6] = m2x-m3x; 14115 a->ptr.p_double[aoffset+7] = m2y-m3y; 14122 c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1; 14123 c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2; 14124 c3 = -ae_sin(v, _state); 14125 c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state)); 14126 c5 = ae_sin(v, _state)-ae_sin(2*v, _state); 14127 for(opidx=0; opidx<=operandscnt-1; opidx++) 14129 aoffset = offs+opidx*operandsize*2; 14130 t1x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+8]; 14131 t1y = a->ptr.p_double[aoffset+3]+a->ptr.p_double[aoffset+9]; 14132 t2x = a->ptr.p_double[aoffset+4]+a->ptr.p_double[aoffset+6]; 14133 t2y = a->ptr.p_double[aoffset+5]+a->ptr.p_double[aoffset+7]; 14134 t3x = a->ptr.p_double[aoffset+2]-a->ptr.p_double[aoffset+8]; 14135 t3y = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+9]; 14136 t4x = a->ptr.p_double[aoffset+6]-a->ptr.p_double[aoffset+4]; 14137 t4y = a->ptr.p_double[aoffset+7]-a->ptr.p_double[aoffset+5]; 14140 a->ptr.p_double[aoffset+0] = a->ptr.p_double[aoffset+0]+t5x; 14141 a->ptr.p_double[aoffset+1] = a->ptr.p_double[aoffset+1]+t5y; 14144 m2x = c2*(t1x-t2x); 14145 m2y = c2*(t1y-t2y); 14146 m3x = -c3*(t3y+t4y); 14147 m3y = c3*(t3x+t4x); 14156 s1x = a->ptr.p_double[aoffset+0]+m1x; 14157 s1y = a->ptr.p_double[aoffset+1]+m1y; 14162 a->ptr.p_double[aoffset+2] = s2x+s3x; 14163 a->ptr.p_double[aoffset+3] = s2y+s3y; 14164 a->ptr.p_double[aoffset+4] = s4x+s5x; 14165 a->ptr.p_double[aoffset+5] = s4y+s5y; 14166 a->ptr.p_double[aoffset+6] = s4x-s5x; 14167 a->ptr.p_double[aoffset+7] = s4y-s5y; 14168 a->ptr.p_double[aoffset+8] = s2x-s3x; 14169 a->ptr.p_double[aoffset+9] = s2y-s3y; 14175 c1 = ae_cos(2*ae_pi/3, _state)-1; 14176 c2 = ae_sin(2*ae_pi/3, _state); 14177 c3 = ae_cos(-ae_pi/3, _state); 14178 c4 = ae_sin(-ae_pi/3, _state); 14179 for(opidx=0; opidx<=operandscnt-1; opidx++) 14181 aoffset = offs+opidx*operandsize*2; 14182 a0x = a->ptr.p_double[aoffset+0]; 14183 a0y = a->ptr.p_double[aoffset+1]; 14184 a1x = a->ptr.p_double[aoffset+2]; 14185 a1y = a->ptr.p_double[aoffset+3]; 14186 a2x = a->ptr.p_double[aoffset+4]; 14187 a2y = a->ptr.p_double[aoffset+5]; 14188 a3x = a->ptr.p_double[aoffset+6]; 14189 a3y = a->ptr.p_double[aoffset+7]; 14190 a4x = a->ptr.p_double[aoffset+8]; 14191 a4y = a->ptr.p_double[aoffset+9]; 14192 a5x = a->ptr.p_double[aoffset+10]; 14193 a5y = a->ptr.p_double[aoffset+11]; 14212 t4x = a4x*c3-a4y*c4; 14213 t4y = a4x*c4+a4y*c3; 14216 t5x = -a5x*c3-a5y*c4; 14217 t5y = a5x*c4-a5y*c3; 14226 m2x = c2*(a1y-a2y); 14227 m2y = c2*(a2x-a1x); 14240 m2x = c2*(a4y-a5y); 14241 m2y = c2*(a5x-a4x); 14248 a->ptr.p_double[aoffset+0] = a0x; 14249 a->ptr.p_double[aoffset+1] = a0y; 14250 a->ptr.p_double[aoffset+2] = a3x; 14251 a->ptr.p_double[aoffset+3] = a3y; 14252 a->ptr.p_double[aoffset+4] = a1x; 14253 a->ptr.p_double[aoffset+5] = a1y; 14254 a->ptr.p_double[aoffset+6] = a4x; 14255 a->ptr.p_double[aoffset+7] = a4y; 14256 a->ptr.p_double[aoffset+8] = a2x; 14257 a->ptr.p_double[aoffset+9] = a2y; 14258 a->ptr.p_double[aoffset+10] = a5x; 14259 a->ptr.p_double[aoffset+11] = a5y; 14266 /************************************************************************* 14267 This subroutine applies complex "integrated
" codelet FFT to input/output 14268 array A. "Integrated
" codelet differs from "normal
" one in following ways: 14269 * it can work with MicrovectorSize>1 14270 * hence, it can be used in Cooley-Tukey FFT without transpositions 14271 * it performs inlined multiplication by twiddle factors of Cooley-Tukey 14272 FFT with N2=MicrovectorSize/2. 14275 A - array, must be large enough for plan to work 14276 Offs - offset of the subarray to process 14277 OperandsCnt - operands count (see description of FastTransformPlan) 14278 OperandSize - operand size (see description of FastTransformPlan) 14279 MicrovectorSize-microvector size, must be 1 14282 A - transformed array 14285 Copyright 05.04.2013 by Bochkanov Sergey 14286 *************************************************************************/ 14287 static void ftbase_ftapplycomplexcodelettwfft(/* Real */ ae_vector* a, 14289 ae_int_t operandscnt, 14290 ae_int_t operandsize, 14291 ae_int_t microvectorsize, 14303 ae_int_t aoffset10; 14373 ae_assert(operandscnt>=1, "FTApplyComplexCodeletFFT: OperandsCnt<1
", _state); 14374 ae_assert(operandsize>=1, "FTApplyComplexCodeletFFT: OperandSize<1
", _state); 14375 ae_assert(microvectorsize>=1, "FTApplyComplexCodeletFFT: MicrovectorSize<>1
", _state); 14376 ae_assert(microvectorsize%2==0, "FTApplyComplexCodeletFFT: MicrovectorSize is not even
", _state); 14378 m = microvectorsize/2; 14381 * Hard-coded transforms for different N's 14383 ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletTwFFT: N>MaxRadix
", _state); 14386 v = -2*ae_pi/(n*m); 14387 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); 14388 tw1 = ae_sin(v, _state); 14389 for(opidx=0; opidx<=operandscnt-1; opidx++) 14391 aoffset0 = offs+opidx*operandsize*microvectorsize; 14392 aoffset2 = aoffset0+microvectorsize; 14395 for(mvidx=0; mvidx<=m-1; mvidx++) 14397 a0x = a->ptr.p_double[aoffset0]; 14398 a0y = a->ptr.p_double[aoffset0+1]; 14399 a1x = a->ptr.p_double[aoffset2]; 14400 a1y = a->ptr.p_double[aoffset2+1]; 14405 a->ptr.p_double[aoffset0] = v0; 14406 a->ptr.p_double[aoffset0+1] = v1; 14407 a->ptr.p_double[aoffset2] = v2*(1+twxm1)-v3*twy; 14408 a->ptr.p_double[aoffset2+1] = v3*(1+twxm1)+v2*twy; 14409 aoffset0 = aoffset0+2; 14410 aoffset2 = aoffset2+2; 14411 if( (mvidx+1)%ftbase_updatetw==0 ) 14413 v = -2*ae_pi*(mvidx+1)/(n*m); 14414 twxm1 = ae_sin(0.5*v, _state); 14415 twxm1 = -2*twxm1*twxm1; 14416 twy = ae_sin(v, _state); 14420 v = twxm1+tw0+twxm1*tw0-twy*tw1; 14421 twy = twy+tw1+twxm1*tw1+twy*tw0; 14430 v = -2*ae_pi/(n*m); 14431 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); 14432 tw1 = ae_sin(v, _state); 14433 c1 = ae_cos(2*ae_pi/3, _state)-1; 14434 c2 = ae_sin(2*ae_pi/3, _state); 14435 for(opidx=0; opidx<=operandscnt-1; opidx++) 14437 aoffset0 = offs+opidx*operandsize*microvectorsize; 14438 aoffset2 = aoffset0+microvectorsize; 14439 aoffset4 = aoffset2+microvectorsize; 14443 for(mvidx=0; mvidx<=m-1; mvidx++) 14445 a0x = a->ptr.p_double[aoffset0]; 14446 a0y = a->ptr.p_double[aoffset0+1]; 14447 a1x = a->ptr.p_double[aoffset2]; 14448 a1y = a->ptr.p_double[aoffset2+1]; 14449 a2x = a->ptr.p_double[aoffset4]; 14450 a2y = a->ptr.p_double[aoffset4+1]; 14457 m2x = c2*(a1y-a2y); 14458 m2y = c2*(a2x-a1x); 14465 tw2x = twx*twx-twy*twy; 14467 a->ptr.p_double[aoffset0] = a0x; 14468 a->ptr.p_double[aoffset0+1] = a0y; 14469 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy; 14470 a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy; 14471 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y; 14472 a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y; 14473 aoffset0 = aoffset0+2; 14474 aoffset2 = aoffset2+2; 14475 aoffset4 = aoffset4+2; 14476 if( (mvidx+1)%ftbase_updatetw==0 ) 14478 v = -2*ae_pi*(mvidx+1)/(n*m); 14479 twxm1 = ae_sin(0.5*v, _state); 14480 twxm1 = -2*twxm1*twxm1; 14481 twy = ae_sin(v, _state); 14486 v = twxm1+tw0+twxm1*tw0-twy*tw1; 14487 twy = twy+tw1+twxm1*tw1+twy*tw0; 14497 v = -2*ae_pi/(n*m); 14498 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); 14499 tw1 = ae_sin(v, _state); 14500 for(opidx=0; opidx<=operandscnt-1; opidx++) 14502 aoffset0 = offs+opidx*operandsize*microvectorsize; 14503 aoffset2 = aoffset0+microvectorsize; 14504 aoffset4 = aoffset2+microvectorsize; 14505 aoffset6 = aoffset4+microvectorsize; 14509 for(mvidx=0; mvidx<=m-1; mvidx++) 14511 a0x = a->ptr.p_double[aoffset0]; 14512 a0y = a->ptr.p_double[aoffset0+1]; 14513 a1x = a->ptr.p_double[aoffset2]; 14514 a1y = a->ptr.p_double[aoffset2+1]; 14515 a2x = a->ptr.p_double[aoffset4]; 14516 a2y = a->ptr.p_double[aoffset4+1]; 14517 a3x = a->ptr.p_double[aoffset6]; 14518 a3y = a->ptr.p_double[aoffset6+1]; 14527 tw2x = twx*twx-twy*twy; 14529 tw3x = twx*tw2x-twy*tw2y; 14530 tw3y = twx*tw2y+twy*tw2x; 14537 a->ptr.p_double[aoffset0] = t1x+t2x; 14538 a->ptr.p_double[aoffset0+1] = t1y+t2y; 14539 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy; 14540 a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy; 14541 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y; 14542 a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y; 14543 a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y; 14544 a->ptr.p_double[aoffset6+1] = a3y*tw3x+a3x*tw3y; 14545 aoffset0 = aoffset0+2; 14546 aoffset2 = aoffset2+2; 14547 aoffset4 = aoffset4+2; 14548 aoffset6 = aoffset6+2; 14549 if( (mvidx+1)%ftbase_updatetw==0 ) 14551 v = -2*ae_pi*(mvidx+1)/(n*m); 14552 twxm1 = ae_sin(0.5*v, _state); 14553 twxm1 = -2*twxm1*twxm1; 14554 twy = ae_sin(v, _state); 14559 v = twxm1+tw0+twxm1*tw0-twy*tw1; 14560 twy = twy+tw1+twxm1*tw1+twy*tw0; 14570 v = -2*ae_pi/(n*m); 14571 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); 14572 tw1 = ae_sin(v, _state); 14574 c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1; 14575 c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2; 14576 c3 = -ae_sin(v, _state); 14577 c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state)); 14578 c5 = ae_sin(v, _state)-ae_sin(2*v, _state); 14579 for(opidx=0; opidx<=operandscnt-1; opidx++) 14581 aoffset0 = offs+opidx*operandsize*microvectorsize; 14582 aoffset2 = aoffset0+microvectorsize; 14583 aoffset4 = aoffset2+microvectorsize; 14584 aoffset6 = aoffset4+microvectorsize; 14585 aoffset8 = aoffset6+microvectorsize; 14589 for(mvidx=0; mvidx<=m-1; mvidx++) 14591 a0x = a->ptr.p_double[aoffset0]; 14592 a0y = a->ptr.p_double[aoffset0+1]; 14593 a1x = a->ptr.p_double[aoffset2]; 14594 a1y = a->ptr.p_double[aoffset2+1]; 14595 a2x = a->ptr.p_double[aoffset4]; 14596 a2y = a->ptr.p_double[aoffset4+1]; 14597 a3x = a->ptr.p_double[aoffset6]; 14598 a3y = a->ptr.p_double[aoffset6+1]; 14599 a4x = a->ptr.p_double[aoffset8]; 14600 a4y = a->ptr.p_double[aoffset8+1]; 14615 m2x = c2*(t1x-t2x); 14616 m2y = c2*(t1y-t2y); 14617 m3x = -c3*(t3y+t4y); 14618 m3y = c3*(t3x+t4x); 14633 tw2x = twx*twx-twy*twy; 14635 tw3x = twx*tw2x-twy*tw2y; 14636 tw3y = twx*tw2y+twy*tw2x; 14637 tw4x = tw2x*tw2x-tw2y*tw2y; 14638 tw4y = tw2x*tw2y+tw2y*tw2x; 14647 a->ptr.p_double[aoffset0] = q0x; 14648 a->ptr.p_double[aoffset0+1] = q0y; 14649 a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy; 14650 a->ptr.p_double[aoffset2+1] = a1x*twy+a1y*twx; 14651 a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y; 14652 a->ptr.p_double[aoffset4+1] = a2x*tw2y+a2y*tw2x; 14653 a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y; 14654 a->ptr.p_double[aoffset6+1] = a3x*tw3y+a3y*tw3x; 14655 a->ptr.p_double[aoffset8] = a4x*tw4x-a4y*tw4y; 14656 a->ptr.p_double[aoffset8+1] = a4x*tw4y+a4y*tw4x; 14657 aoffset0 = aoffset0+2; 14658 aoffset2 = aoffset2+2; 14659 aoffset4 = aoffset4+2; 14660 aoffset6 = aoffset6+2; 14661 aoffset8 = aoffset8+2; 14662 if( (mvidx+1)%ftbase_updatetw==0 ) 14664 v = -2*ae_pi*(mvidx+1)/(n*m); 14665 twxm1 = ae_sin(0.5*v, _state); 14666 twxm1 = -2*twxm1*twxm1; 14667 twy = ae_sin(v, _state); 14672 v = twxm1+tw0+twxm1*tw0-twy*tw1; 14673 twy = twy+tw1+twxm1*tw1+twy*tw0; 14683 c1 = ae_cos(2*ae_pi/3, _state)-1; 14684 c2 = ae_sin(2*ae_pi/3, _state); 14685 c3 = ae_cos(-ae_pi/3, _state); 14686 c4 = ae_sin(-ae_pi/3, _state); 14687 v = -2*ae_pi/(n*m); 14688 tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); 14689 tw1 = ae_sin(v, _state); 14690 for(opidx=0; opidx<=operandscnt-1; opidx++) 14692 aoffset0 = offs+opidx*operandsize*microvectorsize; 14693 aoffset2 = aoffset0+microvectorsize; 14694 aoffset4 = aoffset2+microvectorsize; 14695 aoffset6 = aoffset4+microvectorsize; 14696 aoffset8 = aoffset6+microvectorsize; 14697 aoffset10 = aoffset8+microvectorsize; 14701 for(mvidx=0; mvidx<=m-1; mvidx++) 14703 a0x = a->ptr.p_double[aoffset0+0]; 14704 a0y = a->ptr.p_double[aoffset0+1]; 14705 a1x = a->ptr.p_double[aoffset2+0]; 14706 a1y = a->ptr.p_double[aoffset2+1]; 14707 a2x = a->ptr.p_double[aoffset4+0]; 14708 a2y = a->ptr.p_double[aoffset4+1]; 14709 a3x = a->ptr.p_double[aoffset6+0]; 14710 a3y = a->ptr.p_double[aoffset6+1]; 14711 a4x = a->ptr.p_double[aoffset8+0]; 14712 a4y = a->ptr.p_double[aoffset8+1]; 14713 a5x = a->ptr.p_double[aoffset10+0]; 14714 a5y = a->ptr.p_double[aoffset10+1]; 14733 t4x = a4x*c3-a4y*c4; 14734 t4y = a4x*c4+a4y*c3; 14737 t5x = -a5x*c3-a5y*c4; 14738 t5y = a5x*c4-a5y*c3; 14747 m2x = c2*(a1y-a2y); 14748 m2y = c2*(a2x-a1x); 14761 m2x = c2*(a4y-a5y); 14762 m2y = c2*(a5x-a4x); 14769 tw2x = twx*twx-twy*twy; 14771 tw3x = twx*tw2x-twy*tw2y; 14772 tw3y = twx*tw2y+twy*tw2x; 14773 tw4x = tw2x*tw2x-tw2y*tw2y; 14774 tw4y = 2*tw2x*tw2y; 14775 tw5x = tw3x*tw2x-tw3y*tw2y; 14776 tw5y = tw3x*tw2y+tw3y*tw2x; 14777 a->ptr.p_double[aoffset0+0] = a0x; 14778 a->ptr.p_double[aoffset0+1] = a0y; 14779 a->ptr.p_double[aoffset2+0] = a3x*twx-a3y*twy; 14780 a->ptr.p_double[aoffset2+1] = a3y*twx+a3x*twy; 14781 a->ptr.p_double[aoffset4+0] = a1x*tw2x-a1y*tw2y; 14782 a->ptr.p_double[aoffset4+1] = a1y*tw2x+a1x*tw2y; 14783 a->ptr.p_double[aoffset6+0] = a4x*tw3x-a4y*tw3y; 14784 a->ptr.p_double[aoffset6+1] = a4y*tw3x+a4x*tw3y; 14785 a->ptr.p_double[aoffset8+0] = a2x*tw4x-a2y*tw4y; 14786 a->ptr.p_double[aoffset8+1] = a2y*tw4x+a2x*tw4y; 14787 a->ptr.p_double[aoffset10+0] = a5x*tw5x-a5y*tw5y; 14788 a->ptr.p_double[aoffset10+1] = a5y*tw5x+a5x*tw5y; 14789 aoffset0 = aoffset0+2; 14790 aoffset2 = aoffset2+2; 14791 aoffset4 = aoffset4+2; 14792 aoffset6 = aoffset6+2; 14793 aoffset8 = aoffset8+2; 14794 aoffset10 = aoffset10+2; 14795 if( (mvidx+1)%ftbase_updatetw==0 ) 14797 v = -2*ae_pi*(mvidx+1)/(n*m); 14798 twxm1 = ae_sin(0.5*v, _state); 14799 twxm1 = -2*twxm1*twxm1; 14800 twy = ae_sin(v, _state); 14805 v = twxm1+tw0+twxm1*tw0-twy*tw1; 14806 twy = twy+tw1+twxm1*tw1+twy*tw0; 14817 /************************************************************************* 14818 This subroutine precomputes data for complex Bluestein's FFT and writes 14819 them to array PrecR[] at specified offset. It is responsibility of the 14820 caller to make sure that PrecR[] is large enough. 14823 N - original size of the transform 14824 M - size of the "padded
" Bluestein's transform 14825 PrecR - preallocated array 14829 PrecR - data at Offs:Offs+4*M-1 are modified: 14830 * PrecR[Offs:Offs+2*M-1] stores Z[k]=exp(i*pi*k^2/N) 14831 * PrecR[Offs+2*M:Offs+4*M-1] stores FFT of the Z 14832 Other parts of PrecR are unchanged. 14834 NOTE: this function performs internal M-point FFT. It allocates temporary 14835 plan which is destroyed after leaving this function. 14838 Copyright 08.05.2013 by Bochkanov Sergey 14839 *************************************************************************/ 14840 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n, 14842 /* Real */ ae_vector* precr, 14846 ae_frame _frame_block; 14850 fasttransformplan plan; 14852 ae_frame_make(_state, &_frame_block); 14853 _fasttransformplan_init(&plan, _state, ae_true); 14857 * Fill first half of PrecR with b[k] = exp(i*pi*k^2/N) 14859 for(i=0; i<=2*m-1; i++) 14861 precr->ptr.p_double[offs+i] = 0; 14863 for(i=0; i<=n-1; i++) 14865 bx = ae_cos(ae_pi/n*i*i, _state); 14866 by = ae_sin(ae_pi/n*i*i, _state); 14867 precr->ptr.p_double[offs+2*i+0] = bx; 14868 precr->ptr.p_double[offs+2*i+1] = by; 14869 precr->ptr.p_double[offs+2*((m-i)%m)+0] = bx; 14870 precr->ptr.p_double[offs+2*((m-i)%m)+1] = by; 14876 ftcomplexfftplan(m, 1, &plan, _state); 14877 for(i=0; i<=2*m-1; i++) 14879 precr->ptr.p_double[offs+2*m+i] = precr->ptr.p_double[offs+i]; 14881 ftbase_ftapplysubplan(&plan, 0, precr, offs+2*m, 0, &plan.buffer, 1, _state); 14882 ae_frame_leave(_state); 14886 /************************************************************************* 14887 This subroutine applies complex Bluestein's FFT to input/output array A. 14890 Plan - transformation plan 14891 A - array, must be large enough for plan to work 14892 ABase - base offset in array A, this value points to start of 14893 subarray whose length is equal to length of the plan 14894 AOffset - offset with respect to ABase, 0<=AOffset<PlanLength. 14895 This is an offset within large PlanLength-subarray of 14896 the chunk to process. 14897 OperandsCnt - number of repeated operands (length N each) 14898 N - original data length (measured in complex numbers) 14899 M - padded data length (measured in complex numbers) 14900 PrecOffs - offset of the precomputed data for the plan 14901 SubPlan - position of the length-M FFT subplan which is used by 14903 BufA - temporary buffer, at least 2*M elements 14904 BufB - temporary buffer, at least 2*M elements 14905 BufC - temporary buffer, at least 2*M elements 14906 BufD - temporary buffer, at least 2*M elements 14909 A - transformed array 14912 Copyright 05.04.2013 by Bochkanov Sergey 14913 *************************************************************************/ 14914 static void ftbase_ftbluesteinsfft(fasttransformplan* plan, 14915 /* Real */ ae_vector* a, 14918 ae_int_t operandscnt, 14923 /* Real */ ae_vector* bufa, 14924 /* Real */ ae_vector* bufb, 14925 /* Real */ ae_vector* bufc, 14926 /* Real */ ae_vector* bufd, 14944 for(op=0; op<=operandscnt-1; op++) 14948 * Multiply A by conj(Z), store to buffer. 14951 * NOTE: Z[k]=exp(i*pi*k^2/N) 14953 p0 = abase+aoffset+op*2*n; 14955 for(i=0; i<=n-1; i++) 14957 x = a->ptr.p_double[p0+0]; 14958 y = a->ptr.p_double[p0+1]; 14959 bx = plan->precr.ptr.p_double[p1+0]; 14960 by = -plan->precr.ptr.p_double[p1+1]; 14961 bufa->ptr.p_double[2*i+0] = x*bx-y*by; 14962 bufa->ptr.p_double[2*i+1] = x*by+y*bx; 14966 for(i=2*n; i<=2*m-1; i++) 14968 bufa->ptr.p_double[i] = 0; 14972 * Perform convolution of A and Z (using precomputed 14973 * FFT of Z stored in Plan structure). 14975 ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state); 14978 for(i=0; i<=m-1; i++) 14980 ax = bufa->ptr.p_double[p0+0]; 14981 ay = bufa->ptr.p_double[p0+1]; 14982 bx = plan->precr.ptr.p_double[p1+0]; 14983 by = plan->precr.ptr.p_double[p1+1]; 14984 bufa->ptr.p_double[p0+0] = ax*bx-ay*by; 14985 bufa->ptr.p_double[p0+1] = -(ax*by+ay*bx); 14989 ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state); 14993 * A:=conj(Z)*conj(A)/M 14994 * Here conj(A)/M corresponds to last stage of inverse DFT, 14995 * and conj(Z) comes from Bluestein's FFT algorithm. 14999 p2 = abase+aoffset+op*2*n; 15000 for(i=0; i<=n-1; i++) 15002 bx = plan->precr.ptr.p_double[p0+0]; 15003 by = plan->precr.ptr.p_double[p0+1]; 15004 rx = bufa->ptr.p_double[p1+0]/m; 15005 ry = -bufa->ptr.p_double[p1+1]/m; 15006 a->ptr.p_double[p2+0] = rx*bx-ry*(-by); 15007 a->ptr.p_double[p2+1] = rx*(-by)+ry*bx; 15016 /************************************************************************* 15017 This subroutine precomputes data for complex Rader's FFT and writes them 15018 to array PrecR[] at specified offset. It is responsibility of the caller 15019 to make sure that PrecR[] is large enough. 15022 N - original size of the transform (before reduction to N-1) 15023 RQ - primitive root modulo N 15024 RIQ - inverse of primitive root modulo N 15025 PrecR - preallocated array 15029 PrecR - data at Offs:Offs+2*(N-1)-1 store FFT of Rader's factors, 15030 other parts of PrecR are unchanged. 15032 NOTE: this function performs internal (N-1)-point FFT. It allocates temporary 15033 plan which is destroyed after leaving this function. 15036 Copyright 08.05.2013 by Bochkanov Sergey 15037 *************************************************************************/ 15038 static void ftbase_ftprecomputeradersfft(ae_int_t n, 15041 /* Real */ ae_vector* precr, 15045 ae_frame _frame_block; 15047 fasttransformplan plan; 15051 ae_frame_make(_state, &_frame_block); 15052 _fasttransformplan_init(&plan, _state, ae_true); 15056 * Fill PrecR with Rader factors, perform FFT 15059 for(q=0; q<=n-2; q++) 15061 v = -2*ae_pi*kiq/n; 15062 precr->ptr.p_double[offs+2*q+0] = ae_cos(v, _state); 15063 precr->ptr.p_double[offs+2*q+1] = ae_sin(v, _state); 15066 ftcomplexfftplan(n-1, 1, &plan, _state); 15067 ftbase_ftapplysubplan(&plan, 0, precr, offs, 0, &plan.buffer, 1, _state); 15068 ae_frame_leave(_state); 15072 /************************************************************************* 15073 This subroutine applies complex Rader's FFT to input/output array A. 15076 A - array, must be large enough for plan to work 15077 ABase - base offset in array A, this value points to start of 15078 subarray whose length is equal to length of the plan 15079 AOffset - offset with respect to ABase, 0<=AOffset<PlanLength. 15080 This is an offset within large PlanLength-subarray of 15081 the chunk to process. 15082 OperandsCnt - number of repeated operands (length N each) 15083 N - original data length (measured in complex numbers) 15084 SubPlan - position of the (N-1)-point FFT subplan which is used 15086 RQ - primitive root modulo N 15087 RIQ - inverse of primitive root modulo N 15088 PrecOffs - offset of the precomputed data for the plan 15089 Buf - temporary array 15092 A - transformed array 15095 Copyright 05.04.2013 by Bochkanov Sergey 15096 *************************************************************************/ 15097 static void ftbase_ftradersfft(fasttransformplan* plan, 15098 /* Real */ ae_vector* a, 15101 ae_int_t operandscnt, 15107 /* Real */ ae_vector* buf, 15127 ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1
", _state); 15132 for(opidx=0; opidx<=operandscnt-1; opidx++) 15139 p0 = abase+aoffset+opidx*n*2; 15140 p1 = aoffset+opidx*n*2; 15141 rx = a->ptr.p_double[p0+0]; 15142 ry = a->ptr.p_double[p0+1]; 15145 for(q=0; q<=n-2; q++) 15147 ax = a->ptr.p_double[p0+2*kq+0]; 15148 ay = a->ptr.p_double[p0+2*kq+1]; 15149 buf->ptr.p_double[p1+0] = ax; 15150 buf->ptr.p_double[p1+1] = ay; 15156 p0 = abase+aoffset+opidx*n*2; 15157 p1 = aoffset+opidx*n*2; 15158 for(q=0; q<=n-2; q++) 15160 a->ptr.p_double[p0] = buf->ptr.p_double[p1]; 15161 a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1]; 15169 ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state); 15170 p0 = abase+aoffset+opidx*n*2; 15172 for(i=0; i<=n-2; i++) 15174 ax = a->ptr.p_double[p0+0]; 15175 ay = a->ptr.p_double[p0+1]; 15176 bx = plan->precr.ptr.p_double[p1+0]; 15177 by = plan->precr.ptr.p_double[p1+1]; 15178 a->ptr.p_double[p0+0] = ax*bx-ay*by; 15179 a->ptr.p_double[p0+1] = -(ax*by+ay*bx); 15183 ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state); 15184 p0 = abase+aoffset+opidx*n*2; 15185 for(i=0; i<=n-2; i++) 15187 a->ptr.p_double[p0+0] = a->ptr.p_double[p0+0]/(n-1); 15188 a->ptr.p_double[p0+1] = -a->ptr.p_double[p0+1]/(n-1); 15195 buf->ptr.p_double[aoffset+opidx*n*2+0] = rx; 15196 buf->ptr.p_double[aoffset+opidx*n*2+1] = ry; 15198 p0 = aoffset+opidx*n*2; 15199 p1 = abase+aoffset+opidx*n*2; 15200 for(q=0; q<=n-2; q++) 15202 buf->ptr.p_double[p0+2*kiq+0] = x0+a->ptr.p_double[p1+0]; 15203 buf->ptr.p_double[p0+2*kiq+1] = y0+a->ptr.p_double[p1+1]; 15207 p0 = abase+aoffset+opidx*n*2; 15208 p1 = aoffset+opidx*n*2; 15209 for(q=0; q<=n-1; q++) 15211 a->ptr.p_double[p0] = buf->ptr.p_double[p1]; 15212 a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1]; 15220 /************************************************************************* 15221 Factorizes task size N into product of two smaller sizes N1 and N2 15225 IsRoot - whether taks is root task (first one in a sequence) 15228 N1, N2 - such numbers that: 15229 * for prime N: N1=N2=0 15230 * for composite N<=MaxRadix: N1=N2=0 15231 * for composite N>MaxRadix: 1<=N1<=N2, N1*N2=N 15234 Copyright 08.04.2013 by Bochkanov Sergey 15235 *************************************************************************/ 15236 static void ftbase_ftfactorize(ae_int_t n, 15248 ae_assert(n>0, "FTFactorize: N<=0
", _state); 15255 if( n<=ftbase_maxradix ) 15261 * Large N, recursive split 15263 if( n>ftbase_recursivethreshold ) 15265 k = ae_iceil(ae_sqrt(n, _state), _state)+1; 15266 ae_assert(k*k>=n, "FTFactorize:
internal error during recursive factorization
", _state); 15267 for(j=k; j>=2; j--) 15271 *n1 = ae_minint(n/j, j, _state); 15272 *n2 = ae_maxint(n/j, j, _state); 15279 * N>MaxRadix, try to find good codelet 15281 for(j=ftbase_maxradix; j>=2; j--) 15292 * In case no good codelet was found, 15293 * try to factorize N into product of ANY primes. 15297 for(j=2; j<=n-1; j++) 15324 /************************************************************************* 15325 Returns optimistic estimate of the FFT cost, in UNITs (1 UNIT = 100 KFLOPs) 15331 cost in UNITs, rounded down to nearest integer 15333 NOTE: If FFT cost is less than 1 UNIT, it will return 0 as result. 15336 Copyright 08.04.2013 by Bochkanov Sergey 15337 *************************************************************************/ 15338 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state) 15343 ae_assert(n>0, "FTOptimisticEstimate: N<=0
", _state); 15344 result = ae_ifloor(1.0E-5*5*n*ae_log(n, _state)/ae_log(2, _state), _state); 15349 /************************************************************************* 15350 Twiddle factors calculation 15353 Copyright 01.05.2009 by Bochkanov Sergey 15354 *************************************************************************/ 15355 static void ftbase_ffttwcalc(/* Real */ ae_vector* a, 15377 ae_int_t updatetw2; 15382 * Multiplication by twiddle factors for complex Cooley-Tukey FFT 15383 * with N factorized as N1*N2. 15385 * Naive solution to this problem is given below: 15387 * > for K:=1 to N2-1 do 15388 * > for J:=1 to N1-1 do 15391 * > X:=A[AOffset+2*Idx+0]; 15392 * > Y:=A[AOffset+2*Idx+1]; 15393 * > TwX:=Cos(-2*Pi()*K*J/(N1*N2)); 15394 * > TwY:=Sin(-2*Pi()*K*J/(N1*N2)); 15395 * > A[AOffset+2*Idx+0]:=X*TwX-Y*TwY; 15396 * > A[AOffset+2*Idx+1]:=X*TwY+Y*TwX; 15399 * However, there are exist more efficient solutions. 15401 * Each pass of the inner cycle corresponds to multiplication of one 15402 * entry of A by W[k,j]=exp(-I*2*pi*k*j/N). This factor can be rewritten 15403 * as exp(-I*2*pi*k/N)^j. So we can replace costly exponentiation by 15404 * repeated multiplication: W[k,j+1]=W[k,j]*exp(-I*2*pi*k/N), with 15405 * second factor being computed once in the beginning of the iteration. 15407 * Also, exp(-I*2*pi*k/N) can be represented as exp(-I*2*pi/N)^k, i.e. 15408 * we have W[K+1,1]=W[K,1]*W[1,1]. 15410 * In our loop we use following variables: 15411 * * [TwBaseXM1,TwBaseY] = [cos(2*pi/N)-1, sin(2*pi/N)] 15412 * * [TwRowXM1, TwRowY] = [cos(2*pi*I/N)-1, sin(2*pi*I/N)] 15413 * * [TwXM1, TwY] = [cos(2*pi*I*J/N)-1, sin(2*pi*I*J/N)] 15415 * Meaning of the variables: 15416 * * [TwXM1,TwY] is current twiddle factor W[I,J] 15417 * * [TwRowXM1, TwRowY] is W[I,1] 15418 * * [TwBaseXM1,TwBaseY] is W[1,1] 15420 * During inner loop we multiply current twiddle factor by W[I,1], 15421 * during outer loop we update W[I,1]. 15424 ae_assert(ftbase_updatetw>=2, "FFTTwCalc:
internal error - UpdateTw<2
", _state); 15425 updatetw2 = ftbase_updatetw/2; 15429 twbasexm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state); 15430 twbasey = ae_sin(v, _state); 15434 for(i=0; i<=n2-1; i++) 15438 * Initialize twiddle factor for current row 15444 * N1-point block is separated into 2-point chunks and residual 1-point chunk 15445 * (in case N1 is odd). Unrolled loop is several times faster. 15447 for(j2=0; j2<=halfn1-1; j2++) 15452 * * process first element in a chunk. 15453 * * update twiddle factor (unconditional update) 15454 * * process second element 15455 * * conditional update of the twiddle factor 15457 x = a->ptr.p_double[offs+0]; 15458 y = a->ptr.p_double[offs+1]; 15459 tmpx = x*(1+twxm1)-y*twy; 15460 tmpy = x*twy+y*(1+twxm1); 15461 a->ptr.p_double[offs+0] = tmpx; 15462 a->ptr.p_double[offs+1] = tmpy; 15463 tmpx = (1+twxm1)*twrowxm1-twy*twrowy; 15464 twy = twy+(1+twxm1)*twrowy+twy*twrowxm1; 15465 twxm1 = twxm1+tmpx; 15466 x = a->ptr.p_double[offs+2]; 15467 y = a->ptr.p_double[offs+3]; 15468 tmpx = x*(1+twxm1)-y*twy; 15469 tmpy = x*twy+y*(1+twxm1); 15470 a->ptr.p_double[offs+2] = tmpx; 15471 a->ptr.p_double[offs+3] = tmpy; 15473 if( (j2+1)%updatetw2==0&&j2<halfn1-1 ) 15477 * Recalculate twiddle factor 15479 v = -2*ae_pi*i*2*(j2+1)/n; 15480 twxm1 = ae_sin(0.5*v, _state); 15481 twxm1 = -2*twxm1*twxm1; 15482 twy = ae_sin(v, _state); 15488 * Update twiddle factor 15490 tmpx = (1+twxm1)*twrowxm1-twy*twrowy; 15491 twy = twy+(1+twxm1)*twrowy+twy*twrowxm1; 15492 twxm1 = twxm1+tmpx; 15499 * Handle residual chunk 15501 x = a->ptr.p_double[offs+0]; 15502 y = a->ptr.p_double[offs+1]; 15503 tmpx = x*(1+twxm1)-y*twy; 15504 tmpy = x*twy+y*(1+twxm1); 15505 a->ptr.p_double[offs+0] = tmpx; 15506 a->ptr.p_double[offs+1] = tmpy; 15511 * update TwRow: TwRow(new) = TwRow(old)*TwBase 15515 if( (i+1)%ftbase_updatetw==0 ) 15517 v = -2*ae_pi*(i+1)/n; 15518 twrowxm1 = ae_sin(0.5*v, _state); 15519 twrowxm1 = -2*twrowxm1*twrowxm1; 15520 twrowy = ae_sin(v, _state); 15524 tmpx = twbasexm1+twrowxm1*twbasexm1-twrowy*twbasey; 15525 tmpy = twbasey+twrowxm1*twbasey+twrowy*twbasexm1; 15526 twrowxm1 = twrowxm1+tmpx; 15527 twrowy = twrowy+tmpy; 15534 /************************************************************************* 15535 Linear transpose: transpose complex matrix stored in 1-dimensional array 15538 Copyright 01.05.2009 by Bochkanov Sergey 15539 *************************************************************************/ 15540 static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a, 15544 /* Real */ ae_vector* buf, 15549 ftbase_ffticltrec(a, astart, n, buf, 0, m, m, n, _state); 15550 ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+2*m*n-1)); 15554 /************************************************************************* 15555 Recurrent subroutine for a InternalComplexLinTranspose 15557 Write A^T to B, where: 15558 * A is m*n complex matrix stored in array A as pairs of real/image values, 15559 beginning from AStart position, with AStride stride 15560 * B is n*m complex matrix stored in array B as pairs of real/image values, 15561 beginning from BStart position, with BStride stride 15562 stride is measured in complex numbers, i.e. in real/image pairs. 15565 Copyright 01.05.2009 by Bochkanov Sergey 15566 *************************************************************************/ 15567 static void ftbase_ffticltrec(/* Real */ ae_vector* a, 15570 /* Real */ ae_vector* b, 15590 if( ae_maxint(m, n, _state)<=8 ) 15593 for(i=0; i<=m-1; i++) 15596 idx2 = astart+2*i*astride; 15597 for(j=0; j<=n-1; j++) 15599 b->ptr.p_double[idx1+0] = a->ptr.p_double[idx2+0]; 15600 b->ptr.p_double[idx1+1] = a->ptr.p_double[idx2+1]; 15613 * "A^T -> B
" becomes "(A1 A2)^T -> ( B1 )
15617 if( n-n1>=8&&n1%8!=0 )
15621 ae_assert(n-n1>0,
"Assertion failed", _state);
15622 ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
15623 ftbase_ffticltrec(a, astart+2*n1, astride, b, bstart+2*n1*bstride, bstride, m, n-n1, _state);
15635 if( m-m1>=8&&m1%8!=0 )
15639 ae_assert(m-m1>0,
"Assertion failed", _state);
15640 ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
15641 ftbase_ffticltrec(a, astart+2*m1*astride, astride, b, bstart+2*m1, bstride, m-m1, n, _state);
15653 static void ftbase_fftirltrec(
ae_vector*
a,
15677 for(i=0; i<=
m-1; i++)
15680 idx2 = astart+i*astride;
15681 for(j=0; j<=
n-1; j++)
15684 idx1 = idx1+bstride;
15700 if(
n-n1>=8&&n1%8!=0 )
15704 ae_assert(
n-n1>0,
"Assertion failed", _state);
15705 ftbase_fftirltrec(a, astart, astride, b, bstart, bstride,
m, n1, _state);
15706 ftbase_fftirltrec(a, astart+n1, astride, b, bstart+n1*bstride, bstride,
m,
n-n1, _state);
15718 if(
m-m1>=8&&m1%8!=0 )
15722 ae_assert(
m-m1>0,
"Assertion failed", _state);
15723 ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m1,
n, _state);
15724 ftbase_fftirltrec(a, astart+m1*astride, astride, b, bstart+m1, bstride,
m-m1,
n, _state);
15735 static void ftbase_ftbasefindsmoothrec(
ae_int_t n,
15743 ae_assert(ftbase_ftbasemaxsmoothfactor<=5,
"FTBaseFindSmoothRec: internal error!", _state);
15746 *best =
ae_minint(*best, seed, _state);
15749 if( leastfactor<=2 )
15751 ftbase_ftbasefindsmoothrec(
n, seed*2, 2, best, _state);
15753 if( leastfactor<=3 )
15755 ftbase_ftbasefindsmoothrec(
n, seed*3, 3, best, _state);
15757 if( leastfactor<=5 )
15759 ftbase_ftbasefindsmoothrec(
n, seed*5, 5, best, _state);
15837 result =
ae_log(z, _state);
15841 lp = 4.5270000862445199635215E-5;
15842 lp = lp*x+4.9854102823193375972212E-1;
15843 lp = lp*x+6.5787325942061044846969E0;
15844 lp = lp*x+2.9911919328553073277375E1;
15845 lp = lp*x+6.0949667980987787057556E1;
15846 lp = lp*x+5.7112963590585538103336E1;
15847 lp = lp*x+2.0039553499201281259648E1;
15848 lq = 1.0000000000000000000000E0;
15849 lq = lq*x+1.5062909083469192043167E1;
15850 lq = lq*x+8.3047565967967209469434E1;
15851 lq = lq*x+2.2176239823732856465394E2;
15852 lq = lq*x+3.0909872225312059774938E2;
15853 lq = lq*x+2.1642788614495947685003E2;
15854 lq = lq*x+6.0118660497603843919306E1;
15855 z = -0.5*z+x*(z*lp/lq);
15872 result =
ae_exp(x, _state)-1.0;
15876 ep = 1.2617719307481059087798E-4;
15877 ep = ep*xx+3.0299440770744196129956E-2;
15878 ep = ep*xx+9.9999999999999999991025E-1;
15879 eq = 3.0019850513866445504159E-6;
15880 eq = eq*xx+2.5244834034968410419224E-3;
15881 eq = eq*xx+2.2726554820815502876593E-1;
15882 eq = eq*xx+2.0000000000000000000897E0;
15899 result =
ae_cos(x, _state)-1;
15903 c = 4.7377507964246204691685E-14;
15904 c = c*xx-1.1470284843425359765671E-11;
15905 c = c*xx+2.0876754287081521758361E-9;
15906 c = c*xx-2.7557319214999787979814E-7;
15907 c = c*xx+2.4801587301570552304991E-5;
15908 c = c*xx-1.3888888888888872993737E-3;
15909 c = c*xx+4.1666666666666666609054E-2;
15910 result = -0.5*xx+xx*xx*
c;
ae_bool _armijostate_init(void *_p, ae_state *_state, ae_bool make_automatic)
ae_bool _ialglib_i_cmatrixrighttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2)
ae_bool apservisfinitectrmatrix(ae_matrix *x, ae_int_t n, ae_bool isupper, ae_state *_state)
void ae_v_cmove(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
double log2(double x, ae_state *_state)
void applyrotationsfromtheleft(ae_bool isforward, ae_int_t m1, ae_int_t m2, ae_int_t n1, ae_int_t n2, ae_vector *c, ae_vector *s, ae_matrix *a, ae_vector *work, ae_state *_state)
ae_bool ae_fp_greater_eq(double v1, double v2)
void serializecomplex(ae_serializer *s, ae_complex v, ae_state *_state)
void ftapplyplan(fasttransformplan *plan, ae_vector *a, ae_int_t offsa, ae_int_t repcnt, ae_state *_state)
void _scomplex_clear(void *_p)
void serializerealarray(ae_serializer *s, ae_vector *v, ae_int_t n, ae_state *_state)
ae_bool _sboolean_init(void *_p, ae_state *_state, ae_bool make_automatic)
double ae_c_abs(ae_complex z, ae_state *state)
void ae_v_moved(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
ae_int_t getmlpeserializationcode(ae_state *_state)
ae_int_t vectoridxabsmax(ae_vector *x, ae_int_t i1, ae_int_t i2, ae_state *_state)
ae_bool _sinteger_init(void *_p, ae_state *_state, ae_bool make_automatic)
double ae_sin(double x, ae_state *state)
ae_bool ae_shared_pool_init(void *_dst, ae_state *state, ae_bool make_automatic)
void hermitianrank2update(ae_matrix *a, ae_bool isupper, ae_int_t i1, ae_int_t i2, ae_vector *x, ae_vector *y, ae_vector *t, ae_complex alpha, ae_state *_state)
void apperiodicmap(double *x, double a, double b, double *k, ae_state *_state)
ae_bool _sreal_init(void *_p, ae_state *_state, ae_bool make_automatic)
ae_bool rmatrixsyrkf(ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_bool isupper, ae_state *_state)
void _mlpbuffers_destroy(void *_p)
void dec(ae_int_t *v, ae_state *_state)
ae_bool cmatrixgemmf(ae_int_t m, ae_int_t n, ae_int_t k, ae_complex alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, ae_complex beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
void _armijostate_destroy(void *_p)
ae_bool isfinitertrmatrix(ae_matrix *x, ae_int_t n, ae_bool isupper, ae_state *_state)
void ae_v_muld(double *vdst, ae_int_t stride_dst, ae_int_t n, double alpha)
void copyandtranspose(ae_matrix *a, ae_int_t is1, ae_int_t is2, ae_int_t js1, ae_int_t js2, ae_matrix *b, ae_int_t id1, ae_int_t id2, ae_int_t jd1, ae_int_t jd2, ae_state *_state)
ae_bool _sboolean_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void ae_shared_pool_retrieve(ae_shared_pool *pool, ae_smart_ptr *pptr, ae_state *state)
double ae_fabs(double x, ae_state *state)
void ae_shared_pool_clear(void *_dst)
ae_bool rmatrixsyrkmkl(ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_bool isupper, ae_state *_state)
ae_bool cmatrixrank1f(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_vector *u, ae_int_t iu, ae_vector *v, ae_int_t iv, ae_state *_state)
ae_bool cmatrixscaledtrsafesolve(ae_matrix *a, double sa, ae_int_t n, ae_vector *x, ae_bool isupper, ae_int_t trans, ae_bool isunit, double maxgrowth, ae_state *_state)
void ftbasefactorize(ae_int_t n, ae_int_t tasktype, ae_int_t *n1, ae_int_t *n2, ae_state *_state)
void symmetricrank2update(ae_matrix *a, ae_bool isupper, ae_int_t i1, ae_int_t i2, ae_vector *x, ae_vector *y, ae_vector *t, double alpha, ae_state *_state)
double beta(const double a, const double b)
void xdot(ae_vector *a, ae_vector *b, ae_int_t n, ae_vector *temp, double *r, double *rerr, ae_state *_state)
union alglib_impl::ae_matrix::@12 ptr
ae_int_t columnidxabsmax(ae_matrix *x, ae_int_t i1, ae_int_t i2, ae_int_t j, ae_state *_state)
ae_bool _sintegerarray_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
double inttoreal(ae_int_t a, ae_state *_state)
double vectornorm2(ae_vector *x, ae_int_t i1, ae_int_t i2, ae_state *_state)
void ae_frame_make(ae_state *state, ae_frame *tmp)
ae_bool aredistinct(ae_vector *x, ae_int_t n, ae_state *_state)
ae_bool _sintegerarray_init(void *_p, ae_state *_state, ae_bool make_automatic)
void cmatrixgemmk(ae_int_t m, ae_int_t n, ae_int_t k, ae_complex alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, ae_complex beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
void hpcpreparechunkedgradient(ae_vector *weights, ae_int_t wcount, ae_int_t ntotal, ae_int_t nin, ae_int_t nout, mlpbuffers *buf, ae_state *_state)
void taskgenint1dequidist(double a, double b, ae_int_t n, ae_vector *x, ae_vector *y, ae_state *_state)
ae_bool ae_c_eq_d(ae_complex lhs, double rhs)
ae_complex ae_c_conj(ae_complex lhs, ae_state *state)
void tagsortfast(ae_vector *a, ae_vector *bufa, ae_int_t n, ae_state *_state)
void tagsortfastr(ae_vector *a, ae_vector *b, ae_vector *bufa, ae_vector *bufb, ae_int_t n, ae_state *_state)
ae_complex ae_complex_from_d(double v)
ae_bool apservisfinitematrix(ae_matrix *x, ae_int_t m, ae_int_t n, ae_state *_state)
void bvectorsetlengthatleast(ae_vector *x, ae_int_t n, ae_state *_state)
void _sintegerarray_clear(void *_p)
ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state)
void _sbooleanarray_clear(void *_p)
ae_bool _ialglib_i_rmatrixgemmf(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *_a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *_b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, double beta, ae_matrix *_c, ae_int_t ic, ae_int_t jc)
void ae_shared_pool_recycle(ae_shared_pool *pool, ae_smart_ptr *pptr, ae_state *state)
ae_complex ae_c_div_d(ae_complex lhs, double rhs)
void serializeintegerarray(ae_serializer *s, ae_vector *v, ae_int_t n, ae_state *_state)
void _sreal_clear(void *_p)
ae_bool ae_c_neq_d(ae_complex lhs, double rhs)
void allocrealarray(ae_serializer *s, ae_vector *v, ae_int_t n, ae_state *_state)
ae_bool _fasttransformplan_init(void *_p, ae_state *_state, ae_bool make_automatic)
void rmatrixtrsafesolve(ae_matrix *a, ae_int_t n, ae_vector *x, double *s, ae_bool isupper, ae_bool istrans, ae_bool isunit, ae_state *_state)
void rmatrixgemmk(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
ae_bool seterrorflag(ae_bool *flag, ae_bool cond, ae_state *_state)
ae_bool ae_fp_eq(double v1, double v2)
ae_bool _scomplexarray_init(void *_p, ae_state *_state, ae_bool make_automatic)
double nucosm1(double x, ae_state *_state)
ae_bool _sinteger_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void ae_v_cmulc(ae_complex *vdst, ae_int_t stride_dst, ae_int_t n, ae_complex alpha)
ae_bool _linminstate_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state)
ae_int_t getkdtreeserializationcode(ae_state *_state)
ae_int_t getrbfserializationcode(ae_state *_state)
void unserializerealmatrix(ae_serializer *s, ae_matrix *v, ae_state *_state)
void _linminstate_clear(void *_p)
ae_bool rmatrixlefttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2, ae_state *_state)
ae_bool apservisfiniteornanmatrix(ae_matrix *x, ae_int_t m, ae_int_t n, ae_state *_state)
void unserializerealarray(ae_serializer *s, ae_vector *v, ae_state *_state)
double ae_cos(double x, ae_state *state)
void allocintegerarray(ae_serializer *s, ae_vector *v, ae_int_t n, ae_state *_state)
ae_bool ae_matrix_init_copy(ae_matrix *dst, ae_matrix *src, ae_state *state, ae_bool make_automatic)
ae_bool ae_matrix_init(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
ae_bool hpcchunkedprocess(ae_vector *weights, ae_vector *structinfo, ae_vector *columnmeans, ae_vector *columnsigmas, ae_matrix *xy, ae_int_t cstart, ae_int_t csize, ae_vector *batch4buf, ae_vector *hpcbuf, ae_state *_state)
void ae_matrix_destroy(ae_matrix *dst)
ae_int_t recsearch(ae_vector *a, ae_int_t nrec, ae_int_t nheader, ae_int_t i0, ae_int_t i1, ae_vector *b, ae_state *_state)
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
void _apbuffers_destroy(void *_p)
void ae_v_add(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
ae_bool rmatrixscaledtrsafesolve(ae_matrix *a, double sa, ae_int_t n, ae_vector *x, ae_bool isupper, ae_int_t trans, ae_bool isunit, double maxgrowth, ae_state *_state)
ae_int_t getmlpserializationcode(ae_state *_state)
ae_complex ae_c_sub_d(ae_complex lhs, double rhs)
ae_int_t getrdfserializationcode(ae_state *_state)
double safepythag2(double x, double y, ae_state *_state)
double upperhessenberg1norm(ae_matrix *a, ae_int_t i1, ae_int_t i2, ae_int_t j1, ae_int_t j2, ae_vector *work, ae_state *_state)
void ae_serializer_serialize_int(ae_serializer *serializer, ae_int_t v, ae_state *state)
void _srealarray_destroy(void *_p)
void linminnormalized(ae_vector *d, double *stp, ae_int_t n, ae_state *_state)
void _rcommstate_destroy(rcommstate *p)
ae_int_t ae_v_len(ae_int_t a, ae_int_t b)
void ae_vector_destroy(ae_vector *dst)
ae_bool _rcommstate_init(rcommstate *p, ae_state *_state, ae_bool make_automatic)
void tagsortmiddleir(ae_vector *a, ae_vector *b, ae_int_t offset, ae_int_t n, ae_state *_state)
ae_bool cmatrixsyrkf(ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_bool isupper, ae_state *_state)
ae_bool armijoiteration(armijostate *state, ae_state *_state)
void countdown(ae_int_t *v, ae_state *_state)
void ae_shared_pool_set_seed(ae_shared_pool *dst, void *seed_object, ae_int_t size_of_object, ae_bool(*init)(void *dst, ae_state *state, ae_bool make_automatic), ae_bool(*init_copy)(void *dst, void *src, ae_state *state, ae_bool make_automatic), void(*destroy)(void *ptr), ae_state *state)
void tagsort(ae_vector *a, ae_int_t n, ae_vector *p1, ae_vector *p2, ae_state *_state)
ae_bool _sbooleanarray_init(void *_p, ae_state *_state, ae_bool make_automatic)
ae_bool _rcommstate_init_copy(rcommstate *dst, rcommstate *src, ae_state *_state, ae_bool make_automatic)
void ae_v_move(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n)
void _srealarray_clear(void *_p)
void ae_swap_matrices(ae_matrix *mat1, ae_matrix *mat2)
ae_bool seterrorflagdiff(ae_bool *flag, double val, double refval, double tol, double s, ae_state *_state)
void _linminstate_destroy(void *_p)
ae_bool _apbuffers_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
ae_complex ae_c_div(ae_complex lhs, ae_complex rhs)
ae_bool _ialglib_i_cmatrixgemmf(ae_int_t m, ae_int_t n, ae_int_t k, ae_complex alpha, ae_matrix *_a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *_b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, ae_complex beta, ae_matrix *_c, ae_int_t ic, ae_int_t jc)
ae_int_t rowidxabsmax(ae_matrix *x, ae_int_t j1, ae_int_t j2, ae_int_t i, ae_state *_state)
void matrixvectormultiply(ae_matrix *a, ae_int_t i1, ae_int_t i2, ae_int_t j1, ae_int_t j2, ae_bool trans, ae_vector *x, ae_int_t ix1, ae_int_t ix2, double alpha, ae_vector *y, ae_int_t iy1, ae_int_t iy2, double beta, ae_state *_state)
ae_complex ae_c_add(ae_complex lhs, ae_complex rhs)
void ae_vector_clear(ae_vector *dst)
void rmatrixgemmk44v01(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_matrix *b, ae_int_t ib, ae_int_t jb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
void rmatrixgemmk44v00(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_matrix *b, ae_int_t ib, ae_int_t jb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
ae_bool _armijostate_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void ae_v_csubc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
void copyintegerarray(ae_vector *src, ae_vector *dst, ae_state *_state)
void inc(ae_int_t *v, ae_state *_state)
ae_complex unserializecomplex(ae_serializer *s, ae_state *_state)
void taskgenint1dcheb1(double a, double b, ae_int_t n, ae_vector *x, ae_vector *y, ae_state *_state)
ae_bool ae_fp_less(double v1, double v2)
void rankx(ae_vector *x, ae_int_t n, ae_bool iscentered, apbuffers *buf, ae_state *_state)
void symmetricmatrixvectormultiply(ae_matrix *a, ae_bool isupper, ae_int_t i1, ae_int_t i2, ae_vector *x, double alpha, ae_vector *y, ae_state *_state)
ae_bool cmatrixmvf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t opa, ae_vector *x, ae_int_t ix, ae_vector *y, ae_int_t iy, ae_state *_state)
double safeminposrv(double x, double y, double v, ae_state *_state)
void ae_serializer_unserialize_int(ae_serializer *serializer, ae_int_t *v, ae_state *state)
double ae_randomreal(ae_state *state)
void rmatrixgemmk44v11(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_matrix *b, ae_int_t ib, ae_int_t jb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
ae_bool ae_smart_ptr_init(ae_smart_ptr *dst, void **subscriber, ae_state *state, ae_bool make_automatic)
void matrixmatrixmultiply(ae_matrix *a, ae_int_t ai1, ae_int_t ai2, ae_int_t aj1, ae_int_t aj2, ae_bool transa, ae_matrix *b, ae_int_t bi1, ae_int_t bi2, ae_int_t bj1, ae_int_t bj2, ae_bool transb, double alpha, ae_matrix *c, ae_int_t ci1, ae_int_t ci2, ae_int_t cj1, ae_int_t cj2, double beta, ae_vector *work, ae_state *_state)
void armijocreate(ae_int_t n, ae_vector *x, double f, ae_vector *s, double stp, double stpmax, ae_int_t fmax, armijostate *state, ae_state *_state)
double pythag2(double x, double y, ae_state *_state)
void eq(Image< double > &op1, const Image< double > &op2)
Be careful with integer images for relational operations...due to double comparisons.
void complexgeneratereflection(ae_vector *x, ae_int_t n, ae_complex *tau, ae_state *_state)
ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state)
void ae_v_caddc(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)
void generaterotation(double f, double g, double *cs, double *sn, double *r, ae_state *_state)
ae_bool ae_shared_pool_init_copy(void *_dst, void *_src, ae_state *state, ae_bool make_automatic)
void tagheappushi(ae_vector *a, ae_vector *b, ae_int_t *n, double va, ae_int_t vb, ae_state *_state)
ae_complex ae_c_sub(ae_complex lhs, ae_complex rhs)
ae_bool ae_fp_neq(double v1, double v2)
ae_bool ae_isnan(double x, ae_state *state)
ae_bool rmatrixmvf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t opa, ae_vector *x, ae_int_t ix, ae_vector *y, ae_int_t iy, ae_state *_state)
ae_bool isfinitevector(ae_vector *x, ae_int_t n, ae_state *_state)
void ae_v_cmoved(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, double alpha)
void rvectorsetlengthatleast(ae_vector *x, ae_int_t n, ae_state *_state)
void _fasttransformplan_destroy(void *_p)
double randomnormal(ae_state *_state)
double boundval(double x, double b1, double b2, ae_state *_state)
void allocrealmatrix(ae_serializer *s, ae_matrix *v, ae_int_t n0, ae_int_t n1, ae_state *_state)
void ae_v_cadd(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n)
void _sboolean_destroy(void *_p)
void findprimitiverootandinverse(ae_int_t n, ae_int_t *proot, ae_int_t *invproot, ae_state *_state)
void ae_touch_ptr(void *p)
void taskgenint1dcheb2(double a, double b, ae_int_t n, ae_vector *x, ae_vector *y, ae_state *_state)
void _mlpbuffers_clear(void *_p)
double ae_maxreal(double m1, double m2, ae_state *state)
ae_complex ae_v_cdotproduct(const ae_complex *v0, ae_int_t stride0, const char *conj0, const ae_complex *v1, ae_int_t stride1, const char *conj1, ae_int_t n)
ae_bool apservisfinitecmatrix(ae_matrix *x, ae_int_t m, ae_int_t n, ae_state *_state)
void splitlength(ae_int_t tasksize, ae_int_t chunksize, ae_int_t *task0, ae_int_t *task1, ae_state *_state)
void _sinteger_destroy(void *_p)
void mcsrch(ae_int_t n, ae_vector *x, double *f, ae_vector *g, ae_vector *s, double *stp, double stpmax, double gtol, ae_int_t *info, ae_int_t *nfev, ae_vector *wa, linminstate *state, ae_int_t *stage, ae_state *_state)
ae_bool ae_vector_set_length(ae_vector *dst, ae_int_t newsize, ae_state *state)
void unserializeintegerarray(ae_serializer *s, ae_vector *v, ae_state *_state)
ae_bool _srealarray_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void applyreflectionfromtheleft(ae_matrix *c, double tau, ae_vector *v, ae_int_t m1, ae_int_t m2, ae_int_t n1, ae_int_t n2, ae_vector *work, ae_state *_state)
void applyreflectionfromtheright(ae_matrix *c, double tau, ae_vector *v, ae_int_t m1, ae_int_t m2, ae_int_t n1, ae_int_t n2, ae_vector *work, ae_state *_state)
ae_bool _fasttransformplan_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
ae_int_t lowerbound(ae_vector *a, ae_int_t n, double t, ae_state *_state)
ae_bool _ialglib_i_cmatrixlefttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2)
void _sreal_destroy(void *_p)
void _scomplexarray_destroy(void *_p)
double ae_log(double x, ae_state *state)
void _fasttransformplan_clear(void *_p)
void applyrotationsfromtheright(ae_bool isforward, ae_int_t m1, ae_int_t m2, ae_int_t n1, ae_int_t n2, ae_vector *c, ae_vector *s, ae_matrix *a, ae_vector *work, ae_state *_state)
void ae_serializer_serialize_double(ae_serializer *serializer, double v, ae_state *state)
double nuexpm1(double x, ae_state *_state)
void _apbuffers_clear(void *_p)
ae_bool _apbuffers_init(void *_p, ae_state *_state, ae_bool make_automatic)
double ae_minreal(double m1, double m2, ae_state *state)
ae_bool isfinitecvector(ae_vector *z, ae_int_t n, ae_state *_state)
ae_bool _ialglib_i_rmatrixrank1f(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_vector *u, ae_int_t uoffs, ae_vector *v, ae_int_t voffs)
ae_bool _scomplex_init(void *_p, ae_state *_state, ae_bool make_automatic)
void taskgenint1d(double a, double b, ae_int_t n, ae_vector *x, ae_vector *y, ae_state *_state)
ae_complex ae_c_d_div(double lhs, ae_complex rhs)
ae_bool rmatrixrighttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2, ae_state *_state)
ae_bool _sreal_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void rmatrixgemmk44v10(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_matrix *b, ae_int_t ib, ae_int_t jb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
void alloccomplex(ae_serializer *s, ae_complex v, ae_state *_state)
void generatereflection(ae_vector *x, ae_int_t n, double *tau, ae_state *_state)
ae_bool _ialglib_i_rmatrixlefttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2)
double safepythag3(double x, double y, double z, ae_state *_state)
void hpcfinalizechunkedgradient(mlpbuffers *buf, ae_vector *grad, ae_state *_state)
ae_bool _ialglib_i_rmatrixsyrkf(ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_bool isupper)
ae_int_t ae_ifloor(double x, ae_state *state)
ae_bool cmatrixlefttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2, ae_state *_state)
void ae_shared_pool_destroy(void *_dst)
void touchint(ae_int_t *a, ae_state *_state)
ae_bool approxequalrel(double a, double b, double tol, ae_state *_state)
void rmatrixsetlengthatleast(ae_matrix *x, ae_int_t m, ae_int_t n, ae_state *_state)
double ae_sqrt(double x, ae_state *state)
void ae_assert(ae_bool cond, const char *msg, ae_state *state)
union alglib_impl::ae_vector::@11 ptr
void _sboolean_clear(void *_p)
ae_bool rmatrixgemmf(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
ae_bool _ialglib_i_cmatrixrank1f(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_vector *u, ae_int_t uoffs, ae_vector *v, ae_int_t voffs)
ae_bool _scomplexarray_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
ae_bool _srealarray_init(void *_p, ae_state *_state, ae_bool make_automatic)
void _armijostate_clear(void *_p)
void xcdot(ae_vector *a, ae_vector *b, ae_int_t n, ae_vector *temp, ae_complex *r, double *rerr, ae_state *_state)
ae_bool _scomplex_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void ivectorsetlengthatleast(ae_vector *x, ae_int_t n, ae_state *_state)
void copymatrix(ae_matrix *a, ae_int_t is1, ae_int_t is2, ae_int_t js1, ae_int_t js2, ae_matrix *b, ae_int_t id1, ae_int_t id2, ae_int_t jd1, ae_int_t jd2, ae_state *_state)
double ftbasegetflopestimate(ae_int_t n, ae_state *_state)
void splitlengtheven(ae_int_t tasksize, ae_int_t *task0, ae_int_t *task1, ae_state *_state)
double nulog1p(double x, ae_state *_state)
#define ae_machineepsilon
void ftcomplexfftplan(ae_int_t n, ae_int_t k, fasttransformplan *plan, ae_state *_state)
void serializerealmatrix(ae_serializer *s, ae_matrix *v, ae_int_t n0, ae_int_t n1, ae_state *_state)
ae_bool _ialglib_i_cmatrixsyrkf(ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_bool isupper)
double ae_exp(double x, ae_state *state)
ae_complex ae_c_mul(ae_complex lhs, ae_complex rhs)
void _sinteger_clear(void *_p)
void tagsortbuf(ae_vector *a, ae_int_t n, ae_vector *p1, ae_vector *p2, apbuffers *buf, ae_state *_state)
void _sintegerarray_destroy(void *_p)
void _rcommstate_clear(rcommstate *p)
void complexapplyreflectionfromtheright(ae_matrix *c, ae_complex tau, ae_vector *v, ae_int_t m1, ae_int_t m2, ae_int_t n1, ae_int_t n2, ae_vector *work, ae_state *_state)
void rmatrixresize(ae_matrix *x, ae_int_t m, ae_int_t n, ae_state *_state)
void ae_v_subd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
ae_bool ae_vector_init(ae_vector *dst, ae_int_t size, ae_datatype datatype, ae_state *state, ae_bool make_automatic)
ae_int_t ae_maxint(ae_int_t m1, ae_int_t m2, ae_state *state)
ae_complex ae_c_mul_d(ae_complex lhs, double rhs)
ae_bool upperhessenbergschurdecomposition(ae_matrix *h, ae_int_t n, ae_matrix *s, ae_state *_state)
ae_bool ae_isfinite(double x, ae_state *state)
double ae_sqr(double x, ae_state *state)
void inplacetranspose(ae_matrix *a, ae_int_t i1, ae_int_t i2, ae_int_t j1, ae_int_t j2, ae_vector *work, ae_state *_state)
void randomunit(ae_int_t n, ae_vector *x, ae_state *_state)
void ae_serializer_alloc_entry(ae_serializer *serializer)
void ae_serializer_unserialize_double(ae_serializer *serializer, double *v, ae_state *state)
void complexapplyreflectionfromtheleft(ae_matrix *c, ae_complex tau, ae_vector *v, ae_int_t m1, ae_int_t m2, ae_int_t n1, ae_int_t n2, ae_vector *work, ae_state *_state)
void _sbooleanarray_destroy(void *_p)
void copyrealarray(ae_vector *src, ae_vector *dst, ae_state *_state)
ae_int_t upperbound(ae_vector *a, ae_int_t n, double t, ae_state *_state)
void tagsortfasti(ae_vector *a, ae_vector *b, ae_vector *bufa, ae_vector *bufb, ae_int_t n, ae_state *_state)
void ae_v_addd(double *vdst, ae_int_t stride_dst, const double *vsrc, ae_int_t stride_src, ae_int_t n, double alpha)
ae_bool ae_vector_init_copy(ae_vector *dst, ae_vector *src, ae_state *state, ae_bool make_automatic)
void armijoresults(armijostate *state, ae_int_t *info, double *stp, double *f, ae_state *_state)
ae_bool _mlpbuffers_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
ae_bool rmatrixgemmmkl(ae_int_t m, ae_int_t n, ae_int_t k, double alpha, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_int_t optypea, ae_matrix *b, ae_int_t ib, ae_int_t jb, ae_int_t optypeb, double beta, ae_matrix *c, ae_int_t ic, ae_int_t jc, ae_state *_state)
ae_bool ae_fp_less_eq(double v1, double v2)
ae_int_t ae_round(double x, ae_state *state)
void internalschurdecomposition(ae_matrix *h, ae_int_t n, ae_int_t tneeded, ae_int_t zneeded, ae_vector *wr, ae_vector *wi, ae_matrix *z, ae_int_t *info, ae_state *_state)
void safesolvetriangular(ae_matrix *a, ae_int_t n, ae_vector *x, double *s, ae_bool isupper, ae_bool istrans, ae_bool isunit, ae_bool normin, ae_vector *cnorm, ae_state *_state)
void _scomplexarray_clear(void *_p)
void ae_frame_leave(ae_state *state)
ae_bool cmatrixrighttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2, ae_state *_state)
ae_bool _sbooleanarray_init_copy(void *_dst, void *_src, ae_state *_state, ae_bool make_automatic)
void ae_matrix_clear(ae_matrix *dst)
ae_bool _linminstate_init(void *_p, ae_state *_state, ae_bool make_automatic)
ae_bool ae_fp_greater(double v1, double v2)
void touchreal(double *a, ae_state *_state)
ae_bool ae_matrix_set_length(ae_matrix *dst, ae_int_t rows, ae_int_t cols, ae_state *state)
void tagheapreplacetopi(ae_vector *a, ae_vector *b, ae_int_t n, double va, ae_int_t vb, ae_state *_state)
ae_int_t saferdiv(double x, double y, double *r, ae_state *_state)
ae_bool hpcchunkedgradient(ae_vector *weights, ae_vector *structinfo, ae_vector *columnmeans, ae_vector *columnsigmas, ae_matrix *xy, ae_int_t cstart, ae_int_t csize, ae_vector *batch4buf, ae_vector *hpcbuf, double *e, ae_bool naturalerrorfunc, ae_state *_state)
ql0001_ & zero(ctemp+1),(cvec+1),(a+1),(b+1),(bl+1),(bu+1),(x+1),(w+1), &iout, ifail, &zero,(w+3), &lwar2,(iw+1), &leniw, &glob_grd.epsmac
void imatrixresize(ae_matrix *x, ae_int_t m, ae_int_t n, ae_state *_state)
void tagheappopi(ae_vector *a, ae_vector *b, ae_int_t *n, ae_state *_state)
ae_bool rmatrixrank1f(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t ia, ae_int_t ja, ae_vector *u, ae_int_t iu, ae_vector *v, ae_int_t iv, ae_state *_state)
void hermitianmatrixvectormultiply(ae_matrix *a, ae_bool isupper, ae_int_t i1, ae_int_t i2, ae_vector *x, ae_complex alpha, ae_vector *y, ae_state *_state)
ae_bool _ialglib_i_rmatrixrighttrsmf(ae_int_t m, ae_int_t n, ae_matrix *a, ae_int_t i1, ae_int_t j1, ae_bool isupper, ae_bool isunit, ae_int_t optype, ae_matrix *x, ae_int_t i2, ae_int_t j2)
ae_int_t ae_minint(ae_int_t m1, ae_int_t m2, ae_state *state)
ae_bool _mlpbuffers_init(void *_p, ae_state *_state, ae_bool make_automatic)
ae_bool aresameboolean(ae_bool v1, ae_bool v2, ae_state *_state)
ae_int_t ae_trunc(double x, ae_state *state)
double ae_v_dotproduct(const double *v0, ae_int_t stride0, const double *v1, ae_int_t stride1, ae_int_t n)
void copyrealmatrix(ae_matrix *src, ae_matrix *dst, ae_state *_state)
void _scomplex_destroy(void *_p)
void ae_v_cmovec(ae_complex *vdst, ae_int_t stride_dst, const ae_complex *vsrc, ae_int_t stride_src, const char *conj_src, ae_int_t n, ae_complex alpha)