Xmipp  v3.23.11-Nereus
alglibinternal.cpp
1 /*************************************************************************
2 Copyright (c) Sergey Bochkanov (ALGLIB project).
3 
4 >>> SOURCE LICENSE >>>
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation (www.fsf.org); either version 2 of the
8 License, or (at your option) any later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14 
15 A copy of the GNU General Public License is available at
16 http://www.fsf.org/licensing/licenses
17 >>> END OF LICENSE >>>
18 *************************************************************************/
19 #include "stdafx.h"
20 #include "alglibinternal.h"
21 
22 // disable some irrelevant warnings
23 #if (AE_COMPILER==AE_MSVC)
24 #pragma warning(disable:4100)
25 #pragma warning(disable:4127)
26 #pragma warning(disable:4702)
27 #pragma warning(disable:4996)
28 #endif
29 using namespace std;
30 
31 /////////////////////////////////////////////////////////////////////////
32 //
33 // THIS SECTION CONTAINS IMPLEMENTATION OF C++ INTERFACE
34 //
35 /////////////////////////////////////////////////////////////////////////
36 namespace alglib
37 {
38 
39 
40 }
41 
42 /////////////////////////////////////////////////////////////////////////
43 //
44 // THIS SECTION CONTAINS IMPLEMENTATION OF COMPUTATIONAL CORE
45 //
46 /////////////////////////////////////////////////////////////////////////
47 namespace alglib_impl
48 {
49 
50 
51 
52 
53 static void tsort_tagsortfastirec(/* Real */ ae_vector* a,
54  /* Integer */ ae_vector* b,
55  /* Real */ ae_vector* bufa,
56  /* Integer */ ae_vector* bufb,
57  ae_int_t i1,
58  ae_int_t i2,
59  ae_state *_state);
60 static void tsort_tagsortfastrrec(/* Real */ ae_vector* a,
61  /* Real */ ae_vector* b,
62  /* Real */ ae_vector* bufa,
63  /* Real */ ae_vector* bufb,
64  ae_int_t i1,
65  ae_int_t i2,
66  ae_state *_state);
67 static void tsort_tagsortfastrec(/* Real */ ae_vector* a,
68  /* Real */ ae_vector* bufa,
69  ae_int_t i1,
70  ae_int_t i2,
71  ae_state *_state);
72 
73 
74 
75 
76 
77 
78 
79 
80 
81 
82 
83 
84 
85 
86 
87 
88 
89 
90 
91 
92 static void hsschur_internalauxschur(ae_bool wantt,
93  ae_bool wantz,
94  ae_int_t n,
95  ae_int_t ilo,
96  ae_int_t ihi,
97  /* Real */ ae_matrix* h,
98  /* Real */ ae_vector* wr,
99  /* Real */ ae_vector* wi,
100  ae_int_t iloz,
101  ae_int_t ihiz,
102  /* Real */ ae_matrix* z,
103  /* Real */ ae_vector* work,
104  /* Real */ ae_vector* workv3,
105  /* Real */ ae_vector* workc1,
106  /* Real */ ae_vector* works1,
107  ae_int_t* info,
108  ae_state *_state);
109 static void hsschur_aux2x2schur(double* a,
110  double* b,
111  double* c,
112  double* d,
113  double* rt1r,
114  double* rt1i,
115  double* rt2r,
116  double* rt2i,
117  double* cs,
118  double* sn,
119  ae_state *_state);
120 static double hsschur_extschursign(double a, double b, ae_state *_state);
121 static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state);
122 
123 
124 
125 
126 static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
127  ae_complex beta,
128  double lnmax,
129  double bnorm,
130  double maxgrowth,
131  double* xnorm,
132  ae_complex* x,
133  ae_state *_state);
134 
135 
136 static ae_bool hpccores_hpcpreparechunkedgradientx(/* Real */ ae_vector* weights,
137  ae_int_t wcount,
138  /* Real */ ae_vector* hpcbuf,
139  ae_state *_state);
140 static ae_bool hpccores_hpcfinalizechunkedgradientx(/* Real */ ae_vector* buf,
141  ae_int_t wcount,
142  /* Real */ ae_vector* grad,
143  ae_state *_state);
144 
145 
146 static void xblas_xsum(/* Real */ ae_vector* w,
147  double mx,
148  ae_int_t n,
149  double* r,
150  double* rerr,
151  ae_state *_state);
152 static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state);
153 
154 
155 static double linmin_ftol = 0.001;
156 static double linmin_xtol = 100*ae_machineepsilon;
157 static ae_int_t linmin_maxfev = 20;
158 static double linmin_stpmin = 1.0E-50;
159 static double linmin_defstpmax = 1.0E+50;
160 static double linmin_armijofactor = 1.3;
161 static void linmin_mcstep(double* stx,
162  double* fx,
163  double* dx,
164  double* sty,
165  double* fy,
166  double* dy,
167  double* stp,
168  double fp,
169  double dp,
170  ae_bool* brackt,
171  double stmin,
172  double stmax,
173  ae_int_t* info,
174  ae_state *_state);
175 
176 
177 static ae_bool ntheory_isprime(ae_int_t n, ae_state *_state);
178 static ae_int_t ntheory_modmul(ae_int_t a,
179  ae_int_t b,
180  ae_int_t n,
181  ae_state *_state);
182 static ae_int_t ntheory_modexp(ae_int_t a,
183  ae_int_t b,
184  ae_int_t n,
185  ae_state *_state);
186 
187 
188 static ae_int_t ftbase_coltype = 0;
189 static ae_int_t ftbase_coloperandscnt = 1;
190 static ae_int_t ftbase_coloperandsize = 2;
191 static ae_int_t ftbase_colmicrovectorsize = 3;
192 static ae_int_t ftbase_colparam0 = 4;
193 static ae_int_t ftbase_colparam1 = 5;
194 static ae_int_t ftbase_colparam2 = 6;
195 static ae_int_t ftbase_colparam3 = 7;
196 static ae_int_t ftbase_colscnt = 8;
197 static ae_int_t ftbase_opend = 0;
198 static ae_int_t ftbase_opcomplexreffft = 1;
199 static ae_int_t ftbase_opbluesteinsfft = 2;
200 static ae_int_t ftbase_opcomplexcodeletfft = 3;
201 static ae_int_t ftbase_opcomplexcodelettwfft = 4;
202 static ae_int_t ftbase_opradersfft = 5;
203 static ae_int_t ftbase_opcomplextranspose = -1;
204 static ae_int_t ftbase_opcomplexfftfactors = -2;
205 static ae_int_t ftbase_opstart = -3;
206 static ae_int_t ftbase_opjmp = -4;
207 static ae_int_t ftbase_opparallelcall = -5;
208 static ae_int_t ftbase_maxradix = 6;
209 static ae_int_t ftbase_updatetw = 16;
210 static ae_int_t ftbase_recursivethreshold = 1024;
211 static ae_int_t ftbase_raderthreshold = 19;
212 static ae_int_t ftbase_ftbasecodeletrecommended = 5;
213 static double ftbase_ftbaseinefficiencyfactor = 1.3;
214 static ae_int_t ftbase_ftbasemaxsmoothfactor = 5;
215 static void ftbase_ftdeterminespacerequirements(ae_int_t n,
216  ae_int_t* precrsize,
217  ae_int_t* precisize,
218  ae_state *_state);
219 static void ftbase_ftcomplexfftplanrec(ae_int_t n,
220  ae_int_t k,
221  ae_bool childplan,
222  ae_bool topmostplan,
223  ae_int_t* rowptr,
224  ae_int_t* bluesteinsize,
225  ae_int_t* precrptr,
226  ae_int_t* preciptr,
227  fasttransformplan* plan,
228  ae_state *_state);
229 static void ftbase_ftpushentry(fasttransformplan* plan,
230  ae_int_t* rowptr,
231  ae_int_t etype,
232  ae_int_t eopcnt,
233  ae_int_t eopsize,
234  ae_int_t emcvsize,
235  ae_int_t eparam0,
236  ae_state *_state);
237 static void ftbase_ftpushentry2(fasttransformplan* plan,
238  ae_int_t* rowptr,
239  ae_int_t etype,
240  ae_int_t eopcnt,
241  ae_int_t eopsize,
242  ae_int_t emcvsize,
243  ae_int_t eparam0,
244  ae_int_t eparam1,
245  ae_state *_state);
246 static void ftbase_ftpushentry4(fasttransformplan* plan,
247  ae_int_t* rowptr,
248  ae_int_t etype,
249  ae_int_t eopcnt,
250  ae_int_t eopsize,
251  ae_int_t emcvsize,
252  ae_int_t eparam0,
253  ae_int_t eparam1,
254  ae_int_t eparam2,
255  ae_int_t eparam3,
256  ae_state *_state);
257 static void ftbase_ftapplysubplan(fasttransformplan* plan,
258  ae_int_t subplan,
259  /* Real */ ae_vector* a,
260  ae_int_t abase,
261  ae_int_t aoffset,
262  /* Real */ ae_vector* buf,
263  ae_int_t repcnt,
264  ae_state *_state);
265 static void ftbase_ftapplycomplexreffft(/* Real */ ae_vector* a,
266  ae_int_t offs,
267  ae_int_t operandscnt,
268  ae_int_t operandsize,
269  ae_int_t microvectorsize,
270  /* Real */ ae_vector* buf,
271  ae_state *_state);
272 static void ftbase_ftapplycomplexcodeletfft(/* Real */ ae_vector* a,
273  ae_int_t offs,
274  ae_int_t operandscnt,
275  ae_int_t operandsize,
276  ae_int_t microvectorsize,
277  ae_state *_state);
278 static void ftbase_ftapplycomplexcodelettwfft(/* Real */ ae_vector* a,
279  ae_int_t offs,
280  ae_int_t operandscnt,
281  ae_int_t operandsize,
282  ae_int_t microvectorsize,
283  ae_state *_state);
284 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n,
285  ae_int_t m,
286  /* Real */ ae_vector* precr,
287  ae_int_t offs,
288  ae_state *_state);
289 static void ftbase_ftbluesteinsfft(fasttransformplan* plan,
290  /* Real */ ae_vector* a,
291  ae_int_t abase,
292  ae_int_t aoffset,
293  ae_int_t operandscnt,
294  ae_int_t n,
295  ae_int_t m,
296  ae_int_t precoffs,
297  ae_int_t subplan,
298  /* Real */ ae_vector* bufa,
299  /* Real */ ae_vector* bufb,
300  /* Real */ ae_vector* bufc,
301  /* Real */ ae_vector* bufd,
302  ae_state *_state);
303 static void ftbase_ftprecomputeradersfft(ae_int_t n,
304  ae_int_t rq,
305  ae_int_t riq,
306  /* Real */ ae_vector* precr,
307  ae_int_t offs,
308  ae_state *_state);
309 static void ftbase_ftradersfft(fasttransformplan* plan,
310  /* Real */ ae_vector* a,
311  ae_int_t abase,
312  ae_int_t aoffset,
313  ae_int_t operandscnt,
314  ae_int_t n,
315  ae_int_t subplan,
316  ae_int_t rq,
317  ae_int_t riq,
318  ae_int_t precoffs,
319  /* Real */ ae_vector* buf,
320  ae_state *_state);
321 static void ftbase_ftfactorize(ae_int_t n,
322  ae_bool isroot,
323  ae_int_t* n1,
324  ae_int_t* n2,
325  ae_state *_state);
326 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state);
327 static void ftbase_ffttwcalc(/* Real */ ae_vector* a,
328  ae_int_t aoffset,
329  ae_int_t n1,
330  ae_int_t n2,
331  ae_state *_state);
332 static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a,
333  ae_int_t m,
334  ae_int_t n,
335  ae_int_t astart,
336  /* Real */ ae_vector* buf,
337  ae_state *_state);
338 static void ftbase_ffticltrec(/* Real */ ae_vector* a,
339  ae_int_t astart,
340  ae_int_t astride,
341  /* Real */ ae_vector* b,
342  ae_int_t bstart,
343  ae_int_t bstride,
344  ae_int_t m,
345  ae_int_t n,
346  ae_state *_state);
347 static void ftbase_fftirltrec(/* Real */ ae_vector* a,
348  ae_int_t astart,
349  ae_int_t astride,
350  /* Real */ ae_vector* b,
351  ae_int_t bstart,
352  ae_int_t bstride,
353  ae_int_t m,
354  ae_int_t n,
355  ae_state *_state);
356 static void ftbase_ftbasefindsmoothrec(ae_int_t n,
357  ae_int_t seed,
358  ae_int_t leastfactor,
359  ae_int_t* best,
360  ae_state *_state);
361 
362 
363 
364 
365 
366 
367 
368 
369 
370 /*************************************************************************
371 This function is used to set error flags during unit tests. When the COND
372 parameter is True, the FLAG variable is set to True. When COND is False,
373 FLAG is left unchanged.
374 
375 The purpose of this function is to have a single point where unit test
376 failures can be detected.
377 
378 This function returns the value of COND.
379 *************************************************************************/
380 ae_bool seterrorflag(ae_bool* flag, ae_bool cond, ae_state *_state)
381 {
382  ae_bool result;
383 
384 
385  if( cond )
386  {
387  *flag = ae_true;
388  }
389  result = cond;
390  return result;
391 }
392 
393 
394 /*************************************************************************
395 Internally calls SetErrorFlag() with condition:
396 
397  Abs(Val-RefVal)>Tol*Max(Abs(RefVal),S)
398 
399 This function is used to test the relative error of Val against RefVal; the
400 relative error is replaced by an absolute one when the scale of RefVal is
401 less than S.
402 
403 This function returns the value of the evaluated condition.
404 *************************************************************************/
405 ae_bool seterrorflagdiff(ae_bool* flag,
406  double val,
407  double refval,
408  double tol,
409  double s,
410  ae_state *_state)
411 {
412  ae_bool result;
413 
414 
415  result = seterrorflag(flag, ae_fp_greater(ae_fabs(val-refval, _state),tol*ae_maxreal(ae_fabs(refval, _state), s, _state)), _state);
416  return result;
417 }
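/*
 * Illustrative sketch of how the check above behaves (the local variable
 * "flag" is hypothetical, not part of this file). With Tol=1.0E-3 and S=1.0
 * the comparison is relative for |RefVal|>=1 and absolute otherwise:
 *
 *     ae_bool flag = ae_false;
 *     seterrorflagdiff(&flag, 1.0010, 1.0, 1.0E-3, 1.0, _state); // |diff|=0.0010<=0.0010, flag stays false
 *     seterrorflagdiff(&flag, 1.0020, 1.0, 1.0E-3, 1.0, _state); // |diff|=0.0020> 0.0010, flag becomes true
 */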
418 
419 
420 /*************************************************************************
421 The function "touches" integer - it is used to avoid compiler messages
422 about unused variables (in rare cases when we do NOT want to remove these
423 variables).
424 
425  -- ALGLIB --
426  Copyright 17.09.2012 by Bochkanov Sergey
427 *************************************************************************/
428 void touchint(ae_int_t* a, ae_state *_state)
429 {
430 
431 
432 }
433 
434 
435 /*************************************************************************
436 The function "touches" real - it is used to avoid compiler messages
437 about unused variables (in rare cases when we do NOT want to remove these
438 variables).
439 
440  -- ALGLIB --
441  Copyright 17.09.2012 by Bochkanov Sergey
442 *************************************************************************/
443 void touchreal(double* a, ae_state *_state)
444 {
445 
446 
447 }
448 
449 
450 /*************************************************************************
451 This function converts an integer value to a real value.
452 
453  -- ALGLIB --
454  Copyright 17.09.2012 by Bochkanov Sergey
455 *************************************************************************/
456 double inttoreal(ae_int_t a, ae_state *_state)
457 {
458  double result;
459 
460 
461  result = a;
462  return result;
463 }
464 
465 
466 /*************************************************************************
467 This function calculates the binary logarithm.
468 
469 NOTE: it costs about twice as much as Ln(x).
470 
471  -- ALGLIB --
472  Copyright 17.09.2012 by Bochkanov Sergey
473 *************************************************************************/
474 double log2(double x, ae_state *_state)
475 {
476  double result;
477 
478 
479  result = ae_log(x, _state)/ae_log(2, _state);
480  return result;
481 }
482 
483 
484 /*************************************************************************
485 This function compares two numbers for approximate equality, with tolerance
486 to errors as large as max(|a|,|b|)*tol.
487 
488 
489  -- ALGLIB --
490  Copyright 02.12.2009 by Bochkanov Sergey
491 *************************************************************************/
492 ae_bool approxequalrel(double a, double b, double tol, ae_state *_state)
493 {
494  ae_bool result;
495 
496 
497  result = ae_fp_less_eq(ae_fabs(a-b, _state),ae_maxreal(ae_fabs(a, _state), ae_fabs(b, _state), _state)*tol);
498  return result;
499 }
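/*
 * Illustrative sketch (the values are hypothetical): with tol=1.0E-9 the call
 * approxequalrel(1.0, 1.0+5.0E-10, 1.0E-9, _state) returns ae_true because
 * |a-b|=5.0E-10 <= max(|a|,|b|)*tol, while approxequalrel(1.0, 1.01, 1.0E-9, _state)
 * returns ae_false.
 */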
500 
501 
502 /*************************************************************************
503 This function generates a 1-dimensional general interpolation task with a
504 moderate Lipschitz constant (close to 1.0).
505 
506 If N=1, the subroutine generates only one point at the middle of [A,B].
507 
508  -- ALGLIB --
509  Copyright 02.12.2009 by Bochkanov Sergey
510 *************************************************************************/
511 void taskgenint1d(double a,
512  double b,
513  ae_int_t n,
514  /* Real */ ae_vector* x,
515  /* Real */ ae_vector* y,
516  ae_state *_state)
517 {
518  ae_int_t i;
519  double h;
520 
521  ae_vector_clear(x);
522  ae_vector_clear(y);
523 
524  ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
525  ae_vector_set_length(x, n, _state);
526  ae_vector_set_length(y, n, _state);
527  if( n>1 )
528  {
529  x->ptr.p_double[0] = a;
530  y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
531  h = (b-a)/(n-1);
532  for(i=1; i<=n-1; i++)
533  {
534  if( i!=n-1 )
535  {
536  x->ptr.p_double[i] = a+(i+0.2*(2*ae_randomreal(_state)-1))*h;
537  }
538  else
539  {
540  x->ptr.p_double[i] = b;
541  }
542  y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
543  }
544  }
545  else
546  {
547  x->ptr.p_double[0] = 0.5*(a+b);
548  y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
549  }
550 }
551 
552 
553 /*************************************************************************
554 This function generates a 1-dimensional equidistant interpolation task with a
555 moderate Lipschitz constant (close to 1.0).
556 
557 If N=1, the subroutine generates only one point at the middle of [A,B].
558 
559  -- ALGLIB --
560  Copyright 02.12.2009 by Bochkanov Sergey
561 *************************************************************************/
562 void taskgenint1dequidist(double a,
563  double b,
564  ae_int_t n,
565  /* Real */ ae_vector* x,
566  /* Real */ ae_vector* y,
567  ae_state *_state)
568 {
569  ae_int_t i;
570  double h;
571 
572  ae_vector_clear(x);
573  ae_vector_clear(y);
574 
575  ae_assert(n>=1, "TaskGenInterpolationEqdist1D: N<1!", _state);
576  ae_vector_set_length(x, n, _state);
577  ae_vector_set_length(y, n, _state);
578  if( n>1 )
579  {
580  x->ptr.p_double[0] = a;
581  y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
582  h = (b-a)/(n-1);
583  for(i=1; i<=n-1; i++)
584  {
585  x->ptr.p_double[i] = a+i*h;
586  y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*h;
587  }
588  }
589  else
590  {
591  x->ptr.p_double[0] = 0.5*(a+b);
592  y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
593  }
594 }
595 
596 
597 /*************************************************************************
598 This function generates a 1-dimensional Chebyshev-1 interpolation task with a
599 moderate Lipschitz constant (close to 1.0).
600 
601 If N=1, the subroutine generates only one point at the middle of [A,B].
602 
603  -- ALGLIB --
604  Copyright 02.12.2009 by Bochkanov Sergey
605 *************************************************************************/
606 void taskgenint1dcheb1(double a,
607  double b,
608  ae_int_t n,
609  /* Real */ ae_vector* x,
610  /* Real */ ae_vector* y,
611  ae_state *_state)
612 {
613  ae_int_t i;
614 
615  ae_vector_clear(x);
616  ae_vector_clear(y);
617 
618  ae_assert(n>=1, "TaskGenInterpolation1DCheb1: N<1!", _state);
619  ae_vector_set_length(x, n, _state);
620  ae_vector_set_length(y, n, _state);
621  if( n>1 )
622  {
623  for(i=0; i<=n-1; i++)
624  {
625  x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*(2*i+1)/(2*n), _state);
626  if( i==0 )
627  {
628  y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
629  }
630  else
631  {
632  y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
633  }
634  }
635  }
636  else
637  {
638  x->ptr.p_double[0] = 0.5*(a+b);
639  y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
640  }
641 }
642 
643 
644 /*************************************************************************
645 This function generates a 1-dimensional Chebyshev-2 interpolation task with a
646 moderate Lipschitz constant (close to 1.0).
647 
648 If N=1, the subroutine generates only one point at the middle of [A,B].
649 
650  -- ALGLIB --
651  Copyright 02.12.2009 by Bochkanov Sergey
652 *************************************************************************/
653 void taskgenint1dcheb2(double a,
654  double b,
655  ae_int_t n,
656  /* Real */ ae_vector* x,
657  /* Real */ ae_vector* y,
658  ae_state *_state)
659 {
660  ae_int_t i;
661 
662  ae_vector_clear(x);
663  ae_vector_clear(y);
664 
665  ae_assert(n>=1, "TaskGenInterpolation1DCheb2: N<1!", _state);
666  ae_vector_set_length(x, n, _state);
667  ae_vector_set_length(y, n, _state);
668  if( n>1 )
669  {
670  for(i=0; i<=n-1; i++)
671  {
672  x->ptr.p_double[i] = 0.5*(b+a)+0.5*(b-a)*ae_cos(ae_pi*i/(n-1), _state);
673  if( i==0 )
674  {
675  y->ptr.p_double[i] = 2*ae_randomreal(_state)-1;
676  }
677  else
678  {
679  y->ptr.p_double[i] = y->ptr.p_double[i-1]+(2*ae_randomreal(_state)-1)*(x->ptr.p_double[i]-x->ptr.p_double[i-1]);
680  }
681  }
682  }
683  else
684  {
685  x->ptr.p_double[0] = 0.5*(a+b);
686  y->ptr.p_double[0] = 2*ae_randomreal(_state)-1;
687  }
688 }
689 
690 
691 /*************************************************************************
692 This function checks that all values from X[] are distinct. It does more
693 than just the usual floating point comparison:
694 * first, it calculates max(X) and min(X)
695 * second, it maps X[] from [min,max] to [1,2]
696 * only at this stage is the actual comparison done
697 
698 The purpose of such a check is to ensure that all values are "distinct enough"
699 and will not cause the interpolation subroutine to fail.
700 
701 NOTE:
702  X[] must be sorted in ascending order (the subroutine ASSERTs this)
703 
704  -- ALGLIB --
705  Copyright 02.12.2009 by Bochkanov Sergey
706 *************************************************************************/
707 ae_bool aredistinct(/* Real */ ae_vector* x,
708  ae_int_t n,
709  ae_state *_state)
710 {
711  double a;
712  double b;
713  ae_int_t i;
714  ae_bool nonsorted;
715  ae_bool result;
716 
717 
718  ae_assert(n>=1, "APSERVAreDistinct: internal error (N<1)", _state);
719  if( n==1 )
720  {
721 
722  /*
723  * everything is alright, it is up to caller to decide whether it
724  * can interpolate something with just one point
725  */
726  result = ae_true;
727  return result;
728  }
729  a = x->ptr.p_double[0];
730  b = x->ptr.p_double[0];
731  nonsorted = ae_false;
732  for(i=1; i<=n-1; i++)
733  {
734  a = ae_minreal(a, x->ptr.p_double[i], _state);
735  b = ae_maxreal(b, x->ptr.p_double[i], _state);
736  nonsorted = nonsorted||ae_fp_greater_eq(x->ptr.p_double[i-1],x->ptr.p_double[i]);
737  }
738  ae_assert(!nonsorted, "APSERVAreDistinct: internal error (not sorted)", _state);
739  for(i=1; i<=n-1; i++)
740  {
741  if( ae_fp_eq((x->ptr.p_double[i]-a)/(b-a)+1,(x->ptr.p_double[i-1]-a)/(b-a)+1) )
742  {
743  result = ae_false;
744  return result;
745  }
746  }
747  result = ae_true;
748  return result;
749 }
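/*
 * Why the mapping to [1,2] (illustrative, values hypothetical): after
 * t[i] = (x[i]-min)/(max-min)+1 all mapped values share the same binary
 * exponent, so the exact comparison of t[i] detects points that coincide up
 * to roughly machine epsilon relative to the spread of X. For example, for
 * X = {0.0, 1.0E-20, 1.0} the first two points map to 1.0 and 1.0+1.0E-20,
 * which are equal in double precision, and the function returns ae_false.
 */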
750 
751 
752 /*************************************************************************
753 This function checks that two boolean values are the same (both are True
754 or both are False).
755 
756  -- ALGLIB --
757  Copyright 02.12.2009 by Bochkanov Sergey
758 *************************************************************************/
759 ae_bool aresameboolean(ae_bool v1, ae_bool v2, ae_state *_state)
760 {
761  ae_bool result;
762 
763 
764  result = (v1&&v2)||(!v1&&!v2);
765  return result;
766 }
767 
768 
769 /*************************************************************************
770 If Length(X)<N, resizes X
771 
772  -- ALGLIB --
773  Copyright 20.03.2009 by Bochkanov Sergey
774 *************************************************************************/
775 void bvectorsetlengthatleast(/* Boolean */ ae_vector* x,
776  ae_int_t n,
777  ae_state *_state)
778 {
779 
780 
781  if( x->cnt<n )
782  {
783  ae_vector_set_length(x, n, _state);
784  }
785 }
786 
787 
788 /*************************************************************************
789 If Length(X)<N, resizes X
790 
791  -- ALGLIB --
792  Copyright 20.03.2009 by Bochkanov Sergey
793 *************************************************************************/
794 void ivectorsetlengthatleast(/* Integer */ ae_vector* x,
795  ae_int_t n,
796  ae_state *_state)
797 {
798 
799 
800  if( x->cnt<n )
801  {
802  ae_vector_set_length(x, n, _state);
803  }
804 }
805 
806 
807 /*************************************************************************
808 If Length(X)<N, resizes X
809 
810  -- ALGLIB --
811  Copyright 20.03.2009 by Bochkanov Sergey
812 *************************************************************************/
813 void rvectorsetlengthatleast(/* Real */ ae_vector* x,
814  ae_int_t n,
815  ae_state *_state)
816 {
817 
818 
819  if( x->cnt<n )
820  {
821  ae_vector_set_length(x, n, _state);
822  }
823 }
824 
825 
826 /*************************************************************************
827 If Cols(X)<N or Rows(X)<M, resizes X
828 
829  -- ALGLIB --
830  Copyright 20.03.2009 by Bochkanov Sergey
831 *************************************************************************/
832 void rmatrixsetlengthatleast(/* Real */ ae_matrix* x,
833  ae_int_t m,
834  ae_int_t n,
835  ae_state *_state)
836 {
837 
838 
839  if( m>0&&n>0 )
840  {
841  if( x->rows<m||x->cols<n )
842  {
843  ae_matrix_set_length(x, m, n, _state);
844  }
845  }
846 }
847 
848 
849 /*************************************************************************
850 Resizes X and:
851 * preserves old contents of X
852 * fills new elements by zeros
853 
854  -- ALGLIB --
855  Copyright 20.03.2009 by Bochkanov Sergey
856 *************************************************************************/
857 void rmatrixresize(/* Real */ ae_matrix* x,
858  ae_int_t m,
859  ae_int_t n,
860  ae_state *_state)
861 {
862  ae_frame _frame_block;
863  ae_matrix oldx;
864  ae_int_t i;
865  ae_int_t j;
866  ae_int_t m2;
867  ae_int_t n2;
868 
869  ae_frame_make(_state, &_frame_block);
870  ae_matrix_init(&oldx, 0, 0, DT_REAL, _state, ae_true);
871 
872  m2 = x->rows;
873  n2 = x->cols;
874  ae_swap_matrices(x, &oldx);
875  ae_matrix_set_length(x, m, n, _state);
876  for(i=0; i<=m-1; i++)
877  {
878  for(j=0; j<=n-1; j++)
879  {
880  if( i<m2&&j<n2 )
881  {
882  x->ptr.pp_double[i][j] = oldx.ptr.pp_double[i][j];
883  }
884  else
885  {
886  x->ptr.pp_double[i][j] = 0.0;
887  }
888  }
889  }
890  ae_frame_leave(_state);
891 }
892 
893 
894 /*************************************************************************
895 Resizes X and:
896 * preserves old contents of X
897 * fills new elements by zeros
898 
899  -- ALGLIB --
900  Copyright 20.03.2009 by Bochkanov Sergey
901 *************************************************************************/
902 void imatrixresize(/* Integer */ ae_matrix* x,
903  ae_int_t m,
904  ae_int_t n,
905  ae_state *_state)
906 {
907  ae_frame _frame_block;
908  ae_matrix oldx;
909  ae_int_t i;
910  ae_int_t j;
911  ae_int_t m2;
912  ae_int_t n2;
913 
914  ae_frame_make(_state, &_frame_block);
915  ae_matrix_init(&oldx, 0, 0, DT_INT, _state, ae_true);
916 
917  m2 = x->rows;
918  n2 = x->cols;
919  ae_swap_matrices(x, &oldx);
920  ae_matrix_set_length(x, m, n, _state);
921  for(i=0; i<=m-1; i++)
922  {
923  for(j=0; j<=n-1; j++)
924  {
925  if( i<m2&&j<n2 )
926  {
927  x->ptr.pp_int[i][j] = oldx.ptr.pp_int[i][j];
928  }
929  else
930  {
931  x->ptr.pp_int[i][j] = 0;
932  }
933  }
934  }
935  ae_frame_leave(_state);
936 }
937 
938 
939 /*************************************************************************
940 This function checks that length(X) is at least N and first N values from
941 X[] are finite
942 
943  -- ALGLIB --
944  Copyright 18.06.2010 by Bochkanov Sergey
945 *************************************************************************/
946 ae_bool isfinitevector(/* Real */ ae_vector* x,
947  ae_int_t n,
948  ae_state *_state)
949 {
950  ae_int_t i;
951  ae_bool result;
952 
953 
954  ae_assert(n>=0, "APSERVIsFiniteVector: internal error (N<0)", _state);
955  if( n==0 )
956  {
957  result = ae_true;
958  return result;
959  }
960  if( x->cnt<n )
961  {
962  result = ae_false;
963  return result;
964  }
965  for(i=0; i<=n-1; i++)
966  {
967  if( !ae_isfinite(x->ptr.p_double[i], _state) )
968  {
969  result = ae_false;
970  return result;
971  }
972  }
973  result = ae_true;
974  return result;
975 }
976 
977 
978 /*************************************************************************
979 This function checks that first N values from X[] are finite
980 
981  -- ALGLIB --
982  Copyright 18.06.2010 by Bochkanov Sergey
983 *************************************************************************/
984 ae_bool isfinitecvector(/* Complex */ ae_vector* z,
985  ae_int_t n,
986  ae_state *_state)
987 {
988  ae_int_t i;
989  ae_bool result;
990 
991 
992  ae_assert(n>=0, "APSERVIsFiniteCVector: internal error (N<0)", _state);
993  for(i=0; i<=n-1; i++)
994  {
995  if( !ae_isfinite(z->ptr.p_complex[i].x, _state)||!ae_isfinite(z->ptr.p_complex[i].y, _state) )
996  {
997  result = ae_false;
998  return result;
999  }
1000  }
1001  result = ae_true;
1002  return result;
1003 }
1004 
1005 
1006 /*************************************************************************
1007 This function checks that size of X is at least MxN and values from
1008 X[0..M-1,0..N-1] are finite.
1009 
1010  -- ALGLIB --
1011  Copyright 18.06.2010 by Bochkanov Sergey
1012 *************************************************************************/
1013 ae_bool apservisfinitematrix(/* Real */ ae_matrix* x,
1014  ae_int_t m,
1015  ae_int_t n,
1016  ae_state *_state)
1017 {
1018  ae_int_t i;
1019  ae_int_t j;
1020  ae_bool result;
1021 
1022 
1023  ae_assert(n>=0, "APSERVIsFiniteMatrix: internal error (N<0)", _state);
1024  ae_assert(m>=0, "APSERVIsFiniteMatrix: internal error (M<0)", _state);
1025  if( m==0||n==0 )
1026  {
1027  result = ae_true;
1028  return result;
1029  }
1030  if( x->rows<m||x->cols<n )
1031  {
1032  result = ae_false;
1033  return result;
1034  }
1035  for(i=0; i<=m-1; i++)
1036  {
1037  for(j=0; j<=n-1; j++)
1038  {
1039  if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
1040  {
1041  result = ae_false;
1042  return result;
1043  }
1044  }
1045  }
1046  result = ae_true;
1047  return result;
1048 }
1049 
1050 
1051 /*************************************************************************
1052 This function checks that all values from X[0..M-1,0..N-1] are finite
1053 
1054  -- ALGLIB --
1055  Copyright 18.06.2010 by Bochkanov Sergey
1056 *************************************************************************/
1057 ae_bool apservisfinitecmatrix(/* Complex */ ae_matrix* x,
1058  ae_int_t m,
1059  ae_int_t n,
1060  ae_state *_state)
1061 {
1062  ae_int_t i;
1063  ae_int_t j;
1064  ae_bool result;
1065 
1066 
1067  ae_assert(n>=0, "APSERVIsFiniteCMatrix: internal error (N<0)", _state);
1068  ae_assert(m>=0, "APSERVIsFiniteCMatrix: internal error (M<0)", _state);
1069  for(i=0; i<=m-1; i++)
1070  {
1071  for(j=0; j<=n-1; j++)
1072  {
1073  if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
1074  {
1075  result = ae_false;
1076  return result;
1077  }
1078  }
1079  }
1080  result = ae_true;
1081  return result;
1082 }
1083 
1084 
1085 /*************************************************************************
1086 This function checks that size of X is at least NxN and all values from
1087 upper/lower triangle of X[0..N-1,0..N-1] are finite
1088 
1089  -- ALGLIB --
1090  Copyright 18.06.2010 by Bochkanov Sergey
1091 *************************************************************************/
1092 ae_bool isfinitertrmatrix(/* Real */ ae_matrix* x,
1093  ae_int_t n,
1094  ae_bool isupper,
1095  ae_state *_state)
1096 {
1097  ae_int_t i;
1098  ae_int_t j1;
1099  ae_int_t j2;
1100  ae_int_t j;
1101  ae_bool result;
1102 
1103 
1104  ae_assert(n>=0, "APSERVIsFiniteRTRMatrix: internal error (N<0)", _state);
1105  if( n==0 )
1106  {
1107  result = ae_true;
1108  return result;
1109  }
1110  if( x->rows<n||x->cols<n )
1111  {
1112  result = ae_false;
1113  return result;
1114  }
1115  for(i=0; i<=n-1; i++)
1116  {
1117  if( isupper )
1118  {
1119  j1 = i;
1120  j2 = n-1;
1121  }
1122  else
1123  {
1124  j1 = 0;
1125  j2 = i;
1126  }
1127  for(j=j1; j<=j2; j++)
1128  {
1129  if( !ae_isfinite(x->ptr.pp_double[i][j], _state) )
1130  {
1131  result = ae_false;
1132  return result;
1133  }
1134  }
1135  }
1136  result = ae_true;
1137  return result;
1138 }
1139 
1140 
1141 /*************************************************************************
1142 This function checks that all values from upper/lower triangle of
1143 X[0..N-1,0..N-1] are finite
1144 
1145  -- ALGLIB --
1146  Copyright 18.06.2010 by Bochkanov Sergey
1147 *************************************************************************/
1148 ae_bool apservisfinitectrmatrix(/* Complex */ ae_matrix* x,
1149  ae_int_t n,
1150  ae_bool isupper,
1151  ae_state *_state)
1152 {
1153  ae_int_t i;
1154  ae_int_t j1;
1155  ae_int_t j2;
1156  ae_int_t j;
1157  ae_bool result;
1158 
1159 
1160  ae_assert(n>=0, "APSERVIsFiniteCTRMatrix: internal error (N<0)", _state);
1161  for(i=0; i<=n-1; i++)
1162  {
1163  if( isupper )
1164  {
1165  j1 = i;
1166  j2 = n-1;
1167  }
1168  else
1169  {
1170  j1 = 0;
1171  j2 = i;
1172  }
1173  for(j=j1; j<=j2; j++)
1174  {
1175  if( !ae_isfinite(x->ptr.pp_complex[i][j].x, _state)||!ae_isfinite(x->ptr.pp_complex[i][j].y, _state) )
1176  {
1177  result = ae_false;
1178  return result;
1179  }
1180  }
1181  }
1182  result = ae_true;
1183  return result;
1184 }
1185 
1186 
1187 /*************************************************************************
1188 This function checks that all values from X[0..M-1,0..N-1] are finite or
1189 NaN's.
1190 
1191  -- ALGLIB --
1192  Copyright 18.06.2010 by Bochkanov Sergey
1193 *************************************************************************/
1194 ae_bool apservisfiniteornanmatrix(/* Real */ ae_matrix* x,
1195  ae_int_t m,
1196  ae_int_t n,
1197  ae_state *_state)
1198 {
1199  ae_int_t i;
1200  ae_int_t j;
1201  ae_bool result;
1202 
1203 
1204  ae_assert(n>=0, "APSERVIsFiniteOrNaNMatrix: internal error (N<0)", _state);
1205  ae_assert(m>=0, "APSERVIsFiniteOrNaNMatrix: internal error (M<0)", _state);
1206  for(i=0; i<=m-1; i++)
1207  {
1208  for(j=0; j<=n-1; j++)
1209  {
1210  if( !(ae_isfinite(x->ptr.pp_double[i][j], _state)||ae_isnan(x->ptr.pp_double[i][j], _state)) )
1211  {
1212  result = ae_false;
1213  return result;
1214  }
1215  }
1216  }
1217  result = ae_true;
1218  return result;
1219 }
1220 
1221 
1222 /*************************************************************************
1223 Safe sqrt(x^2+y^2)
1224 
1225  -- ALGLIB --
1226  Copyright by Bochkanov Sergey
1227 *************************************************************************/
1228 double safepythag2(double x, double y, ae_state *_state)
1229 {
1230  double w;
1231  double xabs;
1232  double yabs;
1233  double z;
1234  double result;
1235 
1236 
1237  xabs = ae_fabs(x, _state);
1238  yabs = ae_fabs(y, _state);
1239  w = ae_maxreal(xabs, yabs, _state);
1240  z = ae_minreal(xabs, yabs, _state);
1241  if( ae_fp_eq(z,0) )
1242  {
1243  result = w;
1244  }
1245  else
1246  {
1247  result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
1248  }
1249  return result;
1250 }
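/*
 * Illustrative sketch: for x=y=1.0E+200 the naive expression sqrt(x*x+y*y)
 * overflows to +INF, while safepythag2 factors out w=max(|x|,|y|) and
 * evaluates w*sqrt(1+(z/w)^2), which is about 1.414E+200 here.
 */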
1251 
1252 
1253 /*************************************************************************
1254 Safe sqrt(x^2+y^2+z^2)
1255 
1256  -- ALGLIB --
1257  Copyright by Bochkanov Sergey
1258 *************************************************************************/
1259 double safepythag3(double x, double y, double z, ae_state *_state)
1260 {
1261  double w;
1262  double result;
1263 
1264 
1265  w = ae_maxreal(ae_fabs(x, _state), ae_maxreal(ae_fabs(y, _state), ae_fabs(z, _state), _state), _state);
1266  if( ae_fp_eq(w,0) )
1267  {
1268  result = 0;
1269  return result;
1270  }
1271  x = x/w;
1272  y = y/w;
1273  z = z/w;
1274  result = w*ae_sqrt(ae_sqr(x, _state)+ae_sqr(y, _state)+ae_sqr(z, _state), _state);
1275  return result;
1276 }
1277 
1278 
1279 /*************************************************************************
1280 Safe division.
1281 
1282 This function attempts to calculate R=X/Y without overflow.
1283 
1284 It returns:
1285 * +1, if abs(X/Y)>=MaxRealNumber or undefined - overflow-like situation
1286  (no overflow is generated; R is either NAN, PosINF, or NegINF)
1287 * 0, if MinRealNumber<abs(X/Y)<MaxRealNumber or X=0, Y<>0
1288  (R contains result, may be zero)
1289 * -1, if 0<abs(X/Y)<MinRealNumber - underflow-like situation
1290  (R contains zero; it corresponds to underflow)
1291 
1292 No overflow is generated in any case.
1293 
1294  -- ALGLIB --
1295  Copyright by Bochkanov Sergey
1296 *************************************************************************/
1297 ae_int_t saferdiv(double x, double y, double* r, ae_state *_state)
1298 {
1299  ae_int_t result;
1300 
1301  *r = 0;
1302 
1303 
1304  /*
1305  * Two special cases:
1306  * * Y=0
1307  * * X=0 and Y<>0
1308  */
1309  if( ae_fp_eq(y,0) )
1310  {
1311  result = 1;
1312  if( ae_fp_eq(x,0) )
1313  {
1314  *r = _state->v_nan;
1315  }
1316  if( ae_fp_greater(x,0) )
1317  {
1318  *r = _state->v_posinf;
1319  }
1320  if( ae_fp_less(x,0) )
1321  {
1322  *r = _state->v_neginf;
1323  }
1324  return result;
1325  }
1326  if( ae_fp_eq(x,0) )
1327  {
1328  *r = 0;
1329  result = 0;
1330  return result;
1331  }
1332 
1333  /*
1334  * make Y>0
1335  */
1336  if( ae_fp_less(y,0) )
1337  {
1338  x = -x;
1339  y = -y;
1340  }
1341 
1342  /*
1343  *
1344  */
1345  if( ae_fp_greater_eq(y,1) )
1346  {
1347  *r = x/y;
1348  if( ae_fp_less_eq(ae_fabs(*r, _state),ae_minrealnumber) )
1349  {
1350  result = -1;
1351  *r = 0;
1352  }
1353  else
1354  {
1355  result = 0;
1356  }
1357  }
1358  else
1359  {
1360  if( ae_fp_greater_eq(ae_fabs(x, _state),ae_maxrealnumber*y) )
1361  {
1362  if( ae_fp_greater(x,0) )
1363  {
1364  *r = _state->v_posinf;
1365  }
1366  else
1367  {
1368  *r = _state->v_neginf;
1369  }
1370  result = 1;
1371  }
1372  else
1373  {
1374  *r = x/y;
1375  result = 0;
1376  }
1377  }
1378  return result;
1379 }
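/*
 * Illustrative sketch of the return codes (the local names "r" and "code"
 * are hypothetical):
 *
 *     double r;
 *     ae_int_t code;
 *     code = saferdiv(1.0, 2.0, &r, _state);           // code= 0, r=0.5
 *     code = saferdiv(1.0, 0.0, &r, _state);           // code=+1, r=+INF
 *     code = saferdiv(1.0E-300, 1.0E+300, &r, _state); // code=-1, r=0 (underflow-like)
 */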
1380 
1381 
1382 /*************************************************************************
1383 This function calculates "safe" min(X/Y,V) for positive finite X, Y, V.
1384 No overflow is generated in any case.
1385 
1386  -- ALGLIB --
1387  Copyright by Bochkanov Sergey
1388 *************************************************************************/
1389 double safeminposrv(double x, double y, double v, ae_state *_state)
1390 {
1391  double r;
1392  double result;
1393 
1394 
1395  if( ae_fp_greater_eq(y,1) )
1396  {
1397 
1398  /*
1399  * Y>=1, we can safely divide by Y
1400  */
1401  r = x/y;
1402  result = v;
1403  if( ae_fp_greater(v,r) )
1404  {
1405  result = r;
1406  }
1407  else
1408  {
1409  result = v;
1410  }
1411  }
1412  else
1413  {
1414 
1415  /*
1416  * Y<1, we can safely multiply by Y
1417  */
1418  if( ae_fp_less(x,v*y) )
1419  {
1420  result = x/y;
1421  }
1422  else
1423  {
1424  result = v;
1425  }
1426  }
1427  return result;
1428 }
1429 
1430 
1431 /*************************************************************************
1432 This function makes periodic mapping of X to [A,B].
1433 
1434 It accepts X, A, B (A<B). It returns T which lies in [A,B] and an integer K,
1435 such that X = T + K*(B-A).
1436 
1437 NOTES:
1438 * K is represented as real value, although actually it is integer
1439 * T is guaranteed to be in [A,B]
1440 * T replaces X
1441 
1442  -- ALGLIB --
1443  Copyright by Bochkanov Sergey
1444 *************************************************************************/
1445 void apperiodicmap(double* x,
1446  double a,
1447  double b,
1448  double* k,
1449  ae_state *_state)
1450 {
1451 
1452  *k = 0;
1453 
1454  ae_assert(ae_fp_less(a,b), "APPeriodicMap: internal error!", _state);
1455  *k = ae_ifloor((*x-a)/(b-a), _state);
1456  *x = *x-*k*(b-a);
1457  while(ae_fp_less(*x,a))
1458  {
1459  *x = *x+(b-a);
1460  *k = *k-1;
1461  }
1462  while(ae_fp_greater(*x,b))
1463  {
1464  *x = *x-(b-a);
1465  *k = *k+1;
1466  }
1467  *x = ae_maxreal(*x, a, _state);
1468  *x = ae_minreal(*x, b, _state);
1469 }
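/*
 * Illustrative sketch (local names are hypothetical): mapping x=7.5 onto
 * [0,2] gives t=1.5 and k=3, so that 7.5 = 1.5 + 3*(2-0):
 *
 *     double x = 7.5;
 *     double k;
 *     apperiodicmap(&x, 0.0, 2.0, &k, _state);   // x=1.5, k=3.0
 */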
1470 
1471 
1472 /*************************************************************************
1473 Returns a random normal number using the low-quality system-provided generator
1474 
1475  -- ALGLIB --
1476  Copyright 20.03.2009 by Bochkanov Sergey
1477 *************************************************************************/
1478 double randomnormal(ae_state *_state)
1479 {
1480  double u;
1481  double v;
1482  double s;
1483  double result;
1484 
1485 
1486  for(;;)
1487  {
1488  u = 2*ae_randomreal(_state)-1;
1489  v = 2*ae_randomreal(_state)-1;
1490  s = ae_sqr(u, _state)+ae_sqr(v, _state);
1491  if( ae_fp_greater(s,0)&&ae_fp_less(s,1) )
1492  {
1493 
1494  /*
1495  * two Sqrt's instead of one to
1496  * avoid overflow when S is too small
1497  */
1498  s = ae_sqrt(-2*ae_log(s, _state), _state)/ae_sqrt(s, _state);
1499  result = u*s;
1500  return result;
1501  }
1502  }
1503  return result;
1504 }
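/*
 * Note: this appears to be the polar (Marsaglia) form of the Box-Muller
 * transform: for a uniform point (u,v) in the unit disk with s=u^2+v^2,
 * u*sqrt(-2*ln(s)/s) is distributed as N(0,1). A sample from N(m,sigma^2)
 * can be obtained as m+sigma*randomnormal(_state), where m and sigma are
 * hypothetical caller-side values.
 */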
1505 
1506 
1507 /*************************************************************************
1508 Generates a random unit vector using the low-quality system-provided generator.
1509 Reallocates the array if it is too short.
1510 
1511  -- ALGLIB --
1512  Copyright 20.03.2009 by Bochkanov Sergey
1513 *************************************************************************/
1514 void randomunit(ae_int_t n, /* Real */ ae_vector* x, ae_state *_state)
1515 {
1516  ae_int_t i;
1517  double v;
1518  double vv;
1519 
1520 
1521  ae_assert(n>0, "RandomUnit: N<=0", _state);
1522  if( x->cnt<n )
1523  {
1524  ae_vector_set_length(x, n, _state);
1525  }
1526  do
1527  {
1528  v = 0.0;
1529  for(i=0; i<=n-1; i++)
1530  {
1531  vv = randomnormal(_state);
1532  x->ptr.p_double[i] = vv;
1533  v = v+vv*vv;
1534  }
1535  }
1536  while(ae_fp_less_eq(v,0));
1537  v = 1/ae_sqrt(v, _state);
1538  for(i=0; i<=n-1; i++)
1539  {
1540  x->ptr.p_double[i] = x->ptr.p_double[i]*v;
1541  }
1542 }
1543 
1544 
1545 /*************************************************************************
1546 This function is used to increment the value of an integer variable
1547 *************************************************************************/
1548 void inc(ae_int_t* v, ae_state *_state)
1549 {
1550 
1551 
1552  *v = *v+1;
1553 }
1554 
1555 
1556 /*************************************************************************
1557 This function is used to decrement the value of an integer variable
1558 *************************************************************************/
1559 void dec(ae_int_t* v, ae_state *_state)
1560 {
1561 
1562 
1563  *v = *v-1;
1564 }
1565 
1566 
1567 /*************************************************************************
1568 This function performs two operations:
1569 1. it decrements the value of an integer variable, if it is positive
1570 2. it explicitly sets the variable to zero if it is non-positive
1571 It is used by some algorithms to decrease the value of internal counters.
1572 *************************************************************************/
1573 void countdown(ae_int_t* v, ae_state *_state)
1574 {
1575 
1576 
1577  if( *v>0 )
1578  {
1579  *v = *v-1;
1580  }
1581  else
1582  {
1583  *v = 0;
1584  }
1585 }
1586 
1587 
1588 /*************************************************************************
1589 'bounds' value: maps X to [B1,B2]
1590 
1591  -- ALGLIB --
1592  Copyright 20.03.2009 by Bochkanov Sergey
1593 *************************************************************************/
1594 double boundval(double x, double b1, double b2, ae_state *_state)
1595 {
1596  double result;
1597 
1598 
1599  if( ae_fp_less_eq(x,b1) )
1600  {
1601  result = b1;
1602  return result;
1603  }
1604  if( ae_fp_greater_eq(x,b2) )
1605  {
1606  result = b2;
1607  return result;
1608  }
1609  result = x;
1610  return result;
1611 }
1612 
1613 
1614 /*************************************************************************
1615 Allocation of serializer: complex value
1616 *************************************************************************/
1617 void alloccomplex(ae_serializer* s, ae_complex v, ae_state *_state)
1618 {
1619 
1620 
1621  ae_serializer_alloc_entry(s);
1622  ae_serializer_alloc_entry(s);
1623 }
1624 
1625 
1626 /*************************************************************************
1627 Serialization: complex value
1628 *************************************************************************/
1629 void serializecomplex(ae_serializer* s, ae_complex v, ae_state *_state)
1630 {
1631 
1632 
1633  ae_serializer_serialize_double(s, v.x, _state);
1634  ae_serializer_serialize_double(s, v.y, _state);
1635 }
1636 
1637 
1638 /*************************************************************************
1639 Unserialization: complex value
1640 *************************************************************************/
1641 ae_complex unserializecomplex(ae_serializer* s, ae_state *_state)
1642 {
1643  ae_complex result;
1644 
1645 
1646  ae_serializer_unserialize_double(s, &result.x, _state);
1647  ae_serializer_unserialize_double(s, &result.y, _state);
1648  return result;
1649 }
1650 
1651 
1652 /*************************************************************************
1653 Allocation of serializer: real array
1654 *************************************************************************/
1655 void allocrealarray(ae_serializer* s,
1656  /* Real */ ae_vector* v,
1657  ae_int_t n,
1658  ae_state *_state)
1659 {
1660  ae_int_t i;
1661 
1662 
1663  if( n<0 )
1664  {
1665  n = v->cnt;
1666  }
1667  ae_serializer_alloc_entry(s);
1668  for(i=0; i<=n-1; i++)
1669  {
1670  ae_serializer_alloc_entry(s);
1671  }
1672 }
1673 
1674 
1675 /*************************************************************************
1676 Serialization: real array
1677 *************************************************************************/
1678 void serializerealarray(ae_serializer* s,
1679  /* Real */ ae_vector* v,
1680  ae_int_t n,
1681  ae_state *_state)
1682 {
1683  ae_int_t i;
1684 
1685 
1686  if( n<0 )
1687  {
1688  n = v->cnt;
1689  }
1690  ae_serializer_serialize_int(s, n, _state);
1691  for(i=0; i<=n-1; i++)
1692  {
1693  ae_serializer_serialize_double(s, v->ptr.p_double[i], _state);
1694  }
1695 }
1696 
1697 
1698 /*************************************************************************
1699 Unserialization: real array
1700 *************************************************************************/
1701 void unserializerealarray(ae_serializer* s,
1702  /* Real */ ae_vector* v,
1703  ae_state *_state)
1704 {
1705  ae_int_t n;
1706  ae_int_t i;
1707  double t;
1708 
1709  ae_vector_clear(v);
1710 
1711  ae_serializer_unserialize_int(s, &n, _state);
1712  if( n==0 )
1713  {
1714  return;
1715  }
1716  ae_vector_set_length(v, n, _state);
1717  for(i=0; i<=n-1; i++)
1718  {
1719  ae_serializer_unserialize_double(s, &t, _state);
1720  v->ptr.p_double[i] = t;
1721  }
1722 }
1723 
1724 
1725 /*************************************************************************
1726 Allocation of serializer: Integer array
1727 *************************************************************************/
1728 void allocintegerarray(ae_serializer* s,
1729  /* Integer */ ae_vector* v,
1730  ae_int_t n,
1731  ae_state *_state)
1732 {
1733  ae_int_t i;
1734 
1735 
1736  if( n<0 )
1737  {
1738  n = v->cnt;
1739  }
1740  ae_serializer_alloc_entry(s);
1741  for(i=0; i<=n-1; i++)
1742  {
1743  ae_serializer_alloc_entry(s);
1744  }
1745 }
1746 
1747 
1748 /*************************************************************************
1749 Serialization: Integer array
1750 *************************************************************************/
1751 void serializeintegerarray(ae_serializer* s,
1752  /* Integer */ ae_vector* v,
1753  ae_int_t n,
1754  ae_state *_state)
1755 {
1756  ae_int_t i;
1757 
1758 
1759  if( n<0 )
1760  {
1761  n = v->cnt;
1762  }
1763  ae_serializer_serialize_int(s, n, _state);
1764  for(i=0; i<=n-1; i++)
1765  {
1766  ae_serializer_serialize_int(s, v->ptr.p_int[i], _state);
1767  }
1768 }
1769 
1770 
1771 /*************************************************************************
1772 Unserialization: integer array
1773 *************************************************************************/
1774 void unserializeintegerarray(ae_serializer* s,
1775  /* Integer */ ae_vector* v,
1776  ae_state *_state)
1777 {
1778  ae_int_t n;
1779  ae_int_t i;
1780  ae_int_t t;
1781 
1782  ae_vector_clear(v);
1783 
1784  ae_serializer_unserialize_int(s, &n, _state);
1785  if( n==0 )
1786  {
1787  return;
1788  }
1789  ae_vector_set_length(v, n, _state);
1790  for(i=0; i<=n-1; i++)
1791  {
1792  ae_serializer_unserialize_int(s, &t, _state);
1793  v->ptr.p_int[i] = t;
1794  }
1795 }
1796 
1797 
1798 /*************************************************************************
1799 Allocation of serializer: real matrix
1800 *************************************************************************/
1801 void allocrealmatrix(ae_serializer* s,
1802  /* Real */ ae_matrix* v,
1803  ae_int_t n0,
1804  ae_int_t n1,
1805  ae_state *_state)
1806 {
1807  ae_int_t i;
1808  ae_int_t j;
1809 
1810 
1811  if( n0<0 )
1812  {
1813  n0 = v->rows;
1814  }
1815  if( n1<0 )
1816  {
1817  n1 = v->cols;
1818  }
1819  ae_serializer_alloc_entry(s);
1820  ae_serializer_alloc_entry(s);
1821  for(i=0; i<=n0-1; i++)
1822  {
1823  for(j=0; j<=n1-1; j++)
1824  {
1825  ae_serializer_alloc_entry(s);
1826  }
1827  }
1828 }
1829 
1830 
1831 /*************************************************************************
1832 Serialization: real matrix
1833 *************************************************************************/
1834 void serializerealmatrix(ae_serializer* s,
1835  /* Real */ ae_matrix* v,
1836  ae_int_t n0,
1837  ae_int_t n1,
1838  ae_state *_state)
1839 {
1840  ae_int_t i;
1841  ae_int_t j;
1842 
1843 
1844  if( n0<0 )
1845  {
1846  n0 = v->rows;
1847  }
1848  if( n1<0 )
1849  {
1850  n1 = v->cols;
1851  }
1852  ae_serializer_serialize_int(s, n0, _state);
1853  ae_serializer_serialize_int(s, n1, _state);
1854  for(i=0; i<=n0-1; i++)
1855  {
1856  for(j=0; j<=n1-1; j++)
1857  {
1858  ae_serializer_serialize_double(s, v->ptr.pp_double[i][j], _state);
1859  }
1860  }
1861 }
1862 
1863 
1864 /*************************************************************************
1865 Unserialization: real matrix
1866 *************************************************************************/
1867 void unserializerealmatrix(ae_serializer* s,
1868  /* Real */ ae_matrix* v,
1869  ae_state *_state)
1870 {
1871  ae_int_t i;
1872  ae_int_t j;
1873  ae_int_t n0;
1874  ae_int_t n1;
1875  double t;
1876 
1877  ae_matrix_clear(v);
1878 
1879  ae_serializer_unserialize_int(s, &n0, _state);
1880  ae_serializer_unserialize_int(s, &n1, _state);
1881  if( n0==0||n1==0 )
1882  {
1883  return;
1884  }
1885  ae_matrix_set_length(v, n0, n1, _state);
1886  for(i=0; i<=n0-1; i++)
1887  {
1888  for(j=0; j<=n1-1; j++)
1889  {
1890  ae_serializer_unserialize_double(s, &t, _state);
1891  v->ptr.pp_double[i][j] = t;
1892  }
1893  }
1894 }
1895 
1896 
1897 /*************************************************************************
1898 Copy integer array
1899 *************************************************************************/
1900 void copyintegerarray(/* Integer */ ae_vector* src,
1901  /* Integer */ ae_vector* dst,
1902  ae_state *_state)
1903 {
1904  ae_int_t i;
1905 
1906  ae_vector_clear(dst);
1907 
1908  if( src->cnt>0 )
1909  {
1910  ae_vector_set_length(dst, src->cnt, _state);
1911  for(i=0; i<=src->cnt-1; i++)
1912  {
1913  dst->ptr.p_int[i] = src->ptr.p_int[i];
1914  }
1915  }
1916 }
1917 
1918 
1919 /*************************************************************************
1920 Copy real array
1921 *************************************************************************/
1922 void copyrealarray(/* Real */ ae_vector* src,
1923  /* Real */ ae_vector* dst,
1924  ae_state *_state)
1925 {
1926  ae_int_t i;
1927 
1928  ae_vector_clear(dst);
1929 
1930  if( src->cnt>0 )
1931  {
1932  ae_vector_set_length(dst, src->cnt, _state);
1933  for(i=0; i<=src->cnt-1; i++)
1934  {
1935  dst->ptr.p_double[i] = src->ptr.p_double[i];
1936  }
1937  }
1938 }
1939 
1940 
1941 /*************************************************************************
1942 Copy real matrix
1943 *************************************************************************/
1944 void copyrealmatrix(/* Real */ ae_matrix* src,
1945  /* Real */ ae_matrix* dst,
1946  ae_state *_state)
1947 {
1948  ae_int_t i;
1949  ae_int_t j;
1950 
1951  ae_matrix_clear(dst);
1952 
1953  if( src->rows>0&&src->cols>0 )
1954  {
1955  ae_matrix_set_length(dst, src->rows, src->cols, _state);
1956  for(i=0; i<=src->rows-1; i++)
1957  {
1958  for(j=0; j<=src->cols-1; j++)
1959  {
1960  dst->ptr.pp_double[i][j] = src->ptr.pp_double[i][j];
1961  }
1962  }
1963  }
1964 }
1965 
1966 
1967 /*************************************************************************
1968 This function searches integer array. Elements in this array are actually
1969 records, each NRec elements wide. Each record has unique header - NHeader
1970 integer values, which identify it. Records are lexicographically sorted by
1971 header.
1972 
1973 Records are identified by their index, not offset (offset = NRec*index).
1974 
1975 This function searches A (records with indices [I0,I1)) for a record with
1976 header B. It returns index of this record (not offset!), or -1 on failure.
1977 
1978  -- ALGLIB --
1979  Copyright 28.03.2011 by Bochkanov Sergey
1980 *************************************************************************/
1981 ae_int_t recsearch(/* Integer */ ae_vector* a,
1982  ae_int_t nrec,
1983  ae_int_t nheader,
1984  ae_int_t i0,
1985  ae_int_t i1,
1986  /* Integer */ ae_vector* b,
1987  ae_state *_state)
1988 {
1989  ae_int_t mididx;
1990  ae_int_t cflag;
1991  ae_int_t k;
1992  ae_int_t offs;
1993  ae_int_t result;
1994 
1995 
1996  result = -1;
1997  for(;;)
1998  {
1999  if( i0>=i1 )
2000  {
2001  break;
2002  }
2003  mididx = (i0+i1)/2;
2004  offs = nrec*mididx;
2005  cflag = 0;
2006  for(k=0; k<=nheader-1; k++)
2007  {
2008  if( a->ptr.p_int[offs+k]<b->ptr.p_int[k] )
2009  {
2010  cflag = -1;
2011  break;
2012  }
2013  if( a->ptr.p_int[offs+k]>b->ptr.p_int[k] )
2014  {
2015  cflag = 1;
2016  break;
2017  }
2018  }
2019  if( cflag==0 )
2020  {
2021  result = mididx;
2022  return result;
2023  }
2024  if( cflag<0 )
2025  {
2026  i0 = mididx+1;
2027  }
2028  else
2029  {
2030  i1 = mididx;
2031  }
2032  }
2033  return result;
2034 }
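/*
 * Illustrative sketch (array contents are hypothetical): with NRec=3 and
 * NHeader=2, A stores records {key0,key1,payload} sorted by (key0,key1):
 *
 *     A = [ 1,5,10,  2,7,20,  3,1,30 ],  B = [2,7]
 *
 * recsearch(&A, 3, 2, 0, 3, &B, _state) returns 1 (the record stored at
 * offset 3), and returns -1 if no record with header B exists in [I0,I1).
 */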
2035 
2036 
2037 /*************************************************************************
2038 This function is used in parallel functions for recursive division of a large
2039 task into two smaller tasks.
2040 
2041 It has following properties:
2042 * it works only for TaskSize>=2 (assertion is thrown otherwise)
2043 * for TaskSize=2, it returns Task0=1, Task1=1
2044 * in case TaskSize is odd, Task0=TaskSize-1, Task1=1
2045 * in case TaskSize is even, Task0 and Task1 are approximately TaskSize/2
2046  and both Task0 and Task1 are even, Task0>=Task1
2047 
2048  -- ALGLIB --
2049  Copyright 07.04.2013 by Bochkanov Sergey
2050 *************************************************************************/
2051 void splitlengtheven(ae_int_t tasksize,
2052  ae_int_t* task0,
2053  ae_int_t* task1,
2054  ae_state *_state)
2055 {
2056 
2057  *task0 = 0;
2058  *task1 = 0;
2059 
2060  ae_assert(tasksize>=2, "SplitLengthEven: TaskSize<2", _state);
2061  if( tasksize==2 )
2062  {
2063  *task0 = 1;
2064  *task1 = 1;
2065  return;
2066  }
2067  if( tasksize%2==0 )
2068  {
2069 
2070  /*
2071  * Even division
2072  */
2073  *task0 = tasksize/2;
2074  *task1 = tasksize/2;
2075  if( *task0%2!=0 )
2076  {
2077  *task0 = *task0+1;
2078  *task1 = *task1-1;
2079  }
2080  }
2081  else
2082  {
2083 
2084  /*
2085  * Odd task size, split trailing odd part from it.
2086  */
2087  *task0 = tasksize-1;
2088  *task1 = 1;
2089  }
2090  ae_assert(*task0>=1, "SplitLengthEven: internal error", _state);
2091  ae_assert(*task1>=1, "SplitLengthEven: internal error", _state);
2092 }
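/*
 * Illustrative values: TaskSize=2 gives (1,1); TaskSize=7 (odd) gives (6,1);
 * TaskSize=10 gives (6,4), i.e. both halves are even and Task0>=Task1.
 */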
2093 
2094 
2095 /*************************************************************************
2096 This function is used in parallel functions for recursive division of a large
2097 task into two smaller tasks.
2098 
2099 It has following properties:
2100 * it works only for TaskSize>=2 and ChunkSize>=2
2101  (assertion is thrown otherwise)
2102 * Task0+Task1=TaskSize, Task0>0, Task1>0
2103 * Task0 and Task1 are close to each other
2104 * in case TaskSize>ChunkSize, Task0 is always divisible by ChunkSize
2105 
2106  -- ALGLIB --
2107  Copyright 07.04.2013 by Bochkanov Sergey
2108 *************************************************************************/
2109 void splitlength(ae_int_t tasksize,
2110  ae_int_t chunksize,
2111  ae_int_t* task0,
2112  ae_int_t* task1,
2113  ae_state *_state)
2114 {
2115 
2116  *task0 = 0;
2117  *task1 = 0;
2118 
2119  ae_assert(chunksize>=2, "SplitLength: ChunkSize<2", _state);
2120  ae_assert(tasksize>=2, "SplitLength: TaskSize<2", _state);
2121  *task0 = tasksize/2;
2122  if( *task0>chunksize&&*task0%chunksize!=0 )
2123  {
2124  *task0 = *task0-*task0%chunksize;
2125  }
2126  *task1 = tasksize-(*task0);
2127  ae_assert(*task0>=1, "SplitLength: internal error", _state);
2128  ae_assert(*task1>=1, "SplitLength: internal error", _state);
2129 }
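/*
 * Illustrative values: TaskSize=100, ChunkSize=32 gives Task0=32, Task1=68
 * (Task0 is rounded down to a multiple of ChunkSize); TaskSize=10,
 * ChunkSize=32 gives Task0=5, Task1=5.
 */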
2130 
2131 
2132 ae_bool _apbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
2133 {
2134  apbuffers *p = (apbuffers*)_p;
2135  ae_touch_ptr((void*)p);
2136  if( !ae_vector_init(&p->ia0, 0, DT_INT, _state, make_automatic) )
2137  return ae_false;
2138  if( !ae_vector_init(&p->ia1, 0, DT_INT, _state, make_automatic) )
2139  return ae_false;
2140  if( !ae_vector_init(&p->ia2, 0, DT_INT, _state, make_automatic) )
2141  return ae_false;
2142  if( !ae_vector_init(&p->ia3, 0, DT_INT, _state, make_automatic) )
2143  return ae_false;
2144  if( !ae_vector_init(&p->ra0, 0, DT_REAL, _state, make_automatic) )
2145  return ae_false;
2146  if( !ae_vector_init(&p->ra1, 0, DT_REAL, _state, make_automatic) )
2147  return ae_false;
2148  if( !ae_vector_init(&p->ra2, 0, DT_REAL, _state, make_automatic) )
2149  return ae_false;
2150  if( !ae_vector_init(&p->ra3, 0, DT_REAL, _state, make_automatic) )
2151  return ae_false;
2152  return ae_true;
2153 }
2154 
2155 
2156 ae_bool _apbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2157 {
2158  apbuffers *dst = (apbuffers*)_dst;
2159  apbuffers *src = (apbuffers*)_src;
2160  if( !ae_vector_init_copy(&dst->ia0, &src->ia0, _state, make_automatic) )
2161  return ae_false;
2162  if( !ae_vector_init_copy(&dst->ia1, &src->ia1, _state, make_automatic) )
2163  return ae_false;
2164  if( !ae_vector_init_copy(&dst->ia2, &src->ia2, _state, make_automatic) )
2165  return ae_false;
2166  if( !ae_vector_init_copy(&dst->ia3, &src->ia3, _state, make_automatic) )
2167  return ae_false;
2168  if( !ae_vector_init_copy(&dst->ra0, &src->ra0, _state, make_automatic) )
2169  return ae_false;
2170  if( !ae_vector_init_copy(&dst->ra1, &src->ra1, _state, make_automatic) )
2171  return ae_false;
2172  if( !ae_vector_init_copy(&dst->ra2, &src->ra2, _state, make_automatic) )
2173  return ae_false;
2174  if( !ae_vector_init_copy(&dst->ra3, &src->ra3, _state, make_automatic) )
2175  return ae_false;
2176  return ae_true;
2177 }
2178 
2179 
2180 void _apbuffers_clear(void* _p)
2181 {
2182  apbuffers *p = (apbuffers*)_p;
2183  ae_touch_ptr((void*)p);
2184  ae_vector_clear(&p->ia0);
2185  ae_vector_clear(&p->ia1);
2186  ae_vector_clear(&p->ia2);
2187  ae_vector_clear(&p->ia3);
2188  ae_vector_clear(&p->ra0);
2189  ae_vector_clear(&p->ra1);
2190  ae_vector_clear(&p->ra2);
2191  ae_vector_clear(&p->ra3);
2192 }
2193 
2194 
2195 void _apbuffers_destroy(void* _p)
2196 {
2197  apbuffers *p = (apbuffers*)_p;
2198  ae_touch_ptr((void*)p);
2199  ae_vector_destroy(&p->ia0);
2200  ae_vector_destroy(&p->ia1);
2201  ae_vector_destroy(&p->ia2);
2202  ae_vector_destroy(&p->ia3);
2203  ae_vector_destroy(&p->ra0);
2204  ae_vector_destroy(&p->ra1);
2205  ae_vector_destroy(&p->ra2);
2206  ae_vector_destroy(&p->ra3);
2207 }
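/*************************************************************************
Illustration (not part of the original sources): a hypothetical sketch of
the apbuffers life cycle, mirroring the pattern used by TagSort below. The
structure is registered in the current dynamic frame (make_automatic=true),
so it is released automatically by ae_frame_leave().
*************************************************************************/
static void apbuffers_usage_sketch(ae_state *_state)
{
    ae_frame _frame_block;
    apbuffers buf;

    ae_frame_make(_state, &_frame_block);
    _apbuffers_init(&buf, _state, ae_true);

    /* buf.ia0..buf.ia3 / buf.ra0..buf.ra3 may now be handed to buffered
       routines such as tagsortbuf(), which grow them only when needed */

    ae_frame_leave(_state);
}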
2208 
2209 
2210 ae_bool _sboolean_init(void* _p, ae_state *_state, ae_bool make_automatic)
2211 {
2212  sboolean *p = (sboolean*)_p;
2213  ae_touch_ptr((void*)p);
2214  return ae_true;
2215 }
2216 
2217 
2218 ae_bool _sboolean_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2219 {
2220  sboolean *dst = (sboolean*)_dst;
2221  sboolean *src = (sboolean*)_src;
2222  dst->val = src->val;
2223  return ae_true;
2224 }
2225 
2226 
2227 void _sboolean_clear(void* _p)
2228 {
2229  sboolean *p = (sboolean*)_p;
2230  ae_touch_ptr((void*)p);
2231 }
2232 
2233 
2234 void _sboolean_destroy(void* _p)
2235 {
2236  sboolean *p = (sboolean*)_p;
2237  ae_touch_ptr((void*)p);
2238 }
2239 
2240 
2241 ae_bool _sbooleanarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
2242 {
2243  sbooleanarray *p = (sbooleanarray*)_p;
2244  ae_touch_ptr((void*)p);
2245  if( !ae_vector_init(&p->val, 0, DT_BOOL, _state, make_automatic) )
2246  return ae_false;
2247  return ae_true;
2248 }
2249 
2250 
2251 ae_bool _sbooleanarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2252 {
2253  sbooleanarray *dst = (sbooleanarray*)_dst;
2254  sbooleanarray *src = (sbooleanarray*)_src;
2255  if( !ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic) )
2256  return ae_false;
2257  return ae_true;
2258 }
2259 
2260 
2261 void _sbooleanarray_clear(void* _p)
2262 {
2263  sbooleanarray *p = (sbooleanarray*)_p;
2264  ae_touch_ptr((void*)p);
2265  ae_vector_clear(&p->val);
2266 }
2267 
2268 
2269 void _sbooleanarray_destroy(void* _p)
2270 {
2271  sbooleanarray *p = (sbooleanarray*)_p;
2272  ae_touch_ptr((void*)p);
2273  ae_vector_destroy(&p->val);
2274 }
2275 
2276 
2277 ae_bool _sinteger_init(void* _p, ae_state *_state, ae_bool make_automatic)
2278 {
2279  sinteger *p = (sinteger*)_p;
2280  ae_touch_ptr((void*)p);
2281  return ae_true;
2282 }
2283 
2284 
2285 ae_bool _sinteger_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2286 {
2287  sinteger *dst = (sinteger*)_dst;
2288  sinteger *src = (sinteger*)_src;
2289  dst->val = src->val;
2290  return ae_true;
2291 }
2292 
2293 
2294 void _sinteger_clear(void* _p)
2295 {
2296  sinteger *p = (sinteger*)_p;
2297  ae_touch_ptr((void*)p);
2298 }
2299 
2300 
2301 void _sinteger_destroy(void* _p)
2302 {
2303  sinteger *p = (sinteger*)_p;
2304  ae_touch_ptr((void*)p);
2305 }
2306 
2307 
2308 ae_bool _sintegerarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
2309 {
2310  sintegerarray *p = (sintegerarray*)_p;
2311  ae_touch_ptr((void*)p);
2312  if( !ae_vector_init(&p->val, 0, DT_INT, _state, make_automatic) )
2313  return ae_false;
2314  return ae_true;
2315 }
2316 
2317 
2318 ae_bool _sintegerarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2319 {
2320  sintegerarray *dst = (sintegerarray*)_dst;
2321  sintegerarray *src = (sintegerarray*)_src;
2322  if( !ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic) )
2323  return ae_false;
2324  return ae_true;
2325 }
2326 
2327 
2328 void _sintegerarray_clear(void* _p)
2329 {
2330  sintegerarray *p = (sintegerarray*)_p;
2331  ae_touch_ptr((void*)p);
2332  ae_vector_clear(&p->val);
2333 }
2334 
2335 
2336 void _sintegerarray_destroy(void* _p)
2337 {
2338  sintegerarray *p = (sintegerarray*)_p;
2339  ae_touch_ptr((void*)p);
2340  ae_vector_destroy(&p->val);
2341 }
2342 
2343 
2344 ae_bool _sreal_init(void* _p, ae_state *_state, ae_bool make_automatic)
2345 {
2346  sreal *p = (sreal*)_p;
2347  ae_touch_ptr((void*)p);
2348  return ae_true;
2349 }
2350 
2351 
2352 ae_bool _sreal_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2353 {
2354  sreal *dst = (sreal*)_dst;
2355  sreal *src = (sreal*)_src;
2356  dst->val = src->val;
2357  return ae_true;
2358 }
2359 
2360 
2361 void _sreal_clear(void* _p)
2362 {
2363  sreal *p = (sreal*)_p;
2364  ae_touch_ptr((void*)p);
2365 }
2366 
2367 
2368 void _sreal_destroy(void* _p)
2369 {
2370  sreal *p = (sreal*)_p;
2371  ae_touch_ptr((void*)p);
2372 }
2373 
2374 
2375 ae_bool _srealarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
2376 {
2377  srealarray *p = (srealarray*)_p;
2378  ae_touch_ptr((void*)p);
2379  if( !ae_vector_init(&p->val, 0, DT_REAL, _state, make_automatic) )
2380  return ae_false;
2381  return ae_true;
2382 }
2383 
2384 
2385 ae_bool _srealarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2386 {
2387  srealarray *dst = (srealarray*)_dst;
2388  srealarray *src = (srealarray*)_src;
2389  if( !ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic) )
2390  return ae_false;
2391  return ae_true;
2392 }
2393 
2394 
2395 void _srealarray_clear(void* _p)
2396 {
2397  srealarray *p = (srealarray*)_p;
2398  ae_touch_ptr((void*)p);
2399  ae_vector_clear(&p->val);
2400 }
2401 
2402 
2403 void _srealarray_destroy(void* _p)
2404 {
2405  srealarray *p = (srealarray*)_p;
2406  ae_touch_ptr((void*)p);
2407  ae_vector_destroy(&p->val);
2408 }
2409 
2410 
2411 ae_bool _scomplex_init(void* _p, ae_state *_state, ae_bool make_automatic)
2412 {
2413  scomplex *p = (scomplex*)_p;
2414  ae_touch_ptr((void*)p);
2415  return ae_true;
2416 }
2417 
2418 
2419 ae_bool _scomplex_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2420 {
2421  scomplex *dst = (scomplex*)_dst;
2422  scomplex *src = (scomplex*)_src;
2423  dst->val = src->val;
2424  return ae_true;
2425 }
2426 
2427 
2428 void _scomplex_clear(void* _p)
2429 {
2430  scomplex *p = (scomplex*)_p;
2431  ae_touch_ptr((void*)p);
2432 }
2433 
2434 
2435 void _scomplex_destroy(void* _p)
2436 {
2437  scomplex *p = (scomplex*)_p;
2438  ae_touch_ptr((void*)p);
2439 }
2440 
2441 
2442 ae_bool _scomplexarray_init(void* _p, ae_state *_state, ae_bool make_automatic)
2443 {
2444  scomplexarray *p = (scomplexarray*)_p;
2445  ae_touch_ptr((void*)p);
2446  if( !ae_vector_init(&p->val, 0, DT_COMPLEX, _state, make_automatic) )
2447  return ae_false;
2448  return ae_true;
2449 }
2450 
2451 
2452 ae_bool _scomplexarray_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
2453 {
2454  scomplexarray *dst = (scomplexarray*)_dst;
2455  scomplexarray *src = (scomplexarray*)_src;
2456  if( !ae_vector_init_copy(&dst->val, &src->val, _state, make_automatic) )
2457  return ae_false;
2458  return ae_true;
2459 }
2460 
2461 
2462 void _scomplexarray_clear(void* _p)
2463 {
2464  scomplexarray *p = (scomplexarray*)_p;
2465  ae_touch_ptr((void*)p);
2466  ae_vector_clear(&p->val);
2467 }
2468 
2469 
2470 void _scomplexarray_destroy(void* _p)
2471 {
2472  scomplexarray *p = (scomplexarray*)_p;
2473  ae_touch_ptr((void*)p);
2474  ae_vector_destroy(&p->val);
2475 }
2476 
2477 
2478 
2479 
2480 ae_int_t getrdfserializationcode(ae_state *_state)
2481 {
2482  ae_int_t result;
2483 
2484 
2485  result = 1;
2486  return result;
2487 }
2488 
2489 
2490 ae_int_t getkdtreeserializationcode(ae_state *_state)
2491 {
2492  ae_int_t result;
2493 
2494 
2495  result = 2;
2496  return result;
2497 }
2498 
2499 
2499 
2500 ae_int_t getmlpserializationcode(ae_state *_state)
2501 {
2502  ae_int_t result;
2503 
2504 
2505  result = 3;
2506  return result;
2507 }
2508 
2509 
2509 
2510 ae_int_t getmlpeserializationcode(ae_state *_state)
2511 {
2512  ae_int_t result;
2513 
2514 
2515  result = 4;
2516  return result;
2517 }
2518 
2519 
2519 
2520 ae_int_t getrbfserializationcode(ae_state *_state)
2521 {
2522  ae_int_t result;
2523 
2524 
2525  result = 5;
2526  return result;
2527 }
2528 
2529 
2530 
2531 
2532 /*************************************************************************
2533 This function sorts an array of real keys in ascending order.
2534 
2535 Its results are:
2536 * sorted array A
2537 * permutation tables P1, P2
2538 
2539 Algorithm outputs permutation tables using two formats:
2540 * as a usual permutation of [0..N-1]. If P1[i]=j, then sorted A[i] contains
2541  the value which was moved there from the J-th position.
2542 * as a sequence of pairwise swaps. Sorted A[] may be obtained by
2543  swapping A[i] and A[P2[i]] for all i from 0 to N-1.
2544 
2545 INPUT PARAMETERS:
2546  A - unsorted array
2547  N - array size
2548 
2549 OUTPUT PARAMETERS:
2550  A - sorted array
2551  P1, P2 - permutation tables, array[N]
2552 
2553 NOTES:
2554  this function assumes that A[] is finite; it doesn't check that
2555  condition. All other conditions (size of input arrays, etc.) are not
2556  checked either.
2557 
2558  -- ALGLIB --
2559  Copyright 14.05.2008 by Bochkanov Sergey
2560 *************************************************************************/
2561 void tagsort(/* Real */ ae_vector* a,
2562  ae_int_t n,
2563  /* Integer */ ae_vector* p1,
2564  /* Integer */ ae_vector* p2,
2565  ae_state *_state)
2566 {
2567  ae_frame _frame_block;
2568  apbuffers buf;
2569 
2570  ae_frame_make(_state, &_frame_block);
2571  ae_vector_clear(p1);
2572  ae_vector_clear(p2);
2573  _apbuffers_init(&buf, _state, ae_true);
2574 
2575  tagsortbuf(a, n, p1, p2, &buf, _state);
2576  ae_frame_leave(_state);
2577 }
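/*************************************************************************
Worked illustration (not part of the original sources): for A = {3,1,2},
TagSort returns the sorted array {1,2,3} together with P1 = {1,2,0}
("sorted A[i] came from position P1[i]") and P2 = {1,2,2} ("swapping A[i]
with A[P2[i]] for i=0..N-1 reproduces the sorted order"). A hypothetical
check:
*************************************************************************/
static void tagsort_example_sketch(ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector a;
    ae_vector p1;
    ae_vector p2;

    ae_frame_make(_state, &_frame_block);
    ae_vector_init(&a, 3, DT_REAL, _state, ae_true);
    ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
    a.ptr.p_double[0] = 3;
    a.ptr.p_double[1] = 1;
    a.ptr.p_double[2] = 2;

    tagsort(&a, 3, &p1, &p2, _state);
    /* now a = {1,2,3}, p1 = {1,2,0}, p2 = {1,2,2} */
    ae_frame_leave(_state);
}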
2578 
2579 
2580 /*************************************************************************
2581 Buffered variant of TagSort, which accepts preallocated output arrays as
2582 well as a special structure for buffered allocations. If the arrays are too
2583 short, they are reallocated. If they are large enough, no memory
2584 allocation is done.
2585 
2586 It is intended to be used in performance-critical parts of code, where
2587 additional allocations can lead to severe performance degradation.
2588 
2589  -- ALGLIB --
2590  Copyright 14.05.2008 by Bochkanov Sergey
2591 *************************************************************************/
2592 void tagsortbuf(/* Real */ ae_vector* a,
2593  ae_int_t n,
2594  /* Integer */ ae_vector* p1,
2595  /* Integer */ ae_vector* p2,
2596  apbuffers* buf,
2597  ae_state *_state)
2598 {
2599  ae_int_t i;
2600  ae_int_t lv;
2601  ae_int_t lp;
2602  ae_int_t rv;
2603  ae_int_t rp;
2604 
2605 
2606 
2607  /*
2608  * Special cases
2609  */
2610  if( n<=0 )
2611  {
2612  return;
2613  }
2614  if( n==1 )
2615  {
2616  ivectorsetlengthatleast(p1, 1, _state);
2617  ivectorsetlengthatleast(p2, 1, _state);
2618  p1->ptr.p_int[0] = 0;
2619  p2->ptr.p_int[0] = 0;
2620  return;
2621  }
2622 
2623  /*
2624  * General case, N>1: prepare permutations table P1
2625  */
2626  ivectorsetlengthatleast(p1, n, _state);
2627  for(i=0; i<=n-1; i++)
2628  {
2629  p1->ptr.p_int[i] = i;
2630  }
2631 
2632  /*
2633  * General case, N>1: sort, update P1
2634  */
2635  rvectorsetlengthatleast(&buf->ra0, n, _state);
2636  ivectorsetlengthatleast(&buf->ia0, n, _state);
2637  tagsortfasti(a, p1, &buf->ra0, &buf->ia0, n, _state);
2638 
2639  /*
2640  * General case, N>1: fill permutations table P2
2641  *
2642  * To fill P2 we maintain two arrays:
2643  * * PV (Buf.IA0), Position(Value). PV[i] contains position of I-th key at the moment
2644  * * VP (Buf.IA1), Value(Position). VP[i] contains key which has position I at the moment
2645  *
2646  * At each step we swap two items:
2647  * Left, which is given by the position/value pair LP/LV,
2648  * and Right, which is given by RP/RV,
2649  * and update PV[] and VP[] correspondingly.
2650  */
2651  ivectorsetlengthatleast(&buf->ia0, n, _state);
2652  ivectorsetlengthatleast(&buf->ia1, n, _state);
2653  ivectorsetlengthatleast(p2, n, _state);
2654  for(i=0; i<=n-1; i++)
2655  {
2656  buf->ia0.ptr.p_int[i] = i;
2657  buf->ia1.ptr.p_int[i] = i;
2658  }
2659  for(i=0; i<=n-1; i++)
2660  {
2661 
2662  /*
2663  * calculate LP, LV, RP, RV
2664  */
2665  lp = i;
2666  lv = buf->ia1.ptr.p_int[lp];
2667  rv = p1->ptr.p_int[i];
2668  rp = buf->ia0.ptr.p_int[rv];
2669 
2670  /*
2671  * Fill P2
2672  */
2673  p2->ptr.p_int[i] = rp;
2674 
2675  /*
2676  * update PV and VP
2677  */
2678  buf->ia1.ptr.p_int[lp] = rv;
2679  buf->ia1.ptr.p_int[rp] = lv;
2680  buf->ia0.ptr.p_int[lv] = rp;
2681  buf->ia0.ptr.p_int[rv] = lp;
2682  }
2683 }
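/*************************************************************************
Illustration (not part of the original sources): a hypothetical sketch of
why the buffered variant exists - one apbuffers instance and the output
arrays can be reused across many calls, so repeated sorting performs no
per-call allocations after the first call.
*************************************************************************/
static void tagsortbuf_reuse_sketch(/* Real */ ae_vector* a,
     ae_int_t n,
     ae_int_t ncalls,
     ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector p1;
    ae_vector p2;
    apbuffers buf;
    ae_int_t k;

    ae_frame_make(_state, &_frame_block);
    ae_vector_init(&p1, 0, DT_INT, _state, ae_true);
    ae_vector_init(&p2, 0, DT_INT, _state, ae_true);
    _apbuffers_init(&buf, _state, ae_true);
    for(k=0; k<=ncalls-1; k++)
    {
        /* only the first iteration may allocate; later ones reuse P1, P2, Buf */
        tagsortbuf(a, n, &p1, &p2, &buf, _state);
    }
    ae_frame_leave(_state);
}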
2684 
2685 
2686 /*************************************************************************
2687 Same as TagSort, but optimized for real keys and integer labels.
2688 
2689 A is sorted, and the same permutations are applied to B.
2690 
2691 NOTES:
2692 1. this function assumes that A[] is finite; it doesn't check that
2693  condition. All other conditions (size of input arrays, etc.) are not
2694  checked either.
2695 2. this function uses two buffers, BufA and BufB, each N elements large.
2696  They may be preallocated (which will save some time) or not, in which
2697  case the function will automatically allocate memory.
2698 
2699  -- ALGLIB --
2700  Copyright 11.12.2008 by Bochkanov Sergey
2701 *************************************************************************/
2702 void tagsortfasti(/* Real */ ae_vector* a,
2703  /* Integer */ ae_vector* b,
2704  /* Real */ ae_vector* bufa,
2705  /* Integer */ ae_vector* bufb,
2706  ae_int_t n,
2707  ae_state *_state)
2708 {
2709  ae_int_t i;
2710  ae_int_t j;
2711  ae_bool isascending;
2712  ae_bool isdescending;
2713  double tmpr;
2714  ae_int_t tmpi;
2715 
2716 
2717 
2718  /*
2719  * Special case
2720  */
2721  if( n<=1 )
2722  {
2723  return;
2724  }
2725 
2726  /*
2727  * Test for already sorted set
2728  */
2729  isascending = ae_true;
2730  isdescending = ae_true;
2731  for(i=1; i<=n-1; i++)
2732  {
2733  isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
2734  isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
2735  }
2736  if( isascending )
2737  {
2738  return;
2739  }
2740  if( isdescending )
2741  {
2742  for(i=0; i<=n-1; i++)
2743  {
2744  j = n-1-i;
2745  if( j<=i )
2746  {
2747  break;
2748  }
2749  tmpr = a->ptr.p_double[i];
2750  a->ptr.p_double[i] = a->ptr.p_double[j];
2751  a->ptr.p_double[j] = tmpr;
2752  tmpi = b->ptr.p_int[i];
2753  b->ptr.p_int[i] = b->ptr.p_int[j];
2754  b->ptr.p_int[j] = tmpi;
2755  }
2756  return;
2757  }
2758 
2759  /*
2760  * General case
2761  */
2762  if( bufa->cnt<n )
2763  {
2764  ae_vector_set_length(bufa, n, _state);
2765  }
2766  if( bufb->cnt<n )
2767  {
2768  ae_vector_set_length(bufb, n, _state);
2769  }
2770  tsort_tagsortfastirec(a, b, bufa, bufb, 0, n-1, _state);
2771 }
2772 
2773 
2774 /*************************************************************************
2775 Same as TagSort, but optimized for real keys and real labels.
2776 
2777 A is sorted, and the same permutations are applied to B.
2778 
2779 NOTES:
2780 1. this function assumes that A[] is finite; it doesn't check that
2781  condition. All other conditions (size of input arrays, etc.) are not
2782  checked either.
2783 2. this function uses two buffers, BufA and BufB, each N elements large.
2784  They may be preallocated (which will save some time) or not, in which
2785  case the function will automatically allocate memory.
2786 
2787  -- ALGLIB --
2788  Copyright 11.12.2008 by Bochkanov Sergey
2789 *************************************************************************/
2790 void tagsortfastr(/* Real */ ae_vector* a,
2791  /* Real */ ae_vector* b,
2792  /* Real */ ae_vector* bufa,
2793  /* Real */ ae_vector* bufb,
2794  ae_int_t n,
2795  ae_state *_state)
2796 {
2797  ae_int_t i;
2798  ae_int_t j;
2799  ae_bool isascending;
2800  ae_bool isdescending;
2801  double tmpr;
2802 
2803 
2804 
2805  /*
2806  * Special case
2807  */
2808  if( n<=1 )
2809  {
2810  return;
2811  }
2812 
2813  /*
2814  * Test for already sorted set
2815  */
2816  isascending = ae_true;
2817  isdescending = ae_true;
2818  for(i=1; i<=n-1; i++)
2819  {
2820  isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
2821  isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
2822  }
2823  if( isascending )
2824  {
2825  return;
2826  }
2827  if( isdescending )
2828  {
2829  for(i=0; i<=n-1; i++)
2830  {
2831  j = n-1-i;
2832  if( j<=i )
2833  {
2834  break;
2835  }
2836  tmpr = a->ptr.p_double[i];
2837  a->ptr.p_double[i] = a->ptr.p_double[j];
2838  a->ptr.p_double[j] = tmpr;
2839  tmpr = b->ptr.p_double[i];
2840  b->ptr.p_double[i] = b->ptr.p_double[j];
2841  b->ptr.p_double[j] = tmpr;
2842  }
2843  return;
2844  }
2845 
2846  /*
2847  * General case
2848  */
2849  if( bufa->cnt<n )
2850  {
2851  ae_vector_set_length(bufa, n, _state);
2852  }
2853  if( bufb->cnt<n )
2854  {
2855  ae_vector_set_length(bufb, n, _state);
2856  }
2857  tsort_tagsortfastrrec(a, b, bufa, bufb, 0, n-1, _state);
2858 }
2859 
2860 
2861 /*************************************************************************
2862 Same as TagSort, but optimized for real keys without labels.
2863 
2864 A is sorted, and that's all.
2865 
2866 NOTES:
2867 1. this function assumes that A[] is finite; it doesn't check that
2868  condition. All other conditions (size of input arrays, etc.) are not
2869  checked either.
2870 2. this function uses a buffer, BufA, which is N elements large. It may be
2871  preallocated (which will save some time) or not, in which case the
2872  function will automatically allocate memory.
2873 
2874  -- ALGLIB --
2875  Copyright 11.12.2008 by Bochkanov Sergey
2876 *************************************************************************/
2877 void tagsortfast(/* Real */ ae_vector* a,
2878  /* Real */ ae_vector* bufa,
2879  ae_int_t n,
2880  ae_state *_state)
2881 {
2882  ae_int_t i;
2883  ae_int_t j;
2884  ae_bool isascending;
2885  ae_bool isdescending;
2886  double tmpr;
2887 
2888 
2889 
2890  /*
2891  * Special case
2892  */
2893  if( n<=1 )
2894  {
2895  return;
2896  }
2897 
2898  /*
2899  * Test for already sorted set
2900  */
2901  isascending = ae_true;
2902  isdescending = ae_true;
2903  for(i=1; i<=n-1; i++)
2904  {
2905  isascending = isascending&&a->ptr.p_double[i]>=a->ptr.p_double[i-1];
2906  isdescending = isdescending&&a->ptr.p_double[i]<=a->ptr.p_double[i-1];
2907  }
2908  if( isascending )
2909  {
2910  return;
2911  }
2912  if( isdescending )
2913  {
2914  for(i=0; i<=n-1; i++)
2915  {
2916  j = n-1-i;
2917  if( j<=i )
2918  {
2919  break;
2920  }
2921  tmpr = a->ptr.p_double[i];
2922  a->ptr.p_double[i] = a->ptr.p_double[j];
2923  a->ptr.p_double[j] = tmpr;
2924  }
2925  return;
2926  }
2927 
2928  /*
2929  * General case
2930  */
2931  if( bufa->cnt<n )
2932  {
2933  ae_vector_set_length(bufa, n, _state);
2934  }
2935  tsort_tagsortfastrec(a, bufa, 0, n-1, _state);
2936 }
2937 
2938 
2939 /*************************************************************************
2940 Sorting function optimized for integer keys and real labels; it can be used
2941 to sort the middle of an array.
2942 
2943 A is sorted, and the same permutations are applied to B.
2944 
2945 NOTES:
2946  this function assumes that A[] is finite; it doesn't check that
2947  condition. All other conditions (size of input arrays, etc.) are not
2948  checked either.
2949 
2950  -- ALGLIB --
2951  Copyright 11.12.2008 by Bochkanov Sergey
2952 *************************************************************************/
2953 void tagsortmiddleir(/* Integer */ ae_vector* a,
2954  /* Real */ ae_vector* b,
2955  ae_int_t offset,
2956  ae_int_t n,
2957  ae_state *_state)
2958 {
2959  ae_int_t i;
2960  ae_int_t k;
2961  ae_int_t t;
2962  ae_int_t tmp;
2963  double tmpr;
2964 
2965 
2966 
2967  /*
2968  * Special cases
2969  */
2970  if( n<=1 )
2971  {
2972  return;
2973  }
2974 
2975  /*
2976  * General case, N>1: sort, update B
2977  */
2978  i = 2;
2979  do
2980  {
2981  t = i;
2982  while(t!=1)
2983  {
2984  k = t/2;
2985  if( a->ptr.p_int[offset+k-1]>=a->ptr.p_int[offset+t-1] )
2986  {
2987  t = 1;
2988  }
2989  else
2990  {
2991  tmp = a->ptr.p_int[offset+k-1];
2992  a->ptr.p_int[offset+k-1] = a->ptr.p_int[offset+t-1];
2993  a->ptr.p_int[offset+t-1] = tmp;
2994  tmpr = b->ptr.p_double[offset+k-1];
2995  b->ptr.p_double[offset+k-1] = b->ptr.p_double[offset+t-1];
2996  b->ptr.p_double[offset+t-1] = tmpr;
2997  t = k;
2998  }
2999  }
3000  i = i+1;
3001  }
3002  while(i<=n);
3003  i = n-1;
3004  do
3005  {
3006  tmp = a->ptr.p_int[offset+i];
3007  a->ptr.p_int[offset+i] = a->ptr.p_int[offset+0];
3008  a->ptr.p_int[offset+0] = tmp;
3009  tmpr = b->ptr.p_double[offset+i];
3010  b->ptr.p_double[offset+i] = b->ptr.p_double[offset+0];
3011  b->ptr.p_double[offset+0] = tmpr;
3012  t = 1;
3013  while(t!=0)
3014  {
3015  k = 2*t;
3016  if( k>i )
3017  {
3018  t = 0;
3019  }
3020  else
3021  {
3022  if( k<i )
3023  {
3024  if( a->ptr.p_int[offset+k]>a->ptr.p_int[offset+k-1] )
3025  {
3026  k = k+1;
3027  }
3028  }
3029  if( a->ptr.p_int[offset+t-1]>=a->ptr.p_int[offset+k-1] )
3030  {
3031  t = 0;
3032  }
3033  else
3034  {
3035  tmp = a->ptr.p_int[offset+k-1];
3036  a->ptr.p_int[offset+k-1] = a->ptr.p_int[offset+t-1];
3037  a->ptr.p_int[offset+t-1] = tmp;
3038  tmpr = b->ptr.p_double[offset+k-1];
3039  b->ptr.p_double[offset+k-1] = b->ptr.p_double[offset+t-1];
3040  b->ptr.p_double[offset+t-1] = tmpr;
3041  t = k;
3042  }
3043  }
3044  }
3045  i = i-1;
3046  }
3047  while(i>=1);
3048 }
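/*************************************************************************
Illustration (not part of the original sources): TagSortMiddleIR sorts only
the window A[Offset..Offset+N-1], carrying the real labels in B along. A
hypothetical example: for A = {5,9,7,8,1}, Offset=1, N=3 the integer keys
become {5,7,8,9,1}, with B[1..3] permuted in exactly the same way.
*************************************************************************/
static void tagsortmiddleir_usage_sketch(/* Integer */ ae_vector* a,
     /* Real */ ae_vector* b,
     ae_state *_state)
{
    /* sort only a[1..3] (and permute b[1..3] identically),
       leaving a[0] and a[4] untouched */
    tagsortmiddleir(a, b, 1, 3, _state);
}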
3049 
3050 
3051 /*************************************************************************
3052 Heap operations: adds element to the heap
3053 
3054 PARAMETERS:
3055  A - heap itself, must be at least array[0..N]
3056  B - array of integer tags, which are updated according to
3057  permutations in the heap
3058  N - size of the heap (without the new element);
3059  updated on output
3060  VA - value of the element being added
3061  VB - value of the tag
3062 
3063  -- ALGLIB --
3064  Copyright 28.02.2010 by Bochkanov Sergey
3065 *************************************************************************/
3066 void tagheappushi(/* Real */ ae_vector* a,
3067  /* Integer */ ae_vector* b,
3068  ae_int_t* n,
3069  double va,
3070  ae_int_t vb,
3071  ae_state *_state)
3072 {
3073  ae_int_t j;
3074  ae_int_t k;
3075  double v;
3076 
3077 
3078  if( *n<0 )
3079  {
3080  return;
3081  }
3082 
3083  /*
3084  * N=0 is a special case
3085  */
3086  if( *n==0 )
3087  {
3088  a->ptr.p_double[0] = va;
3089  b->ptr.p_int[0] = vb;
3090  *n = *n+1;
3091  return;
3092  }
3093 
3094  /*
3095  * add current point to the heap
3096  * (add to the bottom, then move up)
3097  *
3098  * we don't write the point to the heap
3099  * until its final position is determined
3100  * (this allows us to reduce the number of array accesses)
3101  */
3102  j = *n;
3103  *n = *n+1;
3104  while(j>0)
3105  {
3106  k = (j-1)/2;
3107  v = a->ptr.p_double[k];
3108  if( ae_fp_less(v,va) )
3109  {
3110 
3111  /*
3112  * swap with higher element
3113  */
3114  a->ptr.p_double[j] = v;
3115  b->ptr.p_int[j] = b->ptr.p_int[k];
3116  j = k;
3117  }
3118  else
3119  {
3120 
3121  /*
3122  * element in its place. terminate.
3123  */
3124  break;
3125  }
3126  }
3127  a->ptr.p_double[j] = va;
3128  b->ptr.p_int[j] = vb;
3129 }
3130 
3131 
3132 /*************************************************************************
3133 Heap operations: replaces top element with new element
3134 (which is moved down)
3135 
3136 PARAMETERS:
3137  A - heap itself, must be at least array[0..N-1]
3138  B - array of integer tags, which are updated according to
3139  permutations in the heap
3140  N - size of the heap
3141  VA - value of the element which replaces top element
3142  VB - value of the tag
3143 
3144  -- ALGLIB --
3145  Copyright 28.02.2010 by Bochkanov Sergey
3146 *************************************************************************/
3147 void tagheapreplacetopi(/* Real */ ae_vector* a,
3148  /* Integer */ ae_vector* b,
3149  ae_int_t n,
3150  double va,
3151  ae_int_t vb,
3152  ae_state *_state)
3153 {
3154  ae_int_t j;
3155  ae_int_t k1;
3156  ae_int_t k2;
3157  double v;
3158  double v1;
3159  double v2;
3160 
3161 
3162  if( n<1 )
3163  {
3164  return;
3165  }
3166 
3167  /*
3168  * N=1 is a special case
3169  */
3170  if( n==1 )
3171  {
3172  a->ptr.p_double[0] = va;
3173  b->ptr.p_int[0] = vb;
3174  return;
3175  }
3176 
3177  /*
3178  * move down through heap:
3179  * * J - current element
3180  * * K1 - first child (always exists)
3181  * K2 - second child (may not exist)
3182  *
3183  * we don't write the point to the heap
3184  * until its final position is determined
3185  * (this allows us to reduce the number of array accesses)
3186  */
3187  j = 0;
3188  k1 = 1;
3189  k2 = 2;
3190  while(k1<n)
3191  {
3192  if( k2>=n )
3193  {
3194 
3195  /*
3196  * only one child.
3197  *
3198  * swap and terminate (because this child
3199  * has no siblings due to heap structure)
3200  */
3201  v = a->ptr.p_double[k1];
3202  if( ae_fp_greater(v,va) )
3203  {
3204  a->ptr.p_double[j] = v;
3205  b->ptr.p_int[j] = b->ptr.p_int[k1];
3206  j = k1;
3207  }
3208  break;
3209  }
3210  else
3211  {
3212 
3213  /*
3214  * two children
3215  */
3216  v1 = a->ptr.p_double[k1];
3217  v2 = a->ptr.p_double[k2];
3218  if( ae_fp_greater(v1,v2) )
3219  {
3220  if( ae_fp_less(va,v1) )
3221  {
3222  a->ptr.p_double[j] = v1;
3223  b->ptr.p_int[j] = b->ptr.p_int[k1];
3224  j = k1;
3225  }
3226  else
3227  {
3228  break;
3229  }
3230  }
3231  else
3232  {
3233  if( ae_fp_less(va,v2) )
3234  {
3235  a->ptr.p_double[j] = v2;
3236  b->ptr.p_int[j] = b->ptr.p_int[k2];
3237  j = k2;
3238  }
3239  else
3240  {
3241  break;
3242  }
3243  }
3244  k1 = 2*j+1;
3245  k2 = 2*j+2;
3246  }
3247  }
3248  a->ptr.p_double[j] = va;
3249  b->ptr.p_int[j] = vb;
3250 }
3251 
3252 
3253 /*************************************************************************
3254 Heap operations: pops top element from the heap
3255 
3256 PARAMETERS:
3257  A - heap itself, must be at least array[0..N-1]
3258  B - array of integer tags, which are updated according to
3259  permutations in the heap
3260  N - size of the heap, N>=1
3261 
3262 On output, the top element is moved to A[N-1] and B[N-1], the heap is
3263 reordered, and N is decreased by 1.
3264 
3265  -- ALGLIB --
3266  Copyright 28.02.2010 by Bochkanov Sergey
3267 *************************************************************************/
3268 void tagheappopi(/* Real */ ae_vector* a,
3269  /* Integer */ ae_vector* b,
3270  ae_int_t* n,
3271  ae_state *_state)
3272 {
3273  double va;
3274  ae_int_t vb;
3275 
3276 
3277  if( *n<1 )
3278  {
3279  return;
3280  }
3281 
3282  /*
3283  * N=1 is a special case
3284  */
3285  if( *n==1 )
3286  {
3287  *n = 0;
3288  return;
3289  }
3290 
3291  /*
3292  * swap top element and last element,
3293  * then reorder heap
3294  */
3295  va = a->ptr.p_double[*n-1];
3296  vb = b->ptr.p_int[*n-1];
3297  a->ptr.p_double[*n-1] = a->ptr.p_double[0];
3298  b->ptr.p_int[*n-1] = b->ptr.p_int[0];
3299  *n = *n-1;
3300  tagheapreplacetopi(a, b, *n, va, vb, _state);
3301 }
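/*************************************************************************
Illustration (not part of the original sources): the three heap primitives
above maintain a max-heap in A[0..N-1] with integer tags in B. Pushing N
elements and then popping N times leaves A sorted by ascending, because
each pop moves the current maximum to A[N-1]. A hypothetical sketch:
*************************************************************************/
static void tagheap_usage_sketch(/* Real */ ae_vector* a,
     /* Integer */ ae_vector* b,
     /* Real */ ae_vector* values,
     ae_int_t n,
     ae_state *_state)
{
    ae_int_t i;
    ae_int_t heapsize;

    /* A and B are assumed to be at least N elements long */
    heapsize = 0;
    for(i=0; i<=n-1; i++)
    {
        tagheappushi(a, b, &heapsize, values->ptr.p_double[i], i, _state);
    }
    for(i=0; i<=n-1; i++)
    {
        tagheappopi(a, b, &heapsize, _state);
    }
    /* now a[0..n-1] is sorted by ascending and b[i] holds the index in
       Values of the element stored at a[i] */
}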
3302 
3303 
3304 /*************************************************************************
3305 Searches a sorted array for the first element which is not less than T.
3306 
3307 PARAMETERS:
3308  A - array sorted by ascending, elements 0 to N-1
3309  N - number of elements in array
3310  T - the value to search for
3311 
3312 RESULT:
3313  Index of the first element which is not less than T.
3314 In the case when there is no such element, the function returns N.
3315 *************************************************************************/
3316 ae_int_t lowerbound(/* Real */ ae_vector* a,
3317  ae_int_t n,
3318  double t,
3319  ae_state *_state)
3320 {
3321  ae_int_t l;
3322  ae_int_t half;
3323  ae_int_t first;
3324  ae_int_t middle;
3325  ae_int_t result;
3326 
3327 
3328  l = n;
3329  first = 0;
3330  while(l>0)
3331  {
3332  half = l/2;
3333  middle = first+half;
3334  if( ae_fp_less(a->ptr.p_double[middle],t) )
3335  {
3336  first = middle+1;
3337  l = l-half-1;
3338  }
3339  else
3340  {
3341  l = half;
3342  }
3343  }
3344  result = first;
3345  return result;
3346 }
3347 
3348 
3349 /*************************************************************************
3350 Searches a sorted array for the first element which is greater than T.
3351 
3352 PARAMETERS:
3353  A - array sorted by ascending, elements 0 to N-1
3354  N - number of elements in array
3355  T - the value to search for
3356 
3357 RESULT:
3358  Index of the first element which is greater than T.
3359 In the case when there is no such element, the function returns N.
3360 *************************************************************************/
3361 ae_int_t upperbound(/* Real */ ae_vector* a,
3362  ae_int_t n,
3363  double t,
3364  ae_state *_state)
3365 {
3366  ae_int_t l;
3367  ae_int_t half;
3368  ae_int_t first;
3369  ae_int_t middle;
3370  ae_int_t result;
3371 
3372 
3373  l = n;
3374  first = 0;
3375  while(l>0)
3376  {
3377  half = l/2;
3378  middle = first+half;
3379  if( ae_fp_less(t,a->ptr.p_double[middle]) )
3380  {
3381  l = half;
3382  }
3383  else
3384  {
3385  first = middle+1;
3386  l = l-half-1;
3387  }
3388  }
3389  result = first;
3390  return result;
3391 }
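/*************************************************************************
Worked illustration (not part of the original sources): for the sorted
array A = {1, 2, 2, 5} with N=4 and T=2,
* the first search above ("not less than T") returns index 1,
* the second search ("greater than T") returns index 3,
and both return N=4 for T=7, since no suitable element exists. These are
the usual lower-bound/upper-bound semantics of binary search. A
hypothetical snippet, using the routine names as reconstructed above:
*************************************************************************/
static void bound_search_sketch(/* Real */ ae_vector* a, ae_state *_state)
{
    ae_int_t i0;
    ae_int_t i1;

    /* assuming a = {1,2,2,5} */
    i0 = lowerbound(a, 4, 2.0, _state);   /* i0==1 */
    i1 = upperbound(a, 4, 2.0, _state);   /* i1==3 */
}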
3392 
3393 
3394 /*************************************************************************
3395 Internal TagSortFastI: sorts A[I1...I2] (both bounds are included),
3396 and applies the same permutations to B.
3397 
3398  -- ALGLIB --
3399  Copyright 06.09.2010 by Bochkanov Sergey
3400 *************************************************************************/
3401 static void tsort_tagsortfastirec(/* Real */ ae_vector* a,
3402  /* Integer */ ae_vector* b,
3403  /* Real */ ae_vector* bufa,
3404  /* Integer */ ae_vector* bufb,
3405  ae_int_t i1,
3406  ae_int_t i2,
3407  ae_state *_state)
3408 {
3409  ae_int_t i;
3410  ae_int_t j;
3411  ae_int_t k;
3412  ae_int_t cntless;
3413  ae_int_t cnteq;
3414  ae_int_t cntgreater;
3415  double tmpr;
3416  ae_int_t tmpi;
3417  double v0;
3418  double v1;
3419  double v2;
3420  double vp;
3421 
3422 
3423 
3424  /*
3425  * Fast exit
3426  */
3427  if( i2<=i1 )
3428  {
3429  return;
3430  }
3431 
3432  /*
3433  * Non-recursive sort for small arrays
3434  */
3435  if( i2-i1<=16 )
3436  {
3437  for(j=i1+1; j<=i2; j++)
3438  {
3439 
3440  /*
3441  * Search elements [I1..J-1] for place to insert Jth element.
3442  *
3443  * This code stops immediately if we can leave A[J] at the J-th position,
3444  * i.e. when no element in [I1..J-1] is larger than A[J].
3445  */
3446  tmpr = a->ptr.p_double[j];
3447  tmpi = j;
3448  for(k=j-1; k>=i1; k--)
3449  {
3450  if( a->ptr.p_double[k]<=tmpr )
3451  {
3452  break;
3453  }
3454  tmpi = k;
3455  }
3456  k = tmpi;
3457 
3458  /*
3459  * Insert Jth element into Kth position
3460  */
3461  if( k!=j )
3462  {
3463  tmpr = a->ptr.p_double[j];
3464  tmpi = b->ptr.p_int[j];
3465  for(i=j-1; i>=k; i--)
3466  {
3467  a->ptr.p_double[i+1] = a->ptr.p_double[i];
3468  b->ptr.p_int[i+1] = b->ptr.p_int[i];
3469  }
3470  a->ptr.p_double[k] = tmpr;
3471  b->ptr.p_int[k] = tmpi;
3472  }
3473  }
3474  return;
3475  }
3476 
3477  /*
3478  * Quicksort: choose pivot
3479  * Here we assume that I2-I1>=2
3480  */
3481  v0 = a->ptr.p_double[i1];
3482  v1 = a->ptr.p_double[i1+(i2-i1)/2];
3483  v2 = a->ptr.p_double[i2];
3484  if( v0>v1 )
3485  {
3486  tmpr = v1;
3487  v1 = v0;
3488  v0 = tmpr;
3489  }
3490  if( v1>v2 )
3491  {
3492  tmpr = v2;
3493  v2 = v1;
3494  v1 = tmpr;
3495  }
3496  if( v0>v1 )
3497  {
3498  tmpr = v1;
3499  v1 = v0;
3500  v0 = tmpr;
3501  }
3502  vp = v1;
3503 
3504  /*
3505  * now pass through A/B and:
3506  * * move elements that are LESS than VP to the left of A/B
3507  * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
3508  * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order)
3509  * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
3510  * * move elements from the left of BufA/BufB to the end of A/B
3511  */
3512  cntless = 0;
3513  cnteq = 0;
3514  cntgreater = 0;
3515  for(i=i1; i<=i2; i++)
3516  {
3517  v0 = a->ptr.p_double[i];
3518  if( v0<vp )
3519  {
3520 
3521  /*
3522  * LESS
3523  */
3524  k = i1+cntless;
3525  if( i!=k )
3526  {
3527  a->ptr.p_double[k] = v0;
3528  b->ptr.p_int[k] = b->ptr.p_int[i];
3529  }
3530  cntless = cntless+1;
3531  continue;
3532  }
3533  if( v0==vp )
3534  {
3535 
3536  /*
3537  * EQUAL
3538  */
3539  k = i2-cnteq;
3540  bufa->ptr.p_double[k] = v0;
3541  bufb->ptr.p_int[k] = b->ptr.p_int[i];
3542  cnteq = cnteq+1;
3543  continue;
3544  }
3545 
3546  /*
3547  * GREATER
3548  */
3549  k = i1+cntgreater;
3550  bufa->ptr.p_double[k] = v0;
3551  bufb->ptr.p_int[k] = b->ptr.p_int[i];
3552  cntgreater = cntgreater+1;
3553  }
3554  for(i=0; i<=cnteq-1; i++)
3555  {
3556  j = i1+cntless+cnteq-1-i;
3557  k = i2+i-(cnteq-1);
3558  a->ptr.p_double[j] = bufa->ptr.p_double[k];
3559  b->ptr.p_int[j] = bufb->ptr.p_int[k];
3560  }
3561  for(i=0; i<=cntgreater-1; i++)
3562  {
3563  j = i1+cntless+cnteq+i;
3564  k = i1+i;
3565  a->ptr.p_double[j] = bufa->ptr.p_double[k];
3566  b->ptr.p_int[j] = bufb->ptr.p_int[k];
3567  }
3568 
3569  /*
3570  * Sort left and right parts of the array (ignoring middle part)
3571  */
3572  tsort_tagsortfastirec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
3573  tsort_tagsortfastirec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
3574 }
3575 
3576 
3577 /*************************************************************************
3578 Internal TagSortFastR: sorts A[I1...I2] (both bounds are included),
3579 and applies the same permutations to B.
3580 
3581  -- ALGLIB --
3582  Copyright 06.09.2010 by Bochkanov Sergey
3583 *************************************************************************/
3584 static void tsort_tagsortfastrrec(/* Real */ ae_vector* a,
3585  /* Real */ ae_vector* b,
3586  /* Real */ ae_vector* bufa,
3587  /* Real */ ae_vector* bufb,
3588  ae_int_t i1,
3589  ae_int_t i2,
3590  ae_state *_state)
3591 {
3592  ae_int_t i;
3593  ae_int_t j;
3594  ae_int_t k;
3595  double tmpr;
3596  double tmpr2;
3597  ae_int_t tmpi;
3598  ae_int_t cntless;
3599  ae_int_t cnteq;
3600  ae_int_t cntgreater;
3601  double v0;
3602  double v1;
3603  double v2;
3604  double vp;
3605 
3606 
3607 
3608  /*
3609  * Fast exit
3610  */
3611  if( i2<=i1 )
3612  {
3613  return;
3614  }
3615 
3616  /*
3617  * Non-recursive sort for small arrays
3618  */
3619  if( i2-i1<=16 )
3620  {
3621  for(j=i1+1; j<=i2; j++)
3622  {
3623 
3624  /*
3625  * Search elements [I1..J-1] for place to insert Jth element.
3626  *
3627  * This code stops immediately if we can leave A[J] at the J-th position,
3628  * i.e. when no element in [I1..J-1] is larger than A[J].
3629  */
3630  tmpr = a->ptr.p_double[j];
3631  tmpi = j;
3632  for(k=j-1; k>=i1; k--)
3633  {
3634  if( a->ptr.p_double[k]<=tmpr )
3635  {
3636  break;
3637  }
3638  tmpi = k;
3639  }
3640  k = tmpi;
3641 
3642  /*
3643  * Insert Jth element into Kth position
3644  */
3645  if( k!=j )
3646  {
3647  tmpr = a->ptr.p_double[j];
3648  tmpr2 = b->ptr.p_double[j];
3649  for(i=j-1; i>=k; i--)
3650  {
3651  a->ptr.p_double[i+1] = a->ptr.p_double[i];
3652  b->ptr.p_double[i+1] = b->ptr.p_double[i];
3653  }
3654  a->ptr.p_double[k] = tmpr;
3655  b->ptr.p_double[k] = tmpr2;
3656  }
3657  }
3658  return;
3659  }
3660 
3661  /*
3662  * Quicksort: choose pivot
3663  * Here we assume that I2-I1>=16
3664  */
3665  v0 = a->ptr.p_double[i1];
3666  v1 = a->ptr.p_double[i1+(i2-i1)/2];
3667  v2 = a->ptr.p_double[i2];
3668  if( v0>v1 )
3669  {
3670  tmpr = v1;
3671  v1 = v0;
3672  v0 = tmpr;
3673  }
3674  if( v1>v2 )
3675  {
3676  tmpr = v2;
3677  v2 = v1;
3678  v1 = tmpr;
3679  }
3680  if( v0>v1 )
3681  {
3682  tmpr = v1;
3683  v1 = v0;
3684  v0 = tmpr;
3685  }
3686  vp = v1;
3687 
3688  /*
3689  * now pass through A/B and:
3690  * * move elements that are LESS than VP to the left of A/B
3691  * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
3692  * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order)
3693  * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
3694  * * move elements from the left of BufA/BufB to the end of A/B
3695  */
3696  cntless = 0;
3697  cnteq = 0;
3698  cntgreater = 0;
3699  for(i=i1; i<=i2; i++)
3700  {
3701  v0 = a->ptr.p_double[i];
3702  if( v0<vp )
3703  {
3704 
3705  /*
3706  * LESS
3707  */
3708  k = i1+cntless;
3709  if( i!=k )
3710  {
3711  a->ptr.p_double[k] = v0;
3712  b->ptr.p_double[k] = b->ptr.p_double[i];
3713  }
3714  cntless = cntless+1;
3715  continue;
3716  }
3717  if( v0==vp )
3718  {
3719 
3720  /*
3721  * EQUAL
3722  */
3723  k = i2-cnteq;
3724  bufa->ptr.p_double[k] = v0;
3725  bufb->ptr.p_double[k] = b->ptr.p_double[i];
3726  cnteq = cnteq+1;
3727  continue;
3728  }
3729 
3730  /*
3731  * GREATER
3732  */
3733  k = i1+cntgreater;
3734  bufa->ptr.p_double[k] = v0;
3735  bufb->ptr.p_double[k] = b->ptr.p_double[i];
3736  cntgreater = cntgreater+1;
3737  }
3738  for(i=0; i<=cnteq-1; i++)
3739  {
3740  j = i1+cntless+cnteq-1-i;
3741  k = i2+i-(cnteq-1);
3742  a->ptr.p_double[j] = bufa->ptr.p_double[k];
3743  b->ptr.p_double[j] = bufb->ptr.p_double[k];
3744  }
3745  for(i=0; i<=cntgreater-1; i++)
3746  {
3747  j = i1+cntless+cnteq+i;
3748  k = i1+i;
3749  a->ptr.p_double[j] = bufa->ptr.p_double[k];
3750  b->ptr.p_double[j] = bufb->ptr.p_double[k];
3751  }
3752 
3753  /*
3754  * Sort left and right parts of the array (ignoring middle part)
3755  */
3756  tsort_tagsortfastrrec(a, b, bufa, bufb, i1, i1+cntless-1, _state);
3757  tsort_tagsortfastrrec(a, b, bufa, bufb, i1+cntless+cnteq, i2, _state);
3758 }
3759 
3760 
3761 /*************************************************************************
3762 Internal TagSortFast: sorts A[I1...I2] (both bounds are included);
3763 no tag array is involved.
3764 
3765  -- ALGLIB --
3766  Copyright 06.09.2010 by Bochkanov Sergey
3767 *************************************************************************/
3768 static void tsort_tagsortfastrec(/* Real */ ae_vector* a,
3769  /* Real */ ae_vector* bufa,
3770  ae_int_t i1,
3771  ae_int_t i2,
3772  ae_state *_state)
3773 {
3774  ae_int_t cntless;
3775  ae_int_t cnteq;
3776  ae_int_t cntgreater;
3777  ae_int_t i;
3778  ae_int_t j;
3779  ae_int_t k;
3780  double tmpr;
3781  ae_int_t tmpi;
3782  double v0;
3783  double v1;
3784  double v2;
3785  double vp;
3786 
3787 
3788 
3789  /*
3790  * Fast exit
3791  */
3792  if( i2<=i1 )
3793  {
3794  return;
3795  }
3796 
3797  /*
3798  * Non-recursive sort for small arrays
3799  */
3800  if( i2-i1<=16 )
3801  {
3802  for(j=i1+1; j<=i2; j++)
3803  {
3804 
3805  /*
3806  * Search elements [I1..J-1] for place to insert Jth element.
3807  *
3808  * This code stops immediately if we can leave A[J] at the J-th position,
3809  * i.e. when no element in [I1..J-1] is larger than A[J].
3810  */
3811  tmpr = a->ptr.p_double[j];
3812  tmpi = j;
3813  for(k=j-1; k>=i1; k--)
3814  {
3815  if( a->ptr.p_double[k]<=tmpr )
3816  {
3817  break;
3818  }
3819  tmpi = k;
3820  }
3821  k = tmpi;
3822 
3823  /*
3824  * Insert Jth element into Kth position
3825  */
3826  if( k!=j )
3827  {
3828  tmpr = a->ptr.p_double[j];
3829  for(i=j-1; i>=k; i--)
3830  {
3831  a->ptr.p_double[i+1] = a->ptr.p_double[i];
3832  }
3833  a->ptr.p_double[k] = tmpr;
3834  }
3835  }
3836  return;
3837  }
3838 
3839  /*
3840  * Quicksort: choose pivot
3841  * Here we assume that I2-I1>=16
3842  */
3843  v0 = a->ptr.p_double[i1];
3844  v1 = a->ptr.p_double[i1+(i2-i1)/2];
3845  v2 = a->ptr.p_double[i2];
3846  if( v0>v1 )
3847  {
3848  tmpr = v1;
3849  v1 = v0;
3850  v0 = tmpr;
3851  }
3852  if( v1>v2 )
3853  {
3854  tmpr = v2;
3855  v2 = v1;
3856  v1 = tmpr;
3857  }
3858  if( v0>v1 )
3859  {
3860  tmpr = v1;
3861  v1 = v0;
3862  v0 = tmpr;
3863  }
3864  vp = v1;
3865 
3866  /*
3867  * now pass through A/B and:
3868  * * move elements that are LESS than VP to the left of A/B
3869  * * move elements that are EQUAL to VP to the right of BufA/BufB (in the reverse order)
3870  * move elements that are GREATER than VP to the left of BufA/BufB (in the normal order)
3871  * * move elements from the tail of BufA/BufB to the middle of A/B (restoring normal order)
3872  * * move elements from the left of BufA/BufB to the end of A/B
3873  */
3874  cntless = 0;
3875  cnteq = 0;
3876  cntgreater = 0;
3877  for(i=i1; i<=i2; i++)
3878  {
3879  v0 = a->ptr.p_double[i];
3880  if( v0<vp )
3881  {
3882 
3883  /*
3884  * LESS
3885  */
3886  k = i1+cntless;
3887  if( i!=k )
3888  {
3889  a->ptr.p_double[k] = v0;
3890  }
3891  cntless = cntless+1;
3892  continue;
3893  }
3894  if( v0==vp )
3895  {
3896 
3897  /*
3898  * EQUAL
3899  */
3900  k = i2-cnteq;
3901  bufa->ptr.p_double[k] = v0;
3902  cnteq = cnteq+1;
3903  continue;
3904  }
3905 
3906  /*
3907  * GREATER
3908  */
3909  k = i1+cntgreater;
3910  bufa->ptr.p_double[k] = v0;
3911  cntgreater = cntgreater+1;
3912  }
3913  for(i=0; i<=cnteq-1; i++)
3914  {
3915  j = i1+cntless+cnteq-1-i;
3916  k = i2+i-(cnteq-1);
3917  a->ptr.p_double[j] = bufa->ptr.p_double[k];
3918  }
3919  for(i=0; i<=cntgreater-1; i++)
3920  {
3921  j = i1+cntless+cnteq+i;
3922  k = i1+i;
3923  a->ptr.p_double[j] = bufa->ptr.p_double[k];
3924  }
3925 
3926  /*
3927  * Sort left and right parts of the array (ignoring middle part)
3928  */
3929  tsort_tagsortfastrec(a, bufa, i1, i1+cntless-1, _state);
3930  tsort_tagsortfastrec(a, bufa, i1+cntless+cnteq, i2, _state);
3931 }
3932 
3933 
3934 
3935 
3936 /*************************************************************************
3937 Internal ranking subroutine.
3938 
3939 INPUT PARAMETERS:
3940  X - array to rank
3941  N - array size
3942  IsCentered- whether ranks are centered or not:
3943  * True - ranks are centered in such a way that their
3944  sum is zero
3945  * False - ranks are not centered
3946  Buf - temporary buffers
3947 
3948 NOTE: when IsCentered is True and all X[] are equal, this function fills
3949  X by zeros (exact zeros are used, not values whose sum is only
3950  approximately equal to zero).
3951 *************************************************************************/
3952 void rankx(/* Real */ ae_vector* x,
3953  ae_int_t n,
3954  ae_bool iscentered,
3955  apbuffers* buf,
3956  ae_state *_state)
3957 {
3958  ae_int_t i;
3959  ae_int_t j;
3960  ae_int_t k;
3961  double tmp;
3962  double voffs;
3963 
3964 
3965 
3966  /*
3967  * Prepare
3968  */
3969  if( n<1 )
3970  {
3971  return;
3972  }
3973  if( n==1 )
3974  {
3975  x->ptr.p_double[0] = 0;
3976  return;
3977  }
3978  if( buf->ra1.cnt<n )
3979  {
3980  ae_vector_set_length(&buf->ra1, n, _state);
3981  }
3982  if( buf->ia1.cnt<n )
3983  {
3984  ae_vector_set_length(&buf->ia1, n, _state);
3985  }
3986  for(i=0; i<=n-1; i++)
3987  {
3988  buf->ra1.ptr.p_double[i] = x->ptr.p_double[i];
3989  buf->ia1.ptr.p_int[i] = i;
3990  }
3991  tagsortfasti(&buf->ra1, &buf->ia1, &buf->ra2, &buf->ia2, n, _state);
3992 
3993  /*
3994  * Special test for all values being equal
3995  */
3996  if( ae_fp_eq(buf->ra1.ptr.p_double[0],buf->ra1.ptr.p_double[n-1]) )
3997  {
3998  if( iscentered )
3999  {
4000  tmp = 0.0;
4001  }
4002  else
4003  {
4004  tmp = (double)(n-1)/(double)2;
4005  }
4006  for(i=0; i<=n-1; i++)
4007  {
4008  x->ptr.p_double[i] = tmp;
4009  }
4010  return;
4011  }
4012 
4013  /*
4014  * compute tied ranks
4015  */
4016  i = 0;
4017  while(i<=n-1)
4018  {
4019  j = i+1;
4020  while(j<=n-1)
4021  {
4022  if( ae_fp_neq(buf->ra1.ptr.p_double[j],buf->ra1.ptr.p_double[i]) )
4023  {
4024  break;
4025  }
4026  j = j+1;
4027  }
4028  for(k=i; k<=j-1; k++)
4029  {
4030  buf->ra1.ptr.p_double[k] = (double)(i+j-1)/(double)2;
4031  }
4032  i = j;
4033  }
4034 
4035  /*
4036  * back to x
4037  */
4038  if( iscentered )
4039  {
4040  voffs = (double)(n-1)/(double)2;
4041  }
4042  else
4043  {
4044  voffs = 0.0;
4045  }
4046  for(i=0; i<=n-1; i++)
4047  {
4048  x->ptr.p_double[buf->ia1.ptr.p_int[i]] = buf->ra1.ptr.p_double[i]-voffs;
4049  }
4050 }
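/*************************************************************************
Worked illustration (not part of the original sources): for
X = {10, 20, 20, 30} and IsCentered=False, RankX replaces X by the tied
ranks {0, 1.5, 1.5, 3} - the two equal values share the average of ranks 1
and 2. With IsCentered=True the offset (N-1)/2 = 1.5 is subtracted, giving
{-1.5, 0, 0, 1.5}, whose sum is exactly zero. A hypothetical sketch,
assuming Buf is an already initialized apbuffers instance:
*************************************************************************/
static void rankx_example_sketch(apbuffers* buf, ae_state *_state)
{
    ae_frame _frame_block;
    ae_vector x;

    ae_frame_make(_state, &_frame_block);
    ae_vector_init(&x, 4, DT_REAL, _state, ae_true);
    x.ptr.p_double[0] = 10;
    x.ptr.p_double[1] = 20;
    x.ptr.p_double[2] = 20;
    x.ptr.p_double[3] = 30;
    rankx(&x, 4, ae_false, buf, _state);
    /* now x = {0, 1.5, 1.5, 3} */
    ae_frame_leave(_state);
}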
4051 
4052 
4053 
4054 
4055 /*************************************************************************
4056 Fast kernel
4057 
4058  -- ALGLIB routine --
4059  19.01.2010
4060  Bochkanov Sergey
4061 *************************************************************************/
4062 ae_bool cmatrixrank1f(ae_int_t m,
4063  ae_int_t n,
4064  /* Complex */ ae_matrix* a,
4065  ae_int_t ia,
4066  ae_int_t ja,
4067  /* Complex */ ae_vector* u,
4068  ae_int_t iu,
4069  /* Complex */ ae_vector* v,
4070  ae_int_t iv,
4071  ae_state *_state)
4072 {
4073 #ifndef ALGLIB_INTERCEPTS_ABLAS
4074  ae_bool result;
4075 
4076 
4077  result = ae_false;
4078  return result;
4079 #else
4080  return _ialglib_i_cmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
4081 #endif
4082 }
4083 
4084 
4085 /*************************************************************************
4086 Fast kernel
4087 
4088  -- ALGLIB routine --
4089  19.01.2010
4090  Bochkanov Sergey
4091 *************************************************************************/
4092 ae_bool rmatrixrank1f(ae_int_t m,
4093  ae_int_t n,
4094  /* Real */ ae_matrix* a,
4095  ae_int_t ia,
4096  ae_int_t ja,
4097  /* Real */ ae_vector* u,
4098  ae_int_t iu,
4099  /* Real */ ae_vector* v,
4100  ae_int_t iv,
4101  ae_state *_state)
4102 {
4103 #ifndef ALGLIB_INTERCEPTS_ABLAS
4104  ae_bool result;
4105 
4106 
4107  result = ae_false;
4108  return result;
4109 #else
4110  return _ialglib_i_rmatrixrank1f(m, n, a, ia, ja, u, iu, v, iv);
4111 #endif
4112 }
4113 
4114 
4115 /*************************************************************************
4116 Fast kernel
4117 
4118  -- ALGLIB routine --
4119  19.01.2010
4120  Bochkanov Sergey
4121 *************************************************************************/
4122 ae_bool cmatrixmvf(ae_int_t m,
4123  ae_int_t n,
4124  /* Complex */ ae_matrix* a,
4125  ae_int_t ia,
4126  ae_int_t ja,
4127  ae_int_t opa,
4128  /* Complex */ ae_vector* x,
4129  ae_int_t ix,
4130  /* Complex */ ae_vector* y,
4131  ae_int_t iy,
4132  ae_state *_state)
4133 {
4134  ae_bool result;
4135 
4136 
4137  result = ae_false;
4138  return result;
4139 }
4140 
4141 
4142 /*************************************************************************
4143 Fast kernel
4144 
4145  -- ALGLIB routine --
4146  19.01.2010
4147  Bochkanov Sergey
4148 *************************************************************************/
4149 ae_bool rmatrixmvf(ae_int_t m,
4150  ae_int_t n,
4151  /* Real */ ae_matrix* a,
4152  ae_int_t ia,
4153  ae_int_t ja,
4154  ae_int_t opa,
4155  /* Real */ ae_vector* x,
4156  ae_int_t ix,
4157  /* Real */ ae_vector* y,
4158  ae_int_t iy,
4159  ae_state *_state)
4160 {
4161  ae_bool result;
4162 
4163 
4164  result = ae_false;
4165  return result;
4166 }
4167 
4168 
4169 /*************************************************************************
4170 Fast kernel
4171 
4172  -- ALGLIB routine --
4173  19.01.2010
4174  Bochkanov Sergey
4175 *************************************************************************/
4176 ae_bool cmatrixrighttrsmf(ae_int_t m,
4177  ae_int_t n,
4178  /* Complex */ ae_matrix* a,
4179  ae_int_t i1,
4180  ae_int_t j1,
4181  ae_bool isupper,
4182  ae_bool isunit,
4183  ae_int_t optype,
4184  /* Complex */ ae_matrix* x,
4185  ae_int_t i2,
4186  ae_int_t j2,
4187  ae_state *_state)
4188 {
4189 #ifndef ALGLIB_INTERCEPTS_ABLAS
4190  ae_bool result;
4191 
4192 
4193  result = ae_false;
4194  return result;
4195 #else
4196  return _ialglib_i_cmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4197 #endif
4198 }
4199 
4200 
4201 /*************************************************************************
4202 Fast kernel
4203 
4204  -- ALGLIB routine --
4205  19.01.2010
4206  Bochkanov Sergey
4207 *************************************************************************/
4208 ae_bool cmatrixlefttrsmf(ae_int_t m,
4209  ae_int_t n,
4210  /* Complex */ ae_matrix* a,
4211  ae_int_t i1,
4212  ae_int_t j1,
4213  ae_bool isupper,
4214  ae_bool isunit,
4215  ae_int_t optype,
4216  /* Complex */ ae_matrix* x,
4217  ae_int_t i2,
4218  ae_int_t j2,
4219  ae_state *_state)
4220 {
4221 #ifndef ALGLIB_INTERCEPTS_ABLAS
4222  ae_bool result;
4223 
4224 
4225  result = ae_false;
4226  return result;
4227 #else
4228  return _ialglib_i_cmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4229 #endif
4230 }
4231 
4232 
4233 /*************************************************************************
4234 Fast kernel
4235 
4236  -- ALGLIB routine --
4237  19.01.2010
4238  Bochkanov Sergey
4239 *************************************************************************/
4240 ae_bool rmatrixrighttrsmf(ae_int_t m,
4241  ae_int_t n,
4242  /* Real */ ae_matrix* a,
4243  ae_int_t i1,
4244  ae_int_t j1,
4245  ae_bool isupper,
4246  ae_bool isunit,
4247  ae_int_t optype,
4248  /* Real */ ae_matrix* x,
4249  ae_int_t i2,
4250  ae_int_t j2,
4251  ae_state *_state)
4252 {
4253 #ifndef ALGLIB_INTERCEPTS_ABLAS
4254  ae_bool result;
4255 
4256 
4257  result = ae_false;
4258  return result;
4259 #else
4260  return _ialglib_i_rmatrixrighttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4261 #endif
4262 }
4263 
4264 
4265 /*************************************************************************
4266 Fast kernel
4267 
4268  -- ALGLIB routine --
4269  19.01.2010
4270  Bochkanov Sergey
4271 *************************************************************************/
4272 ae_bool rmatrixlefttrsmf(ae_int_t m,
4273  ae_int_t n,
4274  /* Real */ ae_matrix* a,
4275  ae_int_t i1,
4276  ae_int_t j1,
4277  ae_bool isupper,
4278  ae_bool isunit,
4279  ae_int_t optype,
4280  /* Real */ ae_matrix* x,
4281  ae_int_t i2,
4282  ae_int_t j2,
4283  ae_state *_state)
4284 {
4285 #ifndef ALGLIB_INTERCEPTS_ABLAS
4286  ae_bool result;
4287 
4288 
4289  result = ae_false;
4290  return result;
4291 #else
4292  return _ialglib_i_rmatrixlefttrsmf(m, n, a, i1, j1, isupper, isunit, optype, x, i2, j2);
4293 #endif
4294 }
4295 
4296 
4297 /*************************************************************************
4298 Fast kernel
4299 
4300  -- ALGLIB routine --
4301  19.01.2010
4302  Bochkanov Sergey
4303 *************************************************************************/
4304 ae_bool cmatrixsyrkf(ae_int_t n,
4305  ae_int_t k,
4306  double alpha,
4307  /* Complex */ ae_matrix* a,
4308  ae_int_t ia,
4309  ae_int_t ja,
4310  ae_int_t optypea,
4311  double beta,
4312  /* Complex */ ae_matrix* c,
4313  ae_int_t ic,
4314  ae_int_t jc,
4315  ae_bool isupper,
4316  ae_state *_state)
4317 {
4318 #ifndef ALGLIB_INTERCEPTS_ABLAS
4319  ae_bool result;
4320 
4321 
4322  result = ae_false;
4323  return result;
4324 #else
4325  return _ialglib_i_cmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
4326 #endif
4327 }
4328 
4329 
4330 /*************************************************************************
4331 Fast kernel
4332 
4333  -- ALGLIB routine --
4334  19.01.2010
4335  Bochkanov Sergey
4336 *************************************************************************/
4337 ae_bool rmatrixsyrkf(ae_int_t n,
4338  ae_int_t k,
4339  double alpha,
4340  /* Real */ ae_matrix* a,
4341  ae_int_t ia,
4342  ae_int_t ja,
4343  ae_int_t optypea,
4344  double beta,
4345  /* Real */ ae_matrix* c,
4346  ae_int_t ic,
4347  ae_int_t jc,
4348  ae_bool isupper,
4349  ae_state *_state)
4350 {
4351 #ifndef ALGLIB_INTERCEPTS_ABLAS
4352  ae_bool result;
4353 
4354 
4355  result = ae_false;
4356  return result;
4357 #else
4358  return _ialglib_i_rmatrixsyrkf(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
4359 #endif
4360 }
4361 
4362 
4363 /*************************************************************************
4364 Fast kernel
4365 
4366  -- ALGLIB routine --
4367  19.01.2010
4368  Bochkanov Sergey
4369 *************************************************************************/
4370 ae_bool rmatrixgemmf(ae_int_t m,
4371  ae_int_t n,
4372  ae_int_t k,
4373  double alpha,
4374  /* Real */ ae_matrix* a,
4375  ae_int_t ia,
4376  ae_int_t ja,
4377  ae_int_t optypea,
4378  /* Real */ ae_matrix* b,
4379  ae_int_t ib,
4380  ae_int_t jb,
4381  ae_int_t optypeb,
4382  double beta,
4383  /* Real */ ae_matrix* c,
4384  ae_int_t ic,
4385  ae_int_t jc,
4386  ae_state *_state)
4387 {
4388 #ifndef ALGLIB_INTERCEPTS_ABLAS
4389  ae_bool result;
4390 
4391 
4392  result = ae_false;
4393  return result;
4394 #else
4395  return _ialglib_i_rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
4396 #endif
4397 }
4398 
4399 
4400 /*************************************************************************
4401 Fast kernel
4402 
4403  -- ALGLIB routine --
4404  19.01.2010
4405  Bochkanov Sergey
4406 *************************************************************************/
4407 ae_bool cmatrixgemmf(ae_int_t m,
4408  ae_int_t n,
4409  ae_int_t k,
4410  ae_complex alpha,
4411  /* Complex */ ae_matrix* a,
4412  ae_int_t ia,
4413  ae_int_t ja,
4414  ae_int_t optypea,
4415  /* Complex */ ae_matrix* b,
4416  ae_int_t ib,
4417  ae_int_t jb,
4418  ae_int_t optypeb,
4419  ae_complex beta,
4420  /* Complex */ ae_matrix* c,
4421  ae_int_t ic,
4422  ae_int_t jc,
4423  ae_state *_state)
4424 {
4425 #ifndef ALGLIB_INTERCEPTS_ABLAS
4426  ae_bool result;
4427 
4428 
4429  result = ae_false;
4430  return result;
4431 #else
4432  return _ialglib_i_cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
4433 #endif
4434 }
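/*************************************************************************
Illustration (not part of the original sources): the "fast kernel" stubs
above and the GEMM basecase routine documented below all compute variants
of the same operation, C := Alpha*op1(A)*op2(B) + Beta*C. A hypothetical
reference version for plain row-major double arrays, with op1 and op2 both
equal to the identity, is just a triple loop:
*************************************************************************/
static void naive_gemm_reference_sketch(ae_int_t m,
     ae_int_t n,
     ae_int_t k,
     double alpha,
     const double* a,
     const double* b,
     double beta,
     double* c)
{
    ae_int_t i;
    ae_int_t j;
    ae_int_t t;
    double v;

    /* a is m*k, b is k*n, c is m*n, all stored row-major */
    for(i=0; i<=m-1; i++)
    {
        for(j=0; j<=n-1; j++)
        {
            v = 0.0;
            for(t=0; t<=k-1; t++)
            {
                v = v+a[i*k+t]*b[t*n+j];
            }
            if( beta==0.0 )
            {
                /* mirror the documented contract: when Beta=0,
                   old C elements are not referenced */
                c[i*n+j] = alpha*v;
            }
            else
            {
                c[i*n+j] = alpha*v+beta*c[i*n+j];
            }
        }
    }
}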
4435 
4436 
4437 /*************************************************************************
4438 CMatrixGEMM kernel, basecase code for CMatrixGEMM.
4439 
4440 This subroutine calculates C = alpha*op1(A)*op2(B) + beta*C where:
4441 * C is MxN general matrix
4442 * op1(A) is MxK matrix
4443 * op2(B) is KxN matrix
4444 * "op" may be identity transformation, transposition, conjugate transposition
4445 
4446 Additional info:
4447 * multiplication result replaces C. If Beta=0, C elements are not used in
4448  calculations (not multiplied by zero - just not referenced)
4449 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
4450 * if both Beta and Alpha are zero, C is filled by zeros.
4451 
4452 IMPORTANT:
4453 
4454 This function does NOT preallocate the output matrix C; it MUST be
4455 preallocated by the caller prior to calling this function. If C does not
4456 have enough space to store the result, an exception will be generated.
4457 
4458 INPUT PARAMETERS
4459  M - matrix size, M>0
4460  N - matrix size, N>0
4461  K - matrix size, K>0
4462  Alpha - coefficient
4463  A - matrix
4464  IA - submatrix offset
4465  JA - submatrix offset
4466  OpTypeA - transformation type:
4467  * 0 - no transformation
4468  * 1 - transposition
4469  * 2 - conjugate transposition
4470  B - matrix
4471  IB - submatrix offset
4472  JB - submatrix offset
4473  OpTypeB - transformation type:
4474  * 0 - no transformation
4475  * 1 - transposition
4476  * 2 - conjugate transposition
4477  Beta - coefficient
4478  C - PREALLOCATED output matrix
4479  IC - submatrix offset
4480  JC - submatrix offset
4481 
4482  -- ALGLIB routine --
4483  27.03.2013
4484  Bochkanov Sergey
4485 *************************************************************************/
4486 void cmatrixgemmk(ae_int_t m,
4487  ae_int_t n,
4488  ae_int_t k,
4489  ae_complex alpha,
4490  /* Complex */ ae_matrix* a,
4491  ae_int_t ia,
4492  ae_int_t ja,
4493  ae_int_t optypea,
4494  /* Complex */ ae_matrix* b,
4495  ae_int_t ib,
4496  ae_int_t jb,
4497  ae_int_t optypeb,
4498  ae_complex beta,
4499  /* Complex */ ae_matrix* c,
4500  ae_int_t ic,
4501  ae_int_t jc,
4502  ae_state *_state)
4503 {
4504  ae_int_t i;
4505  ae_int_t j;
4506  ae_complex v;
4507  ae_complex v00;
4508  ae_complex v01;
4509  ae_complex v10;
4510  ae_complex v11;
4511  double v00x;
4512  double v00y;
4513  double v01x;
4514  double v01y;
4515  double v10x;
4516  double v10y;
4517  double v11x;
4518  double v11y;
4519  double a0x;
4520  double a0y;
4521  double a1x;
4522  double a1y;
4523  double b0x;
4524  double b0y;
4525  double b1x;
4526  double b1y;
4527  ae_int_t idxa0;
4528  ae_int_t idxa1;
4529  ae_int_t idxb0;
4530  ae_int_t idxb1;
4531  ae_int_t i0;
4532  ae_int_t i1;
4533  ae_int_t ik;
4534  ae_int_t j0;
4535  ae_int_t j1;
4536  ae_int_t jk;
4537  ae_int_t t;
4538  ae_int_t offsa;
4539  ae_int_t offsb;
4540 
4541 
4542 
4543  /*
4544  * if matrix size is zero
4545  */
4546  if( m==0||n==0 )
4547  {
4548  return;
4549  }
4550 
4551  /*
4552  * Try optimized code
4553  */
4554  if( cmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
4555  {
4556  return;
4557  }
4558 
4559  /*
4560  * if K=0, then C=Beta*C
4561  */
4562  if( k==0 )
4563  {
4564  if( ae_c_neq_d(beta,1) )
4565  {
4566  if( ae_c_neq_d(beta,0) )
4567  {
4568  for(i=0; i<=m-1; i++)
4569  {
4570  for(j=0; j<=n-1; j++)
4571  {
4572  c->ptr.pp_complex[ic+i][jc+j] = ae_c_mul(beta,c->ptr.pp_complex[ic+i][jc+j]);
4573  }
4574  }
4575  }
4576  else
4577  {
4578  for(i=0; i<=m-1; i++)
4579  {
4580  for(j=0; j<=n-1; j++)
4581  {
4582  c->ptr.pp_complex[ic+i][jc+j] = ae_complex_from_d(0);
4583  }
4584  }
4585  }
4586  }
4587  return;
4588  }
4589 
4590  /*
4591  * This phase is not really necessary, but the compiler complains
4592  * about "possibly uninitialized variables"
4593  */
4594  a0x = 0;
4595  a0y = 0;
4596  a1x = 0;
4597  a1y = 0;
4598  b0x = 0;
4599  b0y = 0;
4600  b1x = 0;
4601  b1y = 0;
4602 
4603  /*
4604  * General case
4605  */
4606  i = 0;
4607  while(i<m)
4608  {
4609  j = 0;
4610  while(j<n)
4611  {
4612 
4613  /*
4614  * Choose between specialized 2x2 code and general code
4615  */
4616  if( i+2<=m&&j+2<=n )
4617  {
4618 
4619  /*
4620  * Specialized 2x2 code for the [I..I+1]x[J..J+1] submatrix of C.
4621  *
4622  * This submatrix is calculated as a sum of K rank-1 products,
4623  * with operands cached in local variables in order to speed
4624  * up operations with arrays.
4625  */
4626  v00x = 0.0;
4627  v00y = 0.0;
4628  v01x = 0.0;
4629  v01y = 0.0;
4630  v10x = 0.0;
4631  v10y = 0.0;
4632  v11x = 0.0;
4633  v11y = 0.0;
4634  if( optypea==0 )
4635  {
4636  idxa0 = ia+i+0;
4637  idxa1 = ia+i+1;
4638  offsa = ja;
4639  }
4640  else
4641  {
4642  idxa0 = ja+i+0;
4643  idxa1 = ja+i+1;
4644  offsa = ia;
4645  }
4646  if( optypeb==0 )
4647  {
4648  idxb0 = jb+j+0;
4649  idxb1 = jb+j+1;
4650  offsb = ib;
4651  }
4652  else
4653  {
4654  idxb0 = ib+j+0;
4655  idxb1 = ib+j+1;
4656  offsb = jb;
4657  }
4658  for(t=0; t<=k-1; t++)
4659  {
4660  if( optypea==0 )
4661  {
4662  a0x = a->ptr.pp_complex[idxa0][offsa].x;
4663  a0y = a->ptr.pp_complex[idxa0][offsa].y;
4664  a1x = a->ptr.pp_complex[idxa1][offsa].x;
4665  a1y = a->ptr.pp_complex[idxa1][offsa].y;
4666  }
4667  if( optypea==1 )
4668  {
4669  a0x = a->ptr.pp_complex[offsa][idxa0].x;
4670  a0y = a->ptr.pp_complex[offsa][idxa0].y;
4671  a1x = a->ptr.pp_complex[offsa][idxa1].x;
4672  a1y = a->ptr.pp_complex[offsa][idxa1].y;
4673  }
4674  if( optypea==2 )
4675  {
4676  a0x = a->ptr.pp_complex[offsa][idxa0].x;
4677  a0y = -a->ptr.pp_complex[offsa][idxa0].y;
4678  a1x = a->ptr.pp_complex[offsa][idxa1].x;
4679  a1y = -a->ptr.pp_complex[offsa][idxa1].y;
4680  }
4681  if( optypeb==0 )
4682  {
4683  b0x = b->ptr.pp_complex[offsb][idxb0].x;
4684  b0y = b->ptr.pp_complex[offsb][idxb0].y;
4685  b1x = b->ptr.pp_complex[offsb][idxb1].x;
4686  b1y = b->ptr.pp_complex[offsb][idxb1].y;
4687  }
4688  if( optypeb==1 )
4689  {
4690  b0x = b->ptr.pp_complex[idxb0][offsb].x;
4691  b0y = b->ptr.pp_complex[idxb0][offsb].y;
4692  b1x = b->ptr.pp_complex[idxb1][offsb].x;
4693  b1y = b->ptr.pp_complex[idxb1][offsb].y;
4694  }
4695  if( optypeb==2 )
4696  {
4697  b0x = b->ptr.pp_complex[idxb0][offsb].x;
4698  b0y = -b->ptr.pp_complex[idxb0][offsb].y;
4699  b1x = b->ptr.pp_complex[idxb1][offsb].x;
4700  b1y = -b->ptr.pp_complex[idxb1][offsb].y;
4701  }
4702  v00x = v00x+a0x*b0x-a0y*b0y;
4703  v00y = v00y+a0x*b0y+a0y*b0x;
4704  v01x = v01x+a0x*b1x-a0y*b1y;
4705  v01y = v01y+a0x*b1y+a0y*b1x;
4706  v10x = v10x+a1x*b0x-a1y*b0y;
4707  v10y = v10y+a1x*b0y+a1y*b0x;
4708  v11x = v11x+a1x*b1x-a1y*b1y;
4709  v11y = v11y+a1x*b1y+a1y*b1x;
4710  offsa = offsa+1;
4711  offsb = offsb+1;
4712  }
4713  v00.x = v00x;
4714  v00.y = v00y;
4715  v10.x = v10x;
4716  v10.y = v10y;
4717  v01.x = v01x;
4718  v01.y = v01y;
4719  v11.x = v11x;
4720  v11.y = v11y;
4721  if( ae_c_eq_d(beta,0) )
4722  {
4723  c->ptr.pp_complex[ic+i+0][jc+j+0] = ae_c_mul(alpha,v00);
4724  c->ptr.pp_complex[ic+i+0][jc+j+1] = ae_c_mul(alpha,v01);
4725  c->ptr.pp_complex[ic+i+1][jc+j+0] = ae_c_mul(alpha,v10);
4726  c->ptr.pp_complex[ic+i+1][jc+j+1] = ae_c_mul(alpha,v11);
4727  }
4728  else
4729  {
4730  c->ptr.pp_complex[ic+i+0][jc+j+0] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+0][jc+j+0]),ae_c_mul(alpha,v00));
4731  c->ptr.pp_complex[ic+i+0][jc+j+1] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+0][jc+j+1]),ae_c_mul(alpha,v01));
4732  c->ptr.pp_complex[ic+i+1][jc+j+0] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+1][jc+j+0]),ae_c_mul(alpha,v10));
4733  c->ptr.pp_complex[ic+i+1][jc+j+1] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+i+1][jc+j+1]),ae_c_mul(alpha,v11));
4734  }
4735  }
4736  else
4737  {
4738 
4739  /*
4740  * Determine submatrix [I0..I1]x[J0..J1] to process
4741  */
4742  i0 = i;
4743  i1 = ae_minint(i+1, m-1, _state);
4744  j0 = j;
4745  j1 = ae_minint(j+1, n-1, _state);
4746 
4747  /*
4748  * Process submatrix
4749  */
4750  for(ik=i0; ik<=i1; ik++)
4751  {
4752  for(jk=j0; jk<=j1; jk++)
4753  {
4754  if( k==0||ae_c_eq_d(alpha,0) )
4755  {
4756  v = ae_complex_from_d(0);
4757  }
4758  else
4759  {
4760  v = ae_complex_from_d(0.0);
4761  if( optypea==0&&optypeb==0 )
4762  {
4763  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ja,ja+k-1));
4764  }
4765  if( optypea==0&&optypeb==1 )
4766  {
4767  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ja,ja+k-1));
4768  }
4769  if( optypea==0&&optypeb==2 )
4770  {
4771  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia+ik][ja], 1, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ja,ja+k-1));
4772  }
4773  if( optypea==1&&optypeb==0 )
4774  {
4775  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ia,ia+k-1));
4776  }
4777  if( optypea==1&&optypeb==1 )
4778  {
4779  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ia,ia+k-1));
4780  }
4781  if( optypea==1&&optypeb==2 )
4782  {
4783  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "N", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
4784  }
4785  if( optypea==2&&optypeb==0 )
4786  {
4787  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib][jb+jk], b->stride, "N", ae_v_len(ia,ia+k-1));
4788  }
4789  if( optypea==2&&optypeb==1 )
4790  {
4791  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib+jk][jb], 1, "N", ae_v_len(ia,ia+k-1));
4792  }
4793  if( optypea==2&&optypeb==2 )
4794  {
4795  v = ae_v_cdotproduct(&a->ptr.pp_complex[ia][ja+ik], a->stride, "Conj", &b->ptr.pp_complex[ib+jk][jb], 1, "Conj", ae_v_len(ia,ia+k-1));
4796  }
4797  }
4798  if( ae_c_eq_d(beta,0) )
4799  {
4800  c->ptr.pp_complex[ic+ik][jc+jk] = ae_c_mul(alpha,v);
4801  }
4802  else
4803  {
4804  c->ptr.pp_complex[ic+ik][jc+jk] = ae_c_add(ae_c_mul(beta,c->ptr.pp_complex[ic+ik][jc+jk]),ae_c_mul(alpha,v));
4805  }
4806  }
4807  }
4808  }
4809  j = j+2;
4810  }
4811  i = i+2;
4812  }
4813 }
4814 
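/*************************************************************************
Illustrative sketch of the split complex multiply-accumulate used by the
2x2 micro-kernel above. This is a hypothetical helper, not part of ALGLIB:
it works on plain doubles, with the real and imaginary parts of each
operand passed separately, exactly as v00x/v00y..v11x/v11y are maintained
in the inner loop.
*************************************************************************/
static void example_split_complex_mac(double* accx,
     double* accy,
     double ax,
     double ay,
     double bx,
     double by)
{
    // acc += (ax + i*ay)*(bx + i*by), kept as separate real/imaginary parts;
    // this is the update applied K times per accumulator in the loop above.
    *accx = *accx+ax*bx-ay*by;
    *accy = *accy+ax*by+ay*bx;
}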
4815 
4816 /*************************************************************************
4817 RMatrixGEMM kernel, basecase code for RMatrixGEMM.
4818 
4819 This subroutine calculates C = alpha*op1(A)*op2(B) +beta*C where:
4820 * C is MxN general matrix
4821 * op1(A) is MxK matrix
4822 * op2(B) is KxN matrix
4823 * "op" may be identity transformation, transposition
4824 
4825 Additional info:
4826 * multiplication result replaces C. If Beta=0, C elements are not used in
4827  calculations (not multiplied by zero - just not referenced)
4828 * if Alpha=0, A is not used (not multiplied by zero - just not referenced)
4829 * if both Beta and Alpha are zero, C is filled by zeros.
4830 
4831 IMPORTANT:
4832 
4833 This function does NOT preallocate output matrix C; it MUST be preallocated
4834 by the caller prior to calling this function. If C does not have enough
4835 space to store the result, an exception will be generated.
4836 
4837 INPUT PARAMETERS
4838  M - matrix size, M>0
4839  N - matrix size, N>0
4840  K - matrix size, K>0
4841  Alpha - coefficient
4842  A - matrix
4843  IA - submatrix offset
4844  JA - submatrix offset
4845  OpTypeA - transformation type:
4846  * 0 - no transformation
4847  * 1 - transposition
4848  B - matrix
4849  IB - submatrix offset
4850  JB - submatrix offset
4851  OpTypeB - transformation type:
4852  * 0 - no transformation
4853  * 1 - transposition
4854  Beta - coefficient
4855  C - PREALLOCATED output matrix
4856  IC - submatrix offset
4857  JC - submatrix offset
4858 
4859  -- ALGLIB routine --
4860  27.03.2013
4861  Bochkanov Sergey
4862 *************************************************************************/
4863 void rmatrixgemmk(ae_int_t m,
4864  ae_int_t n,
4865  ae_int_t k,
4866  double alpha,
4867  /* Real */ ae_matrix* a,
4868  ae_int_t ia,
4869  ae_int_t ja,
4870  ae_int_t optypea,
4871  /* Real */ ae_matrix* b,
4872  ae_int_t ib,
4873  ae_int_t jb,
4874  ae_int_t optypeb,
4875  double beta,
4876  /* Real */ ae_matrix* c,
4877  ae_int_t ic,
4878  ae_int_t jc,
4879  ae_state *_state)
4880 {
4881  ae_int_t i;
4882  ae_int_t j;
4883 
4884 
4885 
4886  /*
4887  * if matrix size is zero
4888  */
4889  if( m==0||n==0 )
4890  {
4891  return;
4892  }
4893 
4894  /*
4895  * Try optimized code
4896  */
4897  if( rmatrixgemmf(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc, _state) )
4898  {
4899  return;
4900  }
4901 
4902  /*
4903  * if K=0, then C=Beta*C
4904  */
4905  if( k==0||ae_fp_eq(alpha,0) )
4906  {
4907  if( ae_fp_neq(beta,1) )
4908  {
4909  if( ae_fp_neq(beta,0) )
4910  {
4911  for(i=0; i<=m-1; i++)
4912  {
4913  for(j=0; j<=n-1; j++)
4914  {
4915  c->ptr.pp_double[ic+i][jc+j] = beta*c->ptr.pp_double[ic+i][jc+j];
4916  }
4917  }
4918  }
4919  else
4920  {
4921  for(i=0; i<=m-1; i++)
4922  {
4923  for(j=0; j<=n-1; j++)
4924  {
4925  c->ptr.pp_double[ic+i][jc+j] = 0;
4926  }
4927  }
4928  }
4929  }
4930  return;
4931  }
4932 
4933  /*
4934  * Call specialized code.
4935  *
4936  * NOTE: specialized code was moved to a separate function because of strange
4937  * issues with the instruction cache on some systems; having overly long
4938  * functions significantly slows down the inner loop of the algorithm.
4939  */
4940  if( optypea==0&&optypeb==0 )
4941  {
4942  rmatrixgemmk44v00(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4943  }
4944  if( optypea==0&&optypeb!=0 )
4945  {
4946  rmatrixgemmk44v01(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4947  }
4948  if( optypea!=0&&optypeb==0 )
4949  {
4950  rmatrixgemmk44v10(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4951  }
4952  if( optypea!=0&&optypeb!=0 )
4953  {
4954  rmatrixgemmk44v11(m, n, k, alpha, a, ia, ja, b, ib, jb, beta, c, ic, jc, _state);
4955  }
4956 }
4957 
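/*************************************************************************
Illustrative sketch of the operation documented above: a naive plain-C
reference for C = alpha*op1(A)*op2(B) + beta*C with op = identity or
transposition. This is a hypothetical helper, not one of the ALGLIB
kernels; it assumes row-major arrays (A is MxK or KxM, B is KxN or NxK,
C is MxN) and performs no blocking, so it only pins down the semantics
that RMatrixGEMMK and the 4x4 kernels below implement.
*************************************************************************/
static void example_gemm_reference(int m, int n, int k,
     double alpha, const double* a, int optypea,
     const double* b, int optypeb,
     double beta, double* c)
{
    int i, j, t;
    for(i=0; i<m; i++)
    {
        for(j=0; j<n; j++)
        {
            double v = 0.0;
            for(t=0; t<k; t++)
            {
                // op1(A)[i][t]: A[i][t] if not transposed, A[t][i] otherwise
                double aelem = optypea==0 ? a[i*k+t] : a[t*m+i];
                // op2(B)[t][j]: B[t][j] if not transposed, B[j][t] otherwise
                double belem = optypeb==0 ? b[t*n+j] : b[j*k+t];
                v += aelem*belem;
            }
            // Beta=0 means "do not reference C", matching the note above
            c[i*n+j] = (beta==0.0 ? 0.0 : beta*c[i*n+j])+alpha*v;
        }
    }
}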
4958 
4959 /*************************************************************************
4960 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for the situation
4961 with OpTypeA=0 and OpTypeB=0.
4962 
4963 Additional info:
4964 * this function requires that Alpha<>0 (assertion is thrown otherwise)
4965 
4966 INPUT PARAMETERS
4967  M - matrix size, M>0
4968  N - matrix size, N>0
4969  K - matrix size, K>0
4970  Alpha - coefficient
4971  A - matrix
4972  IA - submatrix offset
4973  JA - submatrix offset
4974  B - matrix
4975  IB - submatrix offset
4976  JB - submatrix offset
4977  Beta - coefficient
4978  C - PREALLOCATED output matrix
4979  IC - submatrix offset
4980  JC - submatrix offset
4981 
4982  -- ALGLIB routine --
4983  27.03.2013
4984  Bochkanov Sergey
4985 *************************************************************************/
4986 void rmatrixgemmk44v00(ae_int_t m,
4987  ae_int_t n,
4988  ae_int_t k,
4989  double alpha,
4990  /* Real */ ae_matrix* a,
4991  ae_int_t ia,
4992  ae_int_t ja,
4993  /* Real */ ae_matrix* b,
4994  ae_int_t ib,
4995  ae_int_t jb,
4996  double beta,
4997  /* Real */ ae_matrix* c,
4998  ae_int_t ic,
4999  ae_int_t jc,
5000  ae_state *_state)
5001 {
5002  ae_int_t i;
5003  ae_int_t j;
5004  double v;
5005  double v00;
5006  double v01;
5007  double v02;
5008  double v03;
5009  double v10;
5010  double v11;
5011  double v12;
5012  double v13;
5013  double v20;
5014  double v21;
5015  double v22;
5016  double v23;
5017  double v30;
5018  double v31;
5019  double v32;
5020  double v33;
5021  double a0;
5022  double a1;
5023  double a2;
5024  double a3;
5025  double b0;
5026  double b1;
5027  double b2;
5028  double b3;
5029  ae_int_t idxa0;
5030  ae_int_t idxa1;
5031  ae_int_t idxa2;
5032  ae_int_t idxa3;
5033  ae_int_t idxb0;
5034  ae_int_t idxb1;
5035  ae_int_t idxb2;
5036  ae_int_t idxb3;
5037  ae_int_t i0;
5038  ae_int_t i1;
5039  ae_int_t ik;
5040  ae_int_t j0;
5041  ae_int_t j1;
5042  ae_int_t jk;
5043  ae_int_t t;
5044  ae_int_t offsa;
5045  ae_int_t offsb;
5046 
5047 
5048  ae_assert(ae_fp_neq(alpha,0), "RMatrixGEMMK44V00: internal error (Alpha=0)", _state);
5049 
5050  /*
5051  * if matrix size is zero
5052  */
5053  if( m==0||n==0 )
5054  {
5055  return;
5056  }
5057 
5058  /*
5059  * A*B
5060  */
5061  i = 0;
5062  while(i<m)
5063  {
5064  j = 0;
5065  while(j<n)
5066  {
5067 
5068  /*
5069  * Choose between specialized 4x4 code and general code
5070  */
5071  if( i+4<=m&&j+4<=n )
5072  {
5073 
5074  /*
5075  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
5076  *
5077  * This submatrix is calculated as sum of K rank-1 products,
5078  * with operands cached in local variables in order to speed
5079  * up operations with arrays.
5080  */
5081  idxa0 = ia+i+0;
5082  idxa1 = ia+i+1;
5083  idxa2 = ia+i+2;
5084  idxa3 = ia+i+3;
5085  offsa = ja;
5086  idxb0 = jb+j+0;
5087  idxb1 = jb+j+1;
5088  idxb2 = jb+j+2;
5089  idxb3 = jb+j+3;
5090  offsb = ib;
5091  v00 = 0.0;
5092  v01 = 0.0;
5093  v02 = 0.0;
5094  v03 = 0.0;
5095  v10 = 0.0;
5096  v11 = 0.0;
5097  v12 = 0.0;
5098  v13 = 0.0;
5099  v20 = 0.0;
5100  v21 = 0.0;
5101  v22 = 0.0;
5102  v23 = 0.0;
5103  v30 = 0.0;
5104  v31 = 0.0;
5105  v32 = 0.0;
5106  v33 = 0.0;
5107 
5108  /*
5109  * Different variants of internal loop
5110  */
5111  for(t=0; t<=k-1; t++)
5112  {
5113  a0 = a->ptr.pp_double[idxa0][offsa];
5114  a1 = a->ptr.pp_double[idxa1][offsa];
5115  b0 = b->ptr.pp_double[offsb][idxb0];
5116  b1 = b->ptr.pp_double[offsb][idxb1];
5117  v00 = v00+a0*b0;
5118  v01 = v01+a0*b1;
5119  v10 = v10+a1*b0;
5120  v11 = v11+a1*b1;
5121  a2 = a->ptr.pp_double[idxa2][offsa];
5122  a3 = a->ptr.pp_double[idxa3][offsa];
5123  v20 = v20+a2*b0;
5124  v21 = v21+a2*b1;
5125  v30 = v30+a3*b0;
5126  v31 = v31+a3*b1;
5127  b2 = b->ptr.pp_double[offsb][idxb2];
5128  b3 = b->ptr.pp_double[offsb][idxb3];
5129  v22 = v22+a2*b2;
5130  v23 = v23+a2*b3;
5131  v32 = v32+a3*b2;
5132  v33 = v33+a3*b3;
5133  v02 = v02+a0*b2;
5134  v03 = v03+a0*b3;
5135  v12 = v12+a1*b2;
5136  v13 = v13+a1*b3;
5137  offsa = offsa+1;
5138  offsb = offsb+1;
5139  }
5140  if( ae_fp_eq(beta,0) )
5141  {
5142  c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
5143  c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
5144  c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
5145  c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
5146  c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
5147  c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
5148  c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
5149  c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
5150  c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
5151  c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
5152  c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
5153  c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
5154  c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
5155  c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
5156  c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
5157  c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
5158  }
5159  else
5160  {
5161  c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
5162  c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
5163  c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
5164  c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
5165  c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
5166  c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
5167  c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
5168  c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
5169  c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
5170  c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
5171  c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
5172  c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
5173  c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
5174  c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
5175  c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
5176  c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
5177  }
5178  }
5179  else
5180  {
5181 
5182  /*
5183  * Determine submatrix [I0..I1]x[J0..J1] to process
5184  */
5185  i0 = i;
5186  i1 = ae_minint(i+3, m-1, _state);
5187  j0 = j;
5188  j1 = ae_minint(j+3, n-1, _state);
5189 
5190  /*
5191  * Process submatrix
5192  */
5193  for(ik=i0; ik<=i1; ik++)
5194  {
5195  for(jk=j0; jk<=j1; jk++)
5196  {
5197  if( k==0||ae_fp_eq(alpha,0) )
5198  {
5199  v = 0;
5200  }
5201  else
5202  {
5203  v = ae_v_dotproduct(&a->ptr.pp_double[ia+ik][ja], 1, &b->ptr.pp_double[ib][jb+jk], b->stride, ae_v_len(ja,ja+k-1));
5204  }
5205  if( ae_fp_eq(beta,0) )
5206  {
5207  c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
5208  }
5209  else
5210  {
5211  c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
5212  }
5213  }
5214  }
5215  }
5216  j = j+4;
5217  }
5218  i = i+4;
5219  }
5220 }
5221 
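/*************************************************************************
Illustrative sketch of the 4x4 register-blocking idea used by the kernel
above: sixteen accumulators are kept in local variables, so every A and B
element loaded from memory is reused four times before being discarded.
This is a hypothetical helper on plain row-major arrays, not the ALGLIB
kernel itself; it handles one aligned 4x4 block and assumes Beta=1.
*************************************************************************/
static void example_gemm_4x4_block(int k, double alpha,
     const double* arow0, const double* arow1,
     const double* arow2, const double* arow3,
     const double* b, int ldb,
     double* c, int ldc)
{
    // arow0..arow3 point at four consecutive rows of A (K entries each);
    // b points at a KxN block of B with leading dimension ldb, of which the
    // first four columns are used; c points at the 4x4 destination block.
    double v00=0, v01=0, v02=0, v03=0;
    double v10=0, v11=0, v12=0, v13=0;
    double v20=0, v21=0, v22=0, v23=0;
    double v30=0, v31=0, v32=0, v33=0;
    int t;
    for(t=0; t<k; t++)
    {
        double a0 = arow0[t], a1 = arow1[t], a2 = arow2[t], a3 = arow3[t];
        double b0 = b[t*ldb+0], b1 = b[t*ldb+1], b2 = b[t*ldb+2], b3 = b[t*ldb+3];
        v00 += a0*b0; v01 += a0*b1; v02 += a0*b2; v03 += a0*b3;
        v10 += a1*b0; v11 += a1*b1; v12 += a1*b2; v13 += a1*b3;
        v20 += a2*b0; v21 += a2*b1; v22 += a2*b2; v23 += a2*b3;
        v30 += a3*b0; v31 += a3*b1; v32 += a3*b2; v33 += a3*b3;
    }
    c[0*ldc+0] += alpha*v00; c[0*ldc+1] += alpha*v01; c[0*ldc+2] += alpha*v02; c[0*ldc+3] += alpha*v03;
    c[1*ldc+0] += alpha*v10; c[1*ldc+1] += alpha*v11; c[1*ldc+2] += alpha*v12; c[1*ldc+3] += alpha*v13;
    c[2*ldc+0] += alpha*v20; c[2*ldc+1] += alpha*v21; c[2*ldc+2] += alpha*v22; c[2*ldc+3] += alpha*v23;
    c[3*ldc+0] += alpha*v30; c[3*ldc+1] += alpha*v31; c[3*ldc+2] += alpha*v32; c[3*ldc+3] += alpha*v33;
}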
5222 
5223 /*************************************************************************
5224 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for the situation
5225 with OpTypeA=0 and OpTypeB=1.
5226 
5227 Additional info:
5228 * this function requires that Alpha<>0 (assertion is thrown otherwise)
5229 
5230 INPUT PARAMETERS
5231  M - matrix size, M>0
5232  N - matrix size, N>0
5233  K - matrix size, K>0
5234  Alpha - coefficient
5235  A - matrix
5236  IA - submatrix offset
5237  JA - submatrix offset
5238  B - matrix
5239  IB - submatrix offset
5240  JB - submatrix offset
5241  Beta - coefficient
5242  C - PREALLOCATED output matrix
5243  IC - submatrix offset
5244  JC - submatrix offset
5245 
5246  -- ALGLIB routine --
5247  27.03.2013
5248  Bochkanov Sergey
5249 *************************************************************************/
5250 void rmatrixgemmk44v01(ae_int_t m,
5251  ae_int_t n,
5252  ae_int_t k,
5253  double alpha,
5254  /* Real */ ae_matrix* a,
5255  ae_int_t ia,
5256  ae_int_t ja,
5257  /* Real */ ae_matrix* b,
5258  ae_int_t ib,
5259  ae_int_t jb,
5260  double beta,
5261  /* Real */ ae_matrix* c,
5262  ae_int_t ic,
5263  ae_int_t jc,
5264  ae_state *_state)
5265 {
5266  ae_int_t i;
5267  ae_int_t j;
5268  double v;
5269  double v00;
5270  double v01;
5271  double v02;
5272  double v03;
5273  double v10;
5274  double v11;
5275  double v12;
5276  double v13;
5277  double v20;
5278  double v21;
5279  double v22;
5280  double v23;
5281  double v30;
5282  double v31;
5283  double v32;
5284  double v33;
5285  double a0;
5286  double a1;
5287  double a2;
5288  double a3;
5289  double b0;
5290  double b1;
5291  double b2;
5292  double b3;
5293  ae_int_t idxa0;
5294  ae_int_t idxa1;
5295  ae_int_t idxa2;
5296  ae_int_t idxa3;
5297  ae_int_t idxb0;
5298  ae_int_t idxb1;
5299  ae_int_t idxb2;
5300  ae_int_t idxb3;
5301  ae_int_t i0;
5302  ae_int_t i1;
5303  ae_int_t ik;
5304  ae_int_t j0;
5305  ae_int_t j1;
5306  ae_int_t jk;
5307  ae_int_t t;
5308  ae_int_t offsa;
5309  ae_int_t offsb;
5310 
5311 
5312  ae_assert(ae_fp_neq(alpha,0), "RMatrixGEMMK44V01: internal error (Alpha=0)", _state);
5313 
5314  /*
5315  * if matrix size is zero
5316  */
5317  if( m==0||n==0 )
5318  {
5319  return;
5320  }
5321 
5322  /*
5323  * A*B'
5324  */
5325  i = 0;
5326  while(i<m)
5327  {
5328  j = 0;
5329  while(j<n)
5330  {
5331 
5332  /*
5333  * Choose between specialized 4x4 code and general code
5334  */
5335  if( i+4<=m&&j+4<=n )
5336  {
5337 
5338  /*
5339  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
5340  *
5341  * This submatrix is calculated as sum of K rank-1 products,
5342  * with operands cached in local variables in order to speed
5343  * up operations with arrays.
5344  */
5345  idxa0 = ia+i+0;
5346  idxa1 = ia+i+1;
5347  idxa2 = ia+i+2;
5348  idxa3 = ia+i+3;
5349  offsa = ja;
5350  idxb0 = ib+j+0;
5351  idxb1 = ib+j+1;
5352  idxb2 = ib+j+2;
5353  idxb3 = ib+j+3;
5354  offsb = jb;
5355  v00 = 0.0;
5356  v01 = 0.0;
5357  v02 = 0.0;
5358  v03 = 0.0;
5359  v10 = 0.0;
5360  v11 = 0.0;
5361  v12 = 0.0;
5362  v13 = 0.0;
5363  v20 = 0.0;
5364  v21 = 0.0;
5365  v22 = 0.0;
5366  v23 = 0.0;
5367  v30 = 0.0;
5368  v31 = 0.0;
5369  v32 = 0.0;
5370  v33 = 0.0;
5371  for(t=0; t<=k-1; t++)
5372  {
5373  a0 = a->ptr.pp_double[idxa0][offsa];
5374  a1 = a->ptr.pp_double[idxa1][offsa];
5375  b0 = b->ptr.pp_double[idxb0][offsb];
5376  b1 = b->ptr.pp_double[idxb1][offsb];
5377  v00 = v00+a0*b0;
5378  v01 = v01+a0*b1;
5379  v10 = v10+a1*b0;
5380  v11 = v11+a1*b1;
5381  a2 = a->ptr.pp_double[idxa2][offsa];
5382  a3 = a->ptr.pp_double[idxa3][offsa];
5383  v20 = v20+a2*b0;
5384  v21 = v21+a2*b1;
5385  v30 = v30+a3*b0;
5386  v31 = v31+a3*b1;
5387  b2 = b->ptr.pp_double[idxb2][offsb];
5388  b3 = b->ptr.pp_double[idxb3][offsb];
5389  v22 = v22+a2*b2;
5390  v23 = v23+a2*b3;
5391  v32 = v32+a3*b2;
5392  v33 = v33+a3*b3;
5393  v02 = v02+a0*b2;
5394  v03 = v03+a0*b3;
5395  v12 = v12+a1*b2;
5396  v13 = v13+a1*b3;
5397  offsa = offsa+1;
5398  offsb = offsb+1;
5399  }
5400  if( ae_fp_eq(beta,0) )
5401  {
5402  c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
5403  c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
5404  c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
5405  c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
5406  c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
5407  c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
5408  c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
5409  c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
5410  c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
5411  c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
5412  c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
5413  c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
5414  c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
5415  c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
5416  c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
5417  c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
5418  }
5419  else
5420  {
5421  c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
5422  c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
5423  c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
5424  c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
5425  c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
5426  c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
5427  c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
5428  c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
5429  c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
5430  c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
5431  c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
5432  c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
5433  c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
5434  c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
5435  c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
5436  c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
5437  }
5438  }
5439  else
5440  {
5441 
5442  /*
5443  * Determine submatrix [I0..I1]x[J0..J1] to process
5444  */
5445  i0 = i;
5446  i1 = ae_minint(i+3, m-1, _state);
5447  j0 = j;
5448  j1 = ae_minint(j+3, n-1, _state);
5449 
5450  /*
5451  * Process submatrix
5452  */
5453  for(ik=i0; ik<=i1; ik++)
5454  {
5455  for(jk=j0; jk<=j1; jk++)
5456  {
5457  if( k==0||ae_fp_eq(alpha,0) )
5458  {
5459  v = 0;
5460  }
5461  else
5462  {
5463  v = ae_v_dotproduct(&a->ptr.pp_double[ia+ik][ja], 1, &b->ptr.pp_double[ib+jk][jb], 1, ae_v_len(ja,ja+k-1));
5464  }
5465  if( ae_fp_eq(beta,0) )
5466  {
5467  c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
5468  }
5469  else
5470  {
5471  c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
5472  }
5473  }
5474  }
5475  }
5476  j = j+4;
5477  }
5478  i = i+4;
5479  }
5480 }
5481 
5482 
5483 /*************************************************************************
5484 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for the situation
5485 with OpTypeA=1 and OpTypeB=0.
5486 
5487 Additional info:
5488 * this function requires that Alpha<>0 (assertion is thrown otherwise)
5489 
5490 INPUT PARAMETERS
5491  M - matrix size, M>0
5492  N - matrix size, N>0
5493  K - matrix size, K>0
5494  Alpha - coefficient
5495  A - matrix
5496  IA - submatrix offset
5497  JA - submatrix offset
5498  B - matrix
5499  IB - submatrix offset
5500  JB - submatrix offset
5501  Beta - coefficient
5502  C - PREALLOCATED output matrix
5503  IC - submatrix offset
5504  JC - submatrix offset
5505 
5506  -- ALGLIB routine --
5507  27.03.2013
5508  Bochkanov Sergey
5509 *************************************************************************/
5510 void rmatrixgemmk44v10(ae_int_t m,
5511  ae_int_t n,
5512  ae_int_t k,
5513  double alpha,
5514  /* Real */ ae_matrix* a,
5515  ae_int_t ia,
5516  ae_int_t ja,
5517  /* Real */ ae_matrix* b,
5518  ae_int_t ib,
5519  ae_int_t jb,
5520  double beta,
5521  /* Real */ ae_matrix* c,
5522  ae_int_t ic,
5523  ae_int_t jc,
5524  ae_state *_state)
5525 {
5526  ae_int_t i;
5527  ae_int_t j;
5528  double v;
5529  double v00;
5530  double v01;
5531  double v02;
5532  double v03;
5533  double v10;
5534  double v11;
5535  double v12;
5536  double v13;
5537  double v20;
5538  double v21;
5539  double v22;
5540  double v23;
5541  double v30;
5542  double v31;
5543  double v32;
5544  double v33;
5545  double a0;
5546  double a1;
5547  double a2;
5548  double a3;
5549  double b0;
5550  double b1;
5551  double b2;
5552  double b3;
5553  ae_int_t idxa0;
5554  ae_int_t idxa1;
5555  ae_int_t idxa2;
5556  ae_int_t idxa3;
5557  ae_int_t idxb0;
5558  ae_int_t idxb1;
5559  ae_int_t idxb2;
5560  ae_int_t idxb3;
5561  ae_int_t i0;
5562  ae_int_t i1;
5563  ae_int_t ik;
5564  ae_int_t j0;
5565  ae_int_t j1;
5566  ae_int_t jk;
5567  ae_int_t t;
5568  ae_int_t offsa;
5569  ae_int_t offsb;
5570 
5571 
5572  ae_assert(ae_fp_neq(alpha,0), "RMatrixGEMMK44V10: internal error (Alpha=0)", _state);
5573 
5574  /*
5575  * if matrix size is zero
5576  */
5577  if( m==0||n==0 )
5578  {
5579  return;
5580  }
5581 
5582  /*
5583  * A'*B
5584  */
5585  i = 0;
5586  while(i<m)
5587  {
5588  j = 0;
5589  while(j<n)
5590  {
5591 
5592  /*
5593  * Choose between specialized 4x4 code and general code
5594  */
5595  if( i+4<=m&&j+4<=n )
5596  {
5597 
5598  /*
5599  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
5600  *
5601  * This submatrix is calculated as sum of K rank-1 products,
5602  * with operands cached in local variables in order to speed
5603  * up operations with arrays.
5604  */
5605  idxa0 = ja+i+0;
5606  idxa1 = ja+i+1;
5607  idxa2 = ja+i+2;
5608  idxa3 = ja+i+3;
5609  offsa = ia;
5610  idxb0 = jb+j+0;
5611  idxb1 = jb+j+1;
5612  idxb2 = jb+j+2;
5613  idxb3 = jb+j+3;
5614  offsb = ib;
5615  v00 = 0.0;
5616  v01 = 0.0;
5617  v02 = 0.0;
5618  v03 = 0.0;
5619  v10 = 0.0;
5620  v11 = 0.0;
5621  v12 = 0.0;
5622  v13 = 0.0;
5623  v20 = 0.0;
5624  v21 = 0.0;
5625  v22 = 0.0;
5626  v23 = 0.0;
5627  v30 = 0.0;
5628  v31 = 0.0;
5629  v32 = 0.0;
5630  v33 = 0.0;
5631  for(t=0; t<=k-1; t++)
5632  {
5633  a0 = a->ptr.pp_double[offsa][idxa0];
5634  a1 = a->ptr.pp_double[offsa][idxa1];
5635  b0 = b->ptr.pp_double[offsb][idxb0];
5636  b1 = b->ptr.pp_double[offsb][idxb1];
5637  v00 = v00+a0*b0;
5638  v01 = v01+a0*b1;
5639  v10 = v10+a1*b0;
5640  v11 = v11+a1*b1;
5641  a2 = a->ptr.pp_double[offsa][idxa2];
5642  a3 = a->ptr.pp_double[offsa][idxa3];
5643  v20 = v20+a2*b0;
5644  v21 = v21+a2*b1;
5645  v30 = v30+a3*b0;
5646  v31 = v31+a3*b1;
5647  b2 = b->ptr.pp_double[offsb][idxb2];
5648  b3 = b->ptr.pp_double[offsb][idxb3];
5649  v22 = v22+a2*b2;
5650  v23 = v23+a2*b3;
5651  v32 = v32+a3*b2;
5652  v33 = v33+a3*b3;
5653  v02 = v02+a0*b2;
5654  v03 = v03+a0*b3;
5655  v12 = v12+a1*b2;
5656  v13 = v13+a1*b3;
5657  offsa = offsa+1;
5658  offsb = offsb+1;
5659  }
5660  if( ae_fp_eq(beta,0) )
5661  {
5662  c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
5663  c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
5664  c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
5665  c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
5666  c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
5667  c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
5668  c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
5669  c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
5670  c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
5671  c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
5672  c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
5673  c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
5674  c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
5675  c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
5676  c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
5677  c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
5678  }
5679  else
5680  {
5681  c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
5682  c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
5683  c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
5684  c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
5685  c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
5686  c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
5687  c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
5688  c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
5689  c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
5690  c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
5691  c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
5692  c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
5693  c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
5694  c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
5695  c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
5696  c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
5697  }
5698  }
5699  else
5700  {
5701 
5702  /*
5703  * Determine submatrix [I0..I1]x[J0..J1] to process
5704  */
5705  i0 = i;
5706  i1 = ae_minint(i+3, m-1, _state);
5707  j0 = j;
5708  j1 = ae_minint(j+3, n-1, _state);
5709 
5710  /*
5711  * Process submatrix
5712  */
5713  for(ik=i0; ik<=i1; ik++)
5714  {
5715  for(jk=j0; jk<=j1; jk++)
5716  {
5717  if( k==0||ae_fp_eq(alpha,0) )
5718  {
5719  v = 0;
5720  }
5721  else
5722  {
5723  v = 0.0;
5724  v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+ik], a->stride, &b->ptr.pp_double[ib][jb+jk], b->stride, ae_v_len(ia,ia+k-1));
5725  }
5726  if( ae_fp_eq(beta,0) )
5727  {
5728  c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
5729  }
5730  else
5731  {
5732  c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
5733  }
5734  }
5735  }
5736  }
5737  j = j+4;
5738  }
5739  i = i+4;
5740  }
5741 }
5742 
5743 
5744 /*************************************************************************
5745 RMatrixGEMM kernel, basecase code for RMatrixGEMM, specialized for the situation
5746 with OpTypeA=1 and OpTypeB=1.
5747 
5748 Additional info:
5749 * this function requires that Alpha<>0 (assertion is thrown otherwise)
5750 
5751 INPUT PARAMETERS
5752  M - matrix size, M>0
5753  N - matrix size, N>0
5754  K - matrix size, K>0
5755  Alpha - coefficient
5756  A - matrix
5757  IA - submatrix offset
5758  JA - submatrix offset
5759  B - matrix
5760  IB - submatrix offset
5761  JB - submatrix offset
5762  Beta - coefficient
5763  C - PREALLOCATED output matrix
5764  IC - submatrix offset
5765  JC - submatrix offset
5766 
5767  -- ALGLIB routine --
5768  27.03.2013
5769  Bochkanov Sergey
5770 *************************************************************************/
5771 void rmatrixgemmk44v11(ae_int_t m,
5772  ae_int_t n,
5773  ae_int_t k,
5774  double alpha,
5775  /* Real */ ae_matrix* a,
5776  ae_int_t ia,
5777  ae_int_t ja,
5778  /* Real */ ae_matrix* b,
5779  ae_int_t ib,
5780  ae_int_t jb,
5781  double beta,
5782  /* Real */ ae_matrix* c,
5783  ae_int_t ic,
5784  ae_int_t jc,
5785  ae_state *_state)
5786 {
5787  ae_int_t i;
5788  ae_int_t j;
5789  double v;
5790  double v00;
5791  double v01;
5792  double v02;
5793  double v03;
5794  double v10;
5795  double v11;
5796  double v12;
5797  double v13;
5798  double v20;
5799  double v21;
5800  double v22;
5801  double v23;
5802  double v30;
5803  double v31;
5804  double v32;
5805  double v33;
5806  double a0;
5807  double a1;
5808  double a2;
5809  double a3;
5810  double b0;
5811  double b1;
5812  double b2;
5813  double b3;
5814  ae_int_t idxa0;
5815  ae_int_t idxa1;
5816  ae_int_t idxa2;
5817  ae_int_t idxa3;
5818  ae_int_t idxb0;
5819  ae_int_t idxb1;
5820  ae_int_t idxb2;
5821  ae_int_t idxb3;
5822  ae_int_t i0;
5823  ae_int_t i1;
5824  ae_int_t ik;
5825  ae_int_t j0;
5826  ae_int_t j1;
5827  ae_int_t jk;
5828  ae_int_t t;
5829  ae_int_t offsa;
5830  ae_int_t offsb;
5831 
5832 
5833  ae_assert(ae_fp_neq(alpha,0), "RMatrixGEMMK44V11: internal error (Alpha=0)", _state);
5834 
5835  /*
5836  * if matrix size is zero
5837  */
5838  if( m==0||n==0 )
5839  {
5840  return;
5841  }
5842 
5843  /*
5844  * A'*B'
5845  */
5846  i = 0;
5847  while(i<m)
5848  {
5849  j = 0;
5850  while(j<n)
5851  {
5852 
5853  /*
5854  * Choose between specialized 4x4 code and general code
5855  */
5856  if( i+4<=m&&j+4<=n )
5857  {
5858 
5859  /*
5860  * Specialized 4x4 code for [I..I+3]x[J..J+3] submatrix of C.
5861  *
5862  * This submatrix is calculated as sum of K rank-1 products,
5863  * with operands cached in local variables in order to speed
5864  * up operations with arrays.
5865  */
5866  idxa0 = ja+i+0;
5867  idxa1 = ja+i+1;
5868  idxa2 = ja+i+2;
5869  idxa3 = ja+i+3;
5870  offsa = ia;
5871  idxb0 = ib+j+0;
5872  idxb1 = ib+j+1;
5873  idxb2 = ib+j+2;
5874  idxb3 = ib+j+3;
5875  offsb = jb;
5876  v00 = 0.0;
5877  v01 = 0.0;
5878  v02 = 0.0;
5879  v03 = 0.0;
5880  v10 = 0.0;
5881  v11 = 0.0;
5882  v12 = 0.0;
5883  v13 = 0.0;
5884  v20 = 0.0;
5885  v21 = 0.0;
5886  v22 = 0.0;
5887  v23 = 0.0;
5888  v30 = 0.0;
5889  v31 = 0.0;
5890  v32 = 0.0;
5891  v33 = 0.0;
5892  for(t=0; t<=k-1; t++)
5893  {
5894  a0 = a->ptr.pp_double[offsa][idxa0];
5895  a1 = a->ptr.pp_double[offsa][idxa1];
5896  b0 = b->ptr.pp_double[idxb0][offsb];
5897  b1 = b->ptr.pp_double[idxb1][offsb];
5898  v00 = v00+a0*b0;
5899  v01 = v01+a0*b1;
5900  v10 = v10+a1*b0;
5901  v11 = v11+a1*b1;
5902  a2 = a->ptr.pp_double[offsa][idxa2];
5903  a3 = a->ptr.pp_double[offsa][idxa3];
5904  v20 = v20+a2*b0;
5905  v21 = v21+a2*b1;
5906  v30 = v30+a3*b0;
5907  v31 = v31+a3*b1;
5908  b2 = b->ptr.pp_double[idxb2][offsb];
5909  b3 = b->ptr.pp_double[idxb3][offsb];
5910  v22 = v22+a2*b2;
5911  v23 = v23+a2*b3;
5912  v32 = v32+a3*b2;
5913  v33 = v33+a3*b3;
5914  v02 = v02+a0*b2;
5915  v03 = v03+a0*b3;
5916  v12 = v12+a1*b2;
5917  v13 = v13+a1*b3;
5918  offsa = offsa+1;
5919  offsb = offsb+1;
5920  }
5921  if( ae_fp_eq(beta,0) )
5922  {
5923  c->ptr.pp_double[ic+i+0][jc+j+0] = alpha*v00;
5924  c->ptr.pp_double[ic+i+0][jc+j+1] = alpha*v01;
5925  c->ptr.pp_double[ic+i+0][jc+j+2] = alpha*v02;
5926  c->ptr.pp_double[ic+i+0][jc+j+3] = alpha*v03;
5927  c->ptr.pp_double[ic+i+1][jc+j+0] = alpha*v10;
5928  c->ptr.pp_double[ic+i+1][jc+j+1] = alpha*v11;
5929  c->ptr.pp_double[ic+i+1][jc+j+2] = alpha*v12;
5930  c->ptr.pp_double[ic+i+1][jc+j+3] = alpha*v13;
5931  c->ptr.pp_double[ic+i+2][jc+j+0] = alpha*v20;
5932  c->ptr.pp_double[ic+i+2][jc+j+1] = alpha*v21;
5933  c->ptr.pp_double[ic+i+2][jc+j+2] = alpha*v22;
5934  c->ptr.pp_double[ic+i+2][jc+j+3] = alpha*v23;
5935  c->ptr.pp_double[ic+i+3][jc+j+0] = alpha*v30;
5936  c->ptr.pp_double[ic+i+3][jc+j+1] = alpha*v31;
5937  c->ptr.pp_double[ic+i+3][jc+j+2] = alpha*v32;
5938  c->ptr.pp_double[ic+i+3][jc+j+3] = alpha*v33;
5939  }
5940  else
5941  {
5942  c->ptr.pp_double[ic+i+0][jc+j+0] = beta*c->ptr.pp_double[ic+i+0][jc+j+0]+alpha*v00;
5943  c->ptr.pp_double[ic+i+0][jc+j+1] = beta*c->ptr.pp_double[ic+i+0][jc+j+1]+alpha*v01;
5944  c->ptr.pp_double[ic+i+0][jc+j+2] = beta*c->ptr.pp_double[ic+i+0][jc+j+2]+alpha*v02;
5945  c->ptr.pp_double[ic+i+0][jc+j+3] = beta*c->ptr.pp_double[ic+i+0][jc+j+3]+alpha*v03;
5946  c->ptr.pp_double[ic+i+1][jc+j+0] = beta*c->ptr.pp_double[ic+i+1][jc+j+0]+alpha*v10;
5947  c->ptr.pp_double[ic+i+1][jc+j+1] = beta*c->ptr.pp_double[ic+i+1][jc+j+1]+alpha*v11;
5948  c->ptr.pp_double[ic+i+1][jc+j+2] = beta*c->ptr.pp_double[ic+i+1][jc+j+2]+alpha*v12;
5949  c->ptr.pp_double[ic+i+1][jc+j+3] = beta*c->ptr.pp_double[ic+i+1][jc+j+3]+alpha*v13;
5950  c->ptr.pp_double[ic+i+2][jc+j+0] = beta*c->ptr.pp_double[ic+i+2][jc+j+0]+alpha*v20;
5951  c->ptr.pp_double[ic+i+2][jc+j+1] = beta*c->ptr.pp_double[ic+i+2][jc+j+1]+alpha*v21;
5952  c->ptr.pp_double[ic+i+2][jc+j+2] = beta*c->ptr.pp_double[ic+i+2][jc+j+2]+alpha*v22;
5953  c->ptr.pp_double[ic+i+2][jc+j+3] = beta*c->ptr.pp_double[ic+i+2][jc+j+3]+alpha*v23;
5954  c->ptr.pp_double[ic+i+3][jc+j+0] = beta*c->ptr.pp_double[ic+i+3][jc+j+0]+alpha*v30;
5955  c->ptr.pp_double[ic+i+3][jc+j+1] = beta*c->ptr.pp_double[ic+i+3][jc+j+1]+alpha*v31;
5956  c->ptr.pp_double[ic+i+3][jc+j+2] = beta*c->ptr.pp_double[ic+i+3][jc+j+2]+alpha*v32;
5957  c->ptr.pp_double[ic+i+3][jc+j+3] = beta*c->ptr.pp_double[ic+i+3][jc+j+3]+alpha*v33;
5958  }
5959  }
5960  else
5961  {
5962 
5963  /*
5964  * Determine submatrix [I0..I1]x[J0..J1] to process
5965  */
5966  i0 = i;
5967  i1 = ae_minint(i+3, m-1, _state);
5968  j0 = j;
5969  j1 = ae_minint(j+3, n-1, _state);
5970 
5971  /*
5972  * Process submatrix
5973  */
5974  for(ik=i0; ik<=i1; ik++)
5975  {
5976  for(jk=j0; jk<=j1; jk++)
5977  {
5978  if( k==0||ae_fp_eq(alpha,0) )
5979  {
5980  v = 0;
5981  }
5982  else
5983  {
5984  v = 0.0;
5985  v = ae_v_dotproduct(&a->ptr.pp_double[ia][ja+ik], a->stride, &b->ptr.pp_double[ib+jk][jb], 1, ae_v_len(ia,ia+k-1));
5986  }
5987  if( ae_fp_eq(beta,0) )
5988  {
5989  c->ptr.pp_double[ic+ik][jc+jk] = alpha*v;
5990  }
5991  else
5992  {
5993  c->ptr.pp_double[ic+ik][jc+jk] = beta*c->ptr.pp_double[ic+ik][jc+jk]+alpha*v;
5994  }
5995  }
5996  }
5997  }
5998  j = j+4;
5999  }
6000  i = i+4;
6001  }
6002 }
6003 
6004 
6005 
6006 
6007 /*************************************************************************
6008 MKL-based kernel
6009 
6010  -- ALGLIB routine --
6011  19.01.2010
6012  Bochkanov Sergey
6013 *************************************************************************/
6014 ae_bool rmatrixsyrkmkl(ae_int_t n,
6015  ae_int_t k,
6016  double alpha,
6017  /* Real */ ae_matrix* a,
6018  ae_int_t ia,
6019  ae_int_t ja,
6020  ae_int_t optypea,
6021  double beta,
6022  /* Real */ ae_matrix* c,
6023  ae_int_t ic,
6024  ae_int_t jc,
6025  ae_bool isupper,
6026  ae_state *_state)
6027 {
6028 #ifndef ALGLIB_INTERCEPTS_MKL
6029  ae_bool result;
6030 
6031 
6032  result = ae_false;
6033  return result;
6034 #else
6035  return _ialglib_i_rmatrixsyrkmkl(n, k, alpha, a, ia, ja, optypea, beta, c, ic, jc, isupper);
6036 #endif
6037 }
6038 
6039 
6040 /*************************************************************************
6041 MKL-based kernel
6042 
6043  -- ALGLIB routine --
6044  19.01.2010
6045  Bochkanov Sergey
6046 *************************************************************************/
6047 ae_bool rmatrixgemmmkl(ae_int_t m,
6048  ae_int_t n,
6049  ae_int_t k,
6050  double alpha,
6051  /* Real */ ae_matrix* a,
6052  ae_int_t ia,
6053  ae_int_t ja,
6054  ae_int_t optypea,
6055  /* Real */ ae_matrix* b,
6056  ae_int_t ib,
6057  ae_int_t jb,
6058  ae_int_t optypeb,
6059  double beta,
6060  /* Real */ ae_matrix* c,
6061  ae_int_t ic,
6062  ae_int_t jc,
6063  ae_state *_state)
6064 {
6065 #ifndef ALGLIB_INTERCEPTS_MKL
6066  ae_bool result;
6067 
6068 
6069  result = ae_false;
6070  return result;
6071 #else
6072  return _ialglib_i_rmatrixgemmmkl(m, n, k, alpha, a, ia, ja, optypea, b, ib, jb, optypeb, beta, c, ic, jc);
6073 #endif
6074 }
6075 
6076 
6077 
6078 
6079 double vectornorm2(/* Real */ ae_vector* x,
6080  ae_int_t i1,
6081  ae_int_t i2,
6082  ae_state *_state)
6083 {
6084  ae_int_t n;
6085  ae_int_t ix;
6086  double absxi;
6087  double scl;
6088  double ssq;
6089  double result;
6090 
6091 
6092  n = i2-i1+1;
6093  if( n<1 )
6094  {
6095  result = 0;
6096  return result;
6097  }
6098  if( n==1 )
6099  {
6100  result = ae_fabs(x->ptr.p_double[i1], _state);
6101  return result;
6102  }
6103  scl = 0;
6104  ssq = 1;
6105  for(ix=i1; ix<=i2; ix++)
6106  {
6107  if( ae_fp_neq(x->ptr.p_double[ix],0) )
6108  {
6109  absxi = ae_fabs(x->ptr.p_double[ix], _state);
6110  if( ae_fp_less(scl,absxi) )
6111  {
6112  ssq = 1+ssq*ae_sqr(scl/absxi, _state);
6113  scl = absxi;
6114  }
6115  else
6116  {
6117  ssq = ssq+ae_sqr(absxi/scl, _state);
6118  }
6119  }
6120  }
6121  result = scl*ae_sqrt(ssq, _state);
6122  return result;
6123 }
6124 
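/*************************************************************************
VectorNorm2 above uses the classical scaled sum-of-squares recurrence (as
in LAPACK's DNRM2): it maintains the invariant SCL*sqrt(SSQ) = 2-norm of
the elements processed so far, squaring only ratios that never exceed 1,
which avoids spurious overflow/underflow. A minimal sketch of the same
idea on a plain double array (hypothetical helper, assuming <math.h> for
fabs/sqrt):
*************************************************************************/
static double example_scaled_norm2(const double* x, int n)
{
    // Invariant after each element: scl*sqrt(ssq) == sqrt(x[0]^2+...+x[i]^2)
    double scl = 0.0, ssq = 1.0;
    int i;
    for(i=0; i<n; i++)
    {
        double absxi = fabs(x[i]);
        if( absxi==0.0 )
            continue;
        if( scl<absxi )
        {
            // rescale the running sum so the new element becomes the scale
            ssq = 1.0+ssq*(scl/absxi)*(scl/absxi);
            scl = absxi;
        }
        else
            ssq = ssq+(absxi/scl)*(absxi/scl);
    }
    return scl*sqrt(ssq);
}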
6125 
6126 ae_int_t vectoridxabsmax(/* Real */ ae_vector* x,
6127  ae_int_t i1,
6128  ae_int_t i2,
6129  ae_state *_state)
6130 {
6131  ae_int_t i;
6132  ae_int_t result;
6133 
6134 
6135  result = i1;
6136  for(i=i1+1; i<=i2; i++)
6137  {
6138  if( ae_fp_greater(ae_fabs(x->ptr.p_double[i], _state),ae_fabs(x->ptr.p_double[result], _state)) )
6139  {
6140  result = i;
6141  }
6142  }
6143  return result;
6144 }
6145 
6146 
6147 ae_int_t columnidxabsmax(/* Real */ ae_matrix* x,
6148  ae_int_t i1,
6149  ae_int_t i2,
6150  ae_int_t j,
6151  ae_state *_state)
6152 {
6153  ae_int_t i;
6154  ae_int_t result;
6155 
6156 
6157  result = i1;
6158  for(i=i1+1; i<=i2; i++)
6159  {
6160  if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[result][j], _state)) )
6161  {
6162  result = i;
6163  }
6164  }
6165  return result;
6166 }
6167 
6168 
6169 ae_int_t rowidxabsmax(/* Real */ ae_matrix* x,
6170  ae_int_t j1,
6171  ae_int_t j2,
6172  ae_int_t i,
6173  ae_state *_state)
6174 {
6175  ae_int_t j;
6176  ae_int_t result;
6177 
6178 
6179  result = j1;
6180  for(j=j1+1; j<=j2; j++)
6181  {
6182  if( ae_fp_greater(ae_fabs(x->ptr.pp_double[i][j], _state),ae_fabs(x->ptr.pp_double[i][result], _state)) )
6183  {
6184  result = j;
6185  }
6186  }
6187  return result;
6188 }
6189 
6190 
6191 double upperhessenberg1norm(/* Real */ ae_matrix* a,
6192  ae_int_t i1,
6193  ae_int_t i2,
6194  ae_int_t j1,
6195  ae_int_t j2,
6196  /* Real */ ae_vector* work,
6197  ae_state *_state)
6198 {
6199  ae_int_t i;
6200  ae_int_t j;
6201  double result;
6202 
6203 
6204  ae_assert(i2-i1==j2-j1, "UpperHessenberg1Norm: I2-I1<>J2-J1!", _state);
6205  for(j=j1; j<=j2; j++)
6206  {
6207  work->ptr.p_double[j] = 0;
6208  }
6209  for(i=i1; i<=i2; i++)
6210  {
6211  for(j=ae_maxint(j1, j1+i-i1-1, _state); j<=j2; j++)
6212  {
6213  work->ptr.p_double[j] = work->ptr.p_double[j]+ae_fabs(a->ptr.pp_double[i][j], _state);
6214  }
6215  }
6216  result = 0;
6217  for(j=j1; j<=j2; j++)
6218  {
6219  result = ae_maxreal(result, work->ptr.p_double[j], _state);
6220  }
6221  return result;
6222 }
6223 
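/*************************************************************************
UpperHessenberg1Norm above computes the matrix 1-norm, i.e. the maximum
over columns of the sum of absolute values, restricted to the Hessenberg
pattern (entries with J >= I-1). A dense plain-C reference for the same
quantity, ignoring the sparsity, is sketched below (hypothetical helper,
assuming <math.h> and a row-major M x N array):
*************************************************************************/
static double example_matrix_1norm(const double* a, int m, int n)
{
    // ||A||_1 = max_j sum_i |a[i][j]|
    double result = 0.0;
    int i, j;
    for(j=0; j<n; j++)
    {
        double colsum = 0.0;
        for(i=0; i<m; i++)
            colsum += fabs(a[i*n+j]);
        if( colsum>result )
            result = colsum;
    }
    return result;
}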
6224 
6225 void copymatrix(/* Real */ ae_matrix* a,
6226  ae_int_t is1,
6227  ae_int_t is2,
6228  ae_int_t js1,
6229  ae_int_t js2,
6230  /* Real */ ae_matrix* b,
6231  ae_int_t id1,
6232  ae_int_t id2,
6233  ae_int_t jd1,
6234  ae_int_t jd2,
6235  ae_state *_state)
6236 {
6237  ae_int_t isrc;
6238  ae_int_t idst;
6239 
6240 
6241  if( is1>is2||js1>js2 )
6242  {
6243  return;
6244  }
6245  ae_assert(is2-is1==id2-id1, "CopyMatrix: different sizes!", _state);
6246  ae_assert(js2-js1==jd2-jd1, "CopyMatrix: different sizes!", _state);
6247  for(isrc=is1; isrc<=is2; isrc++)
6248  {
6249  idst = isrc-is1+id1;
6250  ae_v_move(&b->ptr.pp_double[idst][jd1], 1, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(jd1,jd2));
6251  }
6252 }
6253 
6254 
6255 void inplacetranspose(/* Real */ ae_matrix* a,
6256  ae_int_t i1,
6257  ae_int_t i2,
6258  ae_int_t j1,
6259  ae_int_t j2,
6260  /* Real */ ae_vector* work,
6261  ae_state *_state)
6262 {
6263  ae_int_t i;
6264  ae_int_t j;
6265  ae_int_t ips;
6266  ae_int_t jps;
6267  ae_int_t l;
6268 
6269 
6270  if( i1>i2||j1>j2 )
6271  {
6272  return;
6273  }
6274  ae_assert(i1-i2==j1-j2, "InplaceTranspose error: incorrect array size!", _state);
6275  for(i=i1; i<=i2-1; i++)
6276  {
6277  j = j1+i-i1;
6278  ips = i+1;
6279  jps = j1+ips-i1;
6280  l = i2-i;
6281  ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ips][j], a->stride, ae_v_len(1,l));
6282  ae_v_move(&a->ptr.pp_double[ips][j], a->stride, &a->ptr.pp_double[i][jps], 1, ae_v_len(ips,i2));
6283  ae_v_move(&a->ptr.pp_double[i][jps], 1, &work->ptr.p_double[1], 1, ae_v_len(jps,j2));
6284  }
6285 }
6286 
6287 
6288 void copyandtranspose(/* Real */ ae_matrix* a,
6289  ae_int_t is1,
6290  ae_int_t is2,
6291  ae_int_t js1,
6292  ae_int_t js2,
6293  /* Real */ ae_matrix* b,
6294  ae_int_t id1,
6295  ae_int_t id2,
6296  ae_int_t jd1,
6297  ae_int_t jd2,
6298  ae_state *_state)
6299 {
6300  ae_int_t isrc;
6301  ae_int_t jdst;
6302 
6303 
6304  if( is1>is2||js1>js2 )
6305  {
6306  return;
6307  }
6308  ae_assert(is2-is1==jd2-jd1, "CopyAndTranspose: different sizes!", _state);
6309  ae_assert(js2-js1==id2-id1, "CopyAndTranspose: different sizes!", _state);
6310  for(isrc=is1; isrc<=is2; isrc++)
6311  {
6312  jdst = isrc-is1+jd1;
6313  ae_v_move(&b->ptr.pp_double[id1][jdst], b->stride, &a->ptr.pp_double[isrc][js1], 1, ae_v_len(id1,id2));
6314  }
6315 }
6316 
6317 
6318 void matrixvectormultiply(/* Real */ ae_matrix* a,
6319  ae_int_t i1,
6320  ae_int_t i2,
6321  ae_int_t j1,
6322  ae_int_t j2,
6323  ae_bool trans,
6324  /* Real */ ae_vector* x,
6325  ae_int_t ix1,
6326  ae_int_t ix2,
6327  double alpha,
6328  /* Real */ ae_vector* y,
6329  ae_int_t iy1,
6330  ae_int_t iy2,
6331  double beta,
6332  ae_state *_state)
6333 {
6334  ae_int_t i;
6335  double v;
6336 
6337 
6338  if( !trans )
6339  {
6340 
6341  /*
6342  * y := alpha*A*x + beta*y;
6343  */
6344  if( i1>i2||j1>j2 )
6345  {
6346  return;
6347  }
6348  ae_assert(j2-j1==ix2-ix1, "MatrixVectorMultiply: A and X do not match!", _state);
6349  ae_assert(i2-i1==iy2-iy1, "MatrixVectorMultiply: A and Y do not match!", _state);
6350 
6351  /*
6352  * beta*y
6353  */
6354  if( ae_fp_eq(beta,0) )
6355  {
6356  for(i=iy1; i<=iy2; i++)
6357  {
6358  y->ptr.p_double[i] = 0;
6359  }
6360  }
6361  else
6362  {
6363  ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
6364  }
6365 
6366  /*
6367  * alpha*A*x
6368  */
6369  for(i=i1; i<=i2; i++)
6370  {
6371  v = ae_v_dotproduct(&a->ptr.pp_double[i][j1], 1, &x->ptr.p_double[ix1], 1, ae_v_len(j1,j2));
6372  y->ptr.p_double[iy1+i-i1] = y->ptr.p_double[iy1+i-i1]+alpha*v;
6373  }
6374  }
6375  else
6376  {
6377 
6378  /*
6379  * y := alpha*A'*x + beta*y;
6380  */
6381  if( i1>i2||j1>j2 )
6382  {
6383  return;
6384  }
6385  ae_assert(i2-i1==ix2-ix1, "MatrixVectorMultiply: A and X do not match!", _state);
6386  ae_assert(j2-j1==iy2-iy1, "MatrixVectorMultiply: A and Y do not match!", _state);
6387 
6388  /*
6389  * beta*y
6390  */
6391  if( ae_fp_eq(beta,0) )
6392  {
6393  for(i=iy1; i<=iy2; i++)
6394  {
6395  y->ptr.p_double[i] = 0;
6396  }
6397  }
6398  else
6399  {
6400  ae_v_muld(&y->ptr.p_double[iy1], 1, ae_v_len(iy1,iy2), beta);
6401  }
6402 
6403  /*
6404  * alpha*A'*x
6405  */
6406  for(i=i1; i<=i2; i++)
6407  {
6408  v = alpha*x->ptr.p_double[ix1+i-i1];
6409  ae_v_addd(&y->ptr.p_double[iy1], 1, &a->ptr.pp_double[i][j1], 1, ae_v_len(iy1,iy2), v);
6410  }
6411  }
6412 }
6413 
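/*************************************************************************
MatrixVectorMultiply above computes y := alpha*A*x + beta*y (or with A' in
place of A). Note that the transposed branch does not form column dot
products; it accumulates alpha*x[i] times row i of A into y, so memory is
still traversed along contiguous rows. A minimal plain-C sketch of both
branches (hypothetical helper on a row-major M x N array):
*************************************************************************/
static void example_gemv(int m, int n, int trans,
     double alpha, const double* a,
     const double* x, double beta, double* y)
{
    // y has length m when trans==0 and length n when trans!=0
    int i, j, leny = trans ? n : m;
    for(i=0; i<leny; i++)
        y[i] = (beta==0.0 ? 0.0 : beta*y[i]);
    if( !trans )
    {
        // y[i] += alpha * dot(row i of A, x)
        for(i=0; i<m; i++)
        {
            double v = 0.0;
            for(j=0; j<n; j++)
                v += a[i*n+j]*x[j];
            y[i] += alpha*v;
        }
    }
    else
    {
        // y += alpha*x[i] * (row i of A): equivalent to y += alpha*A'*x,
        // but each pass walks one contiguous row, as in the routine above
        for(i=0; i<m; i++)
        {
            double v = alpha*x[i];
            for(j=0; j<n; j++)
                y[j] += v*a[i*n+j];
        }
    }
}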
6414 
6415 double pythag2(double x, double y, ae_state *_state)
6416 {
6417  double w;
6418  double xabs;
6419  double yabs;
6420  double z;
6421  double result;
6422 
6423 
6424  xabs = ae_fabs(x, _state);
6425  yabs = ae_fabs(y, _state);
6426  w = ae_maxreal(xabs, yabs, _state);
6427  z = ae_minreal(xabs, yabs, _state);
6428  if( ae_fp_eq(z,0) )
6429  {
6430  result = w;
6431  }
6432  else
6433  {
6434  result = w*ae_sqrt(1+ae_sqr(z/w, _state), _state);
6435  }
6436  return result;
6437 }
6438 
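/*************************************************************************
Pythag2 above returns sqrt(x^2+y^2) without overflowing when the squares
themselves would: with w = max(|x|,|y|) and z = min(|x|,|y|) it evaluates
w*sqrt(1+(z/w)^2), so nothing larger than 1 is ever squared. A one-to-one
plain-C sketch (hypothetical helper, assuming <math.h>):
*************************************************************************/
static double example_pythag2(double x, double y)
{
    double xabs = fabs(x), yabs = fabs(y);
    double w = xabs>yabs ? xabs : yabs;
    double z = xabs>yabs ? yabs : xabs;
    // w*sqrt(1+(z/w)^2) == sqrt(x*x+y*y); the ratio z/w is at most 1
    return z==0.0 ? w : w*sqrt(1.0+(z/w)*(z/w));
}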
6439 
6440 void matrixmatrixmultiply(/* Real */ ae_matrix* a,
6441  ae_int_t ai1,
6442  ae_int_t ai2,
6443  ae_int_t aj1,
6444  ae_int_t aj2,
6445  ae_bool transa,
6446  /* Real */ ae_matrix* b,
6447  ae_int_t bi1,
6448  ae_int_t bi2,
6449  ae_int_t bj1,
6450  ae_int_t bj2,
6451  ae_bool transb,
6452  double alpha,
6453  /* Real */ ae_matrix* c,
6454  ae_int_t ci1,
6455  ae_int_t ci2,
6456  ae_int_t cj1,
6457  ae_int_t cj2,
6458  double beta,
6459  /* Real */ ae_vector* work,
6460  ae_state *_state)
6461 {
6462  ae_int_t arows;
6463  ae_int_t acols;
6464  ae_int_t brows;
6465  ae_int_t bcols;
6466  ae_int_t crows;
6467  ae_int_t i;
6468  ae_int_t j;
6469  ae_int_t k;
6470  ae_int_t l;
6471  ae_int_t r;
6472  double v;
6473 
6474 
6475 
6476  /*
6477  * Setup
6478  */
6479  if( !transa )
6480  {
6481  arows = ai2-ai1+1;
6482  acols = aj2-aj1+1;
6483  }
6484  else
6485  {
6486  arows = aj2-aj1+1;
6487  acols = ai2-ai1+1;
6488  }
6489  if( !transb )
6490  {
6491  brows = bi2-bi1+1;
6492  bcols = bj2-bj1+1;
6493  }
6494  else
6495  {
6496  brows = bj2-bj1+1;
6497  bcols = bi2-bi1+1;
6498  }
6499  ae_assert(acols==brows, "MatrixMatrixMultiply: incorrect matrix sizes!", _state);
6500  if( ((arows<=0||acols<=0)||brows<=0)||bcols<=0 )
6501  {
6502  return;
6503  }
6504  crows = arows;
6505 
6506  /*
6507  * Test WORK
6508  */
6509  i = ae_maxint(arows, acols, _state);
6510  i = ae_maxint(brows, i, _state);
6511  i = ae_maxint(i, bcols, _state);
6512  work->ptr.p_double[1] = 0;
6513  work->ptr.p_double[i] = 0;
6514 
6515  /*
6516  * Prepare C
6517  */
6518  if( ae_fp_eq(beta,0) )
6519  {
6520  for(i=ci1; i<=ci2; i++)
6521  {
6522  for(j=cj1; j<=cj2; j++)
6523  {
6524  c->ptr.pp_double[i][j] = 0;
6525  }
6526  }
6527  }
6528  else
6529  {
6530  for(i=ci1; i<=ci2; i++)
6531  {
6532  ae_v_muld(&c->ptr.pp_double[i][cj1], 1, ae_v_len(cj1,cj2), beta);
6533  }
6534  }
6535 
6536  /*
6537  * A*B
6538  */
6539  if( !transa&&!transb )
6540  {
6541  for(l=ai1; l<=ai2; l++)
6542  {
6543  for(r=bi1; r<=bi2; r++)
6544  {
6545  v = alpha*a->ptr.pp_double[l][aj1+r-bi1];
6546  k = ci1+l-ai1;
6547  ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
6548  }
6549  }
6550  return;
6551  }
6552 
6553  /*
6554  * A*B'
6555  */
6556  if( !transa&&transb )
6557  {
6558  if( arows*acols<brows*bcols )
6559  {
6560  for(r=bi1; r<=bi2; r++)
6561  {
6562  for(l=ai1; l<=ai2; l++)
6563  {
6564  v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
6565  c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
6566  }
6567  }
6568  return;
6569  }
6570  else
6571  {
6572  for(l=ai1; l<=ai2; l++)
6573  {
6574  for(r=bi1; r<=bi2; r++)
6575  {
6576  v = ae_v_dotproduct(&a->ptr.pp_double[l][aj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(aj1,aj2));
6577  c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-ai1][cj1+r-bi1]+alpha*v;
6578  }
6579  }
6580  return;
6581  }
6582  }
6583 
6584  /*
6585  * A'*B
6586  */
6587  if( transa&&!transb )
6588  {
6589  for(l=aj1; l<=aj2; l++)
6590  {
6591  for(r=bi1; r<=bi2; r++)
6592  {
6593  v = alpha*a->ptr.pp_double[ai1+r-bi1][l];
6594  k = ci1+l-aj1;
6595  ae_v_addd(&c->ptr.pp_double[k][cj1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(cj1,cj2), v);
6596  }
6597  }
6598  return;
6599  }
6600 
6601  /*
6602  * A'*B'
6603  */
6604  if( transa&&transb )
6605  {
6606  if( arows*acols<brows*bcols )
6607  {
6608  for(r=bi1; r<=bi2; r++)
6609  {
6610  k = cj1+r-bi1;
6611  for(i=1; i<=crows; i++)
6612  {
6613  work->ptr.p_double[i] = 0.0;
6614  }
6615  for(l=ai1; l<=ai2; l++)
6616  {
6617  v = alpha*b->ptr.pp_double[r][bj1+l-ai1];
6618  ae_v_addd(&work->ptr.p_double[1], 1, &a->ptr.pp_double[l][aj1], 1, ae_v_len(1,crows), v);
6619  }
6620  ae_v_add(&c->ptr.pp_double[ci1][k], c->stride, &work->ptr.p_double[1], 1, ae_v_len(ci1,ci2));
6621  }
6622  return;
6623  }
6624  else
6625  {
6626  for(l=aj1; l<=aj2; l++)
6627  {
6628  k = ai2-ai1+1;
6629  ae_v_move(&work->ptr.p_double[1], 1, &a->ptr.pp_double[ai1][l], a->stride, ae_v_len(1,k));
6630  for(r=bi1; r<=bi2; r++)
6631  {
6632  v = ae_v_dotproduct(&work->ptr.p_double[1], 1, &b->ptr.pp_double[r][bj1], 1, ae_v_len(1,k));
6633  c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1] = c->ptr.pp_double[ci1+l-aj1][cj1+r-bi1]+alpha*v;
6634  }
6635  }
6636  return;
6637  }
6638  }
6639 }
6640 
6641 
6642 
6643 
6644 void hermitianmatrixvectormultiply(/* Complex */ ae_matrix* a,
6645  ae_bool isupper,
6646  ae_int_t i1,
6647  ae_int_t i2,
6648  /* Complex */ ae_vector* x,
6649  ae_complex alpha,
6650  /* Complex */ ae_vector* y,
6651  ae_state *_state)
6652 {
6653  ae_int_t i;
6654  ae_int_t ba1;
6655  ae_int_t by1;
6656  ae_int_t by2;
6657  ae_int_t bx1;
6658  ae_int_t bx2;
6659  ae_int_t n;
6660  ae_complex v;
6661 
6662 
6663  n = i2-i1+1;
6664  if( n<=0 )
6665  {
6666  return;
6667  }
6668 
6669  /*
6670  * Let A = L + D + U, where
6671  * L is strictly lower triangular (main diagonal is zero)
6672  * D is diagonal
6673  * U is strictly upper triangular (main diagonal is zero)
6674  *
6675  * A*x = L*x + D*x + U*x
6676  *
6677  * Calculate D*x first
6678  */
6679  for(i=i1; i<=i2; i++)
6680  {
6681  y->ptr.p_complex[i-i1+1] = ae_c_mul(a->ptr.pp_complex[i][i],x->ptr.p_complex[i-i1+1]);
6682  }
6683 
6684  /*
6685  * Add L*x + U*x
6686  */
6687  if( isupper )
6688  {
6689  for(i=i1; i<=i2-1; i++)
6690  {
6691 
6692  /*
6693  * Add L*x to the result
6694  */
6695  v = x->ptr.p_complex[i-i1+1];
6696  by1 = i-i1+2;
6697  by2 = n;
6698  ba1 = i+1;
6699  ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
6700 
6701  /*
6702  * Add U*x to the result
6703  */
6704  bx1 = i-i1+2;
6705  bx2 = n;
6706  ba1 = i+1;
6707  v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
6708  y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
6709  }
6710  }
6711  else
6712  {
6713  for(i=i1+1; i<=i2; i++)
6714  {
6715 
6716  /*
6717  * Add L*x to the result
6718  */
6719  bx1 = 1;
6720  bx2 = i-i1;
6721  ba1 = i1;
6722  v = ae_v_cdotproduct(&x->ptr.p_complex[bx1], 1, "N", &a->ptr.pp_complex[i][ba1], 1, "N", ae_v_len(bx1,bx2));
6723  y->ptr.p_complex[i-i1+1] = ae_c_add(y->ptr.p_complex[i-i1+1],v);
6724 
6725  /*
6726  * Add U*x to the result
6727  */
6728  v = x->ptr.p_complex[i-i1+1];
6729  by1 = 1;
6730  by2 = i-i1;
6731  ba1 = i1;
6732  ae_v_caddc(&y->ptr.p_complex[by1], 1, &a->ptr.pp_complex[i][ba1], 1, "Conj", ae_v_len(by1,by2), v);
6733  }
6734  }
6735  ae_v_cmulc(&y->ptr.p_complex[1], 1, ae_v_len(1,n), alpha);
6736 }
6737 
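/*************************************************************************
The routine above forms y = alpha*A*x for a Hermitian A of which only one
triangle is stored, by splitting A = L + D + U with U = L^H: the diagonal
is applied first and each stored off-diagonal element contributes to two
entries of y, once as written and once conjugated. A compact sketch of the
upper-triangle case using std::complex (hypothetical helper, assuming
<complex> and a row-major n x n array):
*************************************************************************/
static void example_hermitian_mv_upper(int n, std::complex<double> alpha,
     const std::complex<double>* a, const std::complex<double>* x,
     std::complex<double>* y)
{
    int i, j;
    for(i=0; i<n; i++)
        y[i] = a[i*n+i]*x[i];                    // D*x
    for(i=0; i<n; i++)
        for(j=i+1; j<n; j++)
        {
            y[i] += a[i*n+j]*x[j];               // U*x, stored element
            y[j] += std::conj(a[i*n+j])*x[i];    // L*x, implied by A = A^H
        }
    for(i=0; i<n; i++)
        y[i] *= alpha;
}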
6738 
6739 void hermitianrank2update(/* Complex */ ae_matrix* a,
6740  ae_bool isupper,
6741  ae_int_t i1,
6742  ae_int_t i2,
6743  /* Complex */ ae_vector* x,
6744  /* Complex */ ae_vector* y,
6745  /* Complex */ ae_vector* t,
6746  ae_complex alpha,
6747  ae_state *_state)
6748 {
6749  ae_int_t i;
6750  ae_int_t tp1;
6751  ae_int_t tp2;
6752  ae_complex v;
6753 
6754 
6755  if( isupper )
6756  {
6757  for(i=i1; i<=i2; i++)
6758  {
6759  tp1 = i+1-i1;
6760  tp2 = i2-i1+1;
6761  v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
6762  ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
6763  v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
6764  ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
6765  ae_v_cadd(&a->ptr.pp_complex[i][i], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i,i2));
6766  }
6767  }
6768  else
6769  {
6770  for(i=i1; i<=i2; i++)
6771  {
6772  tp1 = 1;
6773  tp2 = i+1-i1;
6774  v = ae_c_mul(alpha,x->ptr.p_complex[i+1-i1]);
6775  ae_v_cmovec(&t->ptr.p_complex[tp1], 1, &y->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
6776  v = ae_c_mul(ae_c_conj(alpha, _state),y->ptr.p_complex[i+1-i1]);
6777  ae_v_caddc(&t->ptr.p_complex[tp1], 1, &x->ptr.p_complex[tp1], 1, "Conj", ae_v_len(tp1,tp2), v);
6778  ae_v_cadd(&a->ptr.pp_complex[i][i1], 1, &t->ptr.p_complex[tp1], 1, "N", ae_v_len(i1,i));
6779  }
6780  }
6781 }
6782 
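/*************************************************************************
HermitianRank2Update above applies A := A + alpha*x*y^H + conj(alpha)*y*x^H
to the stored triangle only; the two rank-1 terms are conjugate transposes
of each other, so the update keeps A Hermitian. A minimal sketch for the
upper-triangle storage (hypothetical helper, assuming <complex> and a
row-major n x n array):
*************************************************************************/
static void example_her2_update_upper(int n, std::complex<double> alpha,
     const std::complex<double>* x, const std::complex<double>* y,
     std::complex<double>* a)
{
    // only entries with j>=i are touched, matching the IsUpper branch above
    int i, j;
    for(i=0; i<n; i++)
        for(j=i; j<n; j++)
            a[i*n+j] += alpha*x[i]*std::conj(y[j])
                      + std::conj(alpha)*y[i]*std::conj(x[j]);
}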
6783 
6784 
6785 
6786 /*************************************************************************
6787 Generation of an elementary reflection transformation
6788 
6789 The subroutine generates elementary reflection H of order N, so that, for
6790 a given X, the following equality holds true:
6791 
6792  ( X(1) ) ( Beta )
6793 H * ( .. ) = ( 0 )
6794  ( X(n) ) ( 0 )
6795 
6796 where
6797  ( V(1) )
6798 H = 1 - Tau * ( .. ) * ( V(1), ..., V(n) )
6799  ( V(n) )
6800 
6801 where the first component of vector V equals 1.
6802 
6803 Input parameters:
6804  X - vector. Array whose index ranges within [1..N].
6805  N - reflection order.
6806 
6807 Output parameters:
6808  X - components from 2 to N are replaced with vector V.
6809  The first component is replaced with parameter Beta.
6810  Tau - scalar value Tau. If X is a null vector, Tau equals 0,
6811  otherwise 1 <= Tau <= 2.
6812 
6813 This subroutine is a modification of the DLARFG subroutine from
6814 the LAPACK library.
6815 
6816 MODIFICATIONS:
6817  24.12.2005 sign(Alpha) was replaced with an analogue of the Fortran SIGN code.
6818 
6819  -- LAPACK auxiliary routine (version 3.0) --
6820  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6821  Courant Institute, Argonne National Lab, and Rice University
6822  September 30, 1994
6823 *************************************************************************/
6824 void generatereflection(/* Real */ ae_vector* x,
6825  ae_int_t n,
6826  double* tau,
6827  ae_state *_state)
6828 {
6829  ae_int_t j;
6830  double alpha;
6831  double xnorm;
6832  double v;
6833  double beta;
6834  double mx;
6835  double s;
6836 
6837  *tau = 0;
6838 
6839  if( n<=1 )
6840  {
6841  *tau = 0;
6842  return;
6843  }
6844 
6845  /*
6846  * Scale if needed (to avoid overflow/underflow during intermediate
6847  * calculations).
6848  */
6849  mx = 0;
6850  for(j=1; j<=n; j++)
6851  {
6852  mx = ae_maxreal(ae_fabs(x->ptr.p_double[j], _state), mx, _state);
6853  }
6854  s = 1;
6855  if( ae_fp_neq(mx,0) )
6856  {
6857  if( ae_fp_less_eq(mx,ae_minrealnumber/ae_machineepsilon) )
6858  {
6859  s = ae_minrealnumber/ae_machineepsilon;
6860  v = 1/s;
6861  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
6862  mx = mx*v;
6863  }
6864  else
6865  {
6866  if( ae_fp_greater_eq(mx,ae_maxrealnumber*ae_machineepsilon) )
6867  {
6868  s = ae_maxrealnumber*ae_machineepsilon;
6869  v = 1/s;
6870  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), v);
6871  mx = mx*v;
6872  }
6873  }
6874  }
6875 
6876  /*
6877  * XNORM = DNRM2( N-1, X, INCX )
6878  */
6879  alpha = x->ptr.p_double[1];
6880  xnorm = 0;
6881  if( ae_fp_neq(mx,0) )
6882  {
6883  for(j=2; j<=n; j++)
6884  {
6885  xnorm = xnorm+ae_sqr(x->ptr.p_double[j]/mx, _state);
6886  }
6887  xnorm = ae_sqrt(xnorm, _state)*mx;
6888  }
6889  if( ae_fp_eq(xnorm,0) )
6890  {
6891 
6892  /*
6893  * H = I
6894  */
6895  *tau = 0;
6896  x->ptr.p_double[1] = x->ptr.p_double[1]*s;
6897  return;
6898  }
6899 
6900  /*
6901  * general case
6902  */
6903  mx = ae_maxreal(ae_fabs(alpha, _state), ae_fabs(xnorm, _state), _state);
6904  beta = -mx*ae_sqrt(ae_sqr(alpha/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
6905  if( ae_fp_less(alpha,0) )
6906  {
6907  beta = -beta;
6908  }
6909  *tau = (beta-alpha)/beta;
6910  v = 1/(alpha-beta);
6911  ae_v_muld(&x->ptr.p_double[2], 1, ae_v_len(2,n), v);
6912  x->ptr.p_double[1] = beta;
6913 
6914  /*
6915  * Scale back outputs
6916  */
6917  x->ptr.p_double[1] = x->ptr.p_double[1]*s;
6918 }
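For readers unfamiliar with the calling conventions used in this file, the following sketch (not part of the library source; the example_* helper name is hypothetical) shows GenerateReflection applied to the 1-based vector (3, 0, 4), for which Beta = -5 and Tau = 1.6. It assumes the caller already owns an initialized ae_state.

/*
 * Hypothetical usage sketch -- not part of the library source.
 */
static void example_generatereflection(ae_state *_state)
{
    ae_vector x;
    double tau;

    ae_vector_init(&x, 0, DT_REAL, _state, ae_false);
    ae_vector_set_length(&x, 3+1, _state);          /* entries 1..3 are used */
    x.ptr.p_double[1] = 3;
    x.ptr.p_double[2] = 0;
    x.ptr.p_double[3] = 4;

    generatereflection(&x, 3, &tau, _state);

    /*
     * Now x.ptr.p_double[1] = Beta = -5, x.ptr.p_double[2..3] = V(2..3) = (0, 0.5),
     * V(1) = 1 implicitly, and tau = 1.6 (all up to rounding).
     */
    ae_vector_clear(&x);
}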
6919 
6920 
6921 /*************************************************************************
6922 Application of an elementary reflection to a rectangular matrix of size MxN
6923 
6924 The algorithm pre-multiplies the matrix by an elementary reflection transformation
6925 which is given by column V and scalar Tau (see the description of the
6926 GenerateReflection procedure). Not the whole matrix but only a part of it
6927 is transformed (rows from M1 to M2, columns from N1 to N2). Only the elements
6928 of this submatrix are changed.
6929 
6930 Input parameters:
6931  C - matrix to be transformed.
6932  Tau - scalar defining the transformation.
6933  V - column defining the transformation.
6934  Array whose index ranges within [1..M2-M1+1].
6935  M1, M2 - range of rows to be transformed.
6936  N1, N2 - range of columns to be transformed.
6937  WORK - working array whose index goes from N1 to N2.
6938 
6939 Output parameters:
6940  C - the result of multiplying the input matrix C by the
6941  transformation matrix which is given by Tau and V.
6942  If N1>N2 or M1>M2, C is not modified.
6943 
6944  -- LAPACK auxiliary routine (version 3.0) --
6945  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
6946  Courant Institute, Argonne National Lab, and Rice University
6947  September 30, 1994
6948 *************************************************************************/
6949 void applyreflectionfromtheleft(/* Real */ ae_matrix* c,
6950  double tau,
6951  /* Real */ ae_vector* v,
6952  ae_int_t m1,
6953  ae_int_t m2,
6954  ae_int_t n1,
6955  ae_int_t n2,
6956  /* Real */ ae_vector* work,
6957  ae_state *_state)
6958 {
6959  double t;
6960  ae_int_t i;
6961 
6962 
6963  if( (ae_fp_eq(tau,0)||n1>n2)||m1>m2 )
6964  {
6965  return;
6966  }
6967 
6968  /*
6969  * w := C' * v
6970  */
6971  for(i=n1; i<=n2; i++)
6972  {
6973  work->ptr.p_double[i] = 0;
6974  }
6975  for(i=m1; i<=m2; i++)
6976  {
6977  t = v->ptr.p_double[i+1-m1];
6978  ae_v_addd(&work->ptr.p_double[n1], 1, &c->ptr.pp_double[i][n1], 1, ae_v_len(n1,n2), t);
6979  }
6980 
6981  /*
6982  * C := C - tau * v * w'
6983  */
6984  for(i=m1; i<=m2; i++)
6985  {
6986  t = v->ptr.p_double[i-m1+1]*tau;
6987  ae_v_subd(&c->ptr.pp_double[i][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2), t);
6988  }
6989 }
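A minimal sketch of how GenerateReflection and ApplyReflectionFromTheLeft combine into one Householder elimination step (the first column of a QR-style factorization). The example_* helper is hypothetical and not part of the library.

/*
 * Hypothetical sketch: one Householder elimination step on an MxN matrix A
 * stored with 1-based indices, as in the rest of this file.
 */
static void example_eliminate_first_column(/* Real */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_vector v;
    ae_vector work;
    double tau;
    ae_int_t i;

    ae_vector_init(&v, 0, DT_REAL, _state, ae_false);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_false);
    ae_vector_set_length(&v, m+1, _state);          /* V ranges over [1..M] */
    ae_vector_set_length(&work, n+1, _state);       /* WORK ranges over [1..N] */

    /*
     * Build V from column 1 of A and generate the reflection
     */
    for(i=1; i<=m; i++)
    {
        v.ptr.p_double[i] = a->ptr.pp_double[i][1];
    }
    generatereflection(&v, m, &tau, _state);

    /*
     * V(1) returned Beta; reset it to 1 before applying, then A := H*A.
     * Afterwards column 1 of A equals (Beta, 0, ..., 0) up to rounding.
     */
    v.ptr.p_double[1] = 1;
    applyreflectionfromtheleft(a, tau, &v, 1, m, 1, n, &work, _state);

    ae_vector_clear(&v);
    ae_vector_clear(&work);
}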
6990 
6991 
6992 /*************************************************************************
6993 Application of an elementary reflection to a rectangular matrix of size MxN
6994 
6995 The algorithm post-multiplies the matrix by an elementary reflection transformation
6996 which is given by column V and scalar Tau (see the description of the
6997 GenerateReflection procedure). Not the whole matrix but only a part of it
6998 is transformed (rows from M1 to M2, columns from N1 to N2). Only the
6999 elements of this submatrix are changed.
7000 
7001 Input parameters:
7002  C - matrix to be transformed.
7003  Tau - scalar defining the transformation.
7004  V - column defining the transformation.
7005  Array whose index ranges within [1..N2-N1+1].
7006  M1, M2 - range of rows to be transformed.
7007  N1, N2 - range of columns to be transformed.
7008  WORK - working array whose index goes from M1 to M2.
7009 
7010 Output parameters:
7011  C - the result of multiplying the input matrix C by the
7012  transformation matrix which is given by Tau and V.
7013  If N1>N2 or M1>M2, C is not modified.
7014 
7015  -- LAPACK auxiliary routine (version 3.0) --
7016  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
7017  Courant Institute, Argonne National Lab, and Rice University
7018  September 30, 1994
7019 *************************************************************************/
7020 void applyreflectionfromtheright(/* Real */ ae_matrix* c,
7021  double tau,
7022  /* Real */ ae_vector* v,
7023  ae_int_t m1,
7024  ae_int_t m2,
7025  ae_int_t n1,
7026  ae_int_t n2,
7027  /* Real */ ae_vector* work,
7028  ae_state *_state)
7029 {
7030  double t;
7031  ae_int_t i;
7032  ae_int_t vm;
7033 
7034 
7035  if( (ae_fp_eq(tau,0)||n1>n2)||m1>m2 )
7036  {
7037  return;
7038  }
7039  vm = n2-n1+1;
7040  for(i=m1; i<=m2; i++)
7041  {
7042  t = ae_v_dotproduct(&c->ptr.pp_double[i][n1], 1, &v->ptr.p_double[1], 1, ae_v_len(n1,n2));
7043  t = t*tau;
7044  ae_v_subd(&c->ptr.pp_double[i][n1], 1, &v->ptr.p_double[1], 1, ae_v_len(n1,n2), t);
7045  }
7046 
7047  /*
7048  * This line is necessary to avoid spurious compiler warnings
7049  */
7050  touchint(&vm, _state);
7051 }
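The post-multiplying routine is used the same way, only with V built from a row; a hedged sketch follows (hypothetical helper, not library code).

/*
 * Hypothetical sketch: annihilate row 1 of A to the right of A(1,1) by
 * post-multiplying with a reflection built from that row.
 */
static void example_eliminate_first_row(/* Real */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_vector v;
    ae_vector work;
    double tau;
    ae_int_t j;

    ae_vector_init(&v, 0, DT_REAL, _state, ae_false);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_false);
    ae_vector_set_length(&v, n+1, _state);          /* V ranges over [1..N] */
    ae_vector_set_length(&work, m+1, _state);       /* WORK ranges over [1..M] */

    for(j=1; j<=n; j++)
    {
        v.ptr.p_double[j] = a->ptr.pp_double[1][j];
    }
    generatereflection(&v, n, &tau, _state);
    v.ptr.p_double[1] = 1;

    /*
     * A := A*H; row 1 becomes (Beta, 0, ..., 0) up to rounding
     */
    applyreflectionfromtheright(a, tau, &v, 1, m, 1, n, &work, _state);

    ae_vector_clear(&v);
    ae_vector_clear(&work);
}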
7052 
7053 
7054 
7055 
7056 /*************************************************************************
7057 Generation of an elementary complex reflection transformation
7058 
7059 The subroutine generates elementary complex reflection H of order N, so
7060 that, for a given X, the following equality holds true:
7061 
7062  ( X(1) ) ( Beta )
7063 H' * ( .. ) = ( 0 ), H'*H = I, Beta is a real number
7064  ( X(n) ) ( 0 )
7065 
7066 where
7067 
7068  ( V(1) )
7069 H = 1 - Tau * ( .. ) * ( conj(V(1)), ..., conj(V(n)) )
7070  ( V(n) )
7071 
7072 where the first component of vector V equals 1.
7073 
7074 Input parameters:
7075  X - vector. Array with elements [1..N].
7076  N - reflection order.
7077 
7078 Output parameters:
7079  X - components from 2 to N are replaced by vector V.
7080  The first component is replaced with parameter Beta.
7081  Tau - scalar value Tau.
7082 
7083 This subroutine is the modification of CLARFG subroutines from the LAPACK
7084 library. It has similar functionality except for the fact that it doesn't
7085 handle errors when intermediate results cause an overflow.
7086 
7087  -- LAPACK auxiliary routine (version 3.0) --
7088  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
7089  Courant Institute, Argonne National Lab, and Rice University
7090  September 30, 1994
7091 *************************************************************************/
7092 void complexgeneratereflection(/* Complex */ ae_vector* x,
7093  ae_int_t n,
7094  ae_complex* tau,
7095  ae_state *_state)
7096 {
7097  ae_int_t j;
7098  ae_complex alpha;
7099  double alphi;
7100  double alphr;
7101  double beta;
7102  double xnorm;
7103  double mx;
7104  ae_complex t;
7105  double s;
7106  ae_complex v;
7107 
7108  tau->x = 0;
7109  tau->y = 0;
7110 
7111  if( n<=0 )
7112  {
7113  *tau = ae_complex_from_d(0);
7114  return;
7115  }
7116 
7117  /*
7118  * Scale if needed (to avoid overflow/underflow during intermediate
7119  * calculations).
7120  */
7121  mx = 0;
7122  for(j=1; j<=n; j++)
7123  {
7124  mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
7125  }
7126  s = 1;
7127  if( ae_fp_neq(mx,0) )
7128  {
7129  if( ae_fp_less(mx,1) )
7130  {
7131  s = ae_sqrt(ae_minrealnumber, _state);
7132  v = ae_complex_from_d(1/s);
7133  ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
7134  }
7135  else
7136  {
7137  s = ae_sqrt(ae_maxrealnumber, _state);
7138  v = ae_complex_from_d(1/s);
7139  ae_v_cmulc(&x->ptr.p_complex[1], 1, ae_v_len(1,n), v);
7140  }
7141  }
7142 
7143  /*
7144  * calculate
7145  */
7146  alpha = x->ptr.p_complex[1];
7147  mx = 0;
7148  for(j=2; j<=n; j++)
7149  {
7150  mx = ae_maxreal(ae_c_abs(x->ptr.p_complex[j], _state), mx, _state);
7151  }
7152  xnorm = 0;
7153  if( ae_fp_neq(mx,0) )
7154  {
7155  for(j=2; j<=n; j++)
7156  {
7157  t = ae_c_div_d(x->ptr.p_complex[j],mx);
7158  xnorm = xnorm+ae_c_mul(t,ae_c_conj(t, _state)).x;
7159  }
7160  xnorm = ae_sqrt(xnorm, _state)*mx;
7161  }
7162  alphr = alpha.x;
7163  alphi = alpha.y;
7164  if( ae_fp_eq(xnorm,0)&&ae_fp_eq(alphi,0) )
7165  {
7166  *tau = ae_complex_from_d(0);
7167  x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
7168  return;
7169  }
7170  mx = ae_maxreal(ae_fabs(alphr, _state), ae_fabs(alphi, _state), _state);
7171  mx = ae_maxreal(mx, ae_fabs(xnorm, _state), _state);
7172  beta = -mx*ae_sqrt(ae_sqr(alphr/mx, _state)+ae_sqr(alphi/mx, _state)+ae_sqr(xnorm/mx, _state), _state);
7173  if( ae_fp_less(alphr,0) )
7174  {
7175  beta = -beta;
7176  }
7177  tau->x = (beta-alphr)/beta;
7178  tau->y = -alphi/beta;
7179  alpha = ae_c_d_div(1,ae_c_sub_d(alpha,beta));
7180  if( n>1 )
7181  {
7182  ae_v_cmulc(&x->ptr.p_complex[2], 1, ae_v_len(2,n), alpha);
7183  }
7184  alpha = ae_complex_from_d(beta);
7185  x->ptr.p_complex[1] = alpha;
7186 
7187  /*
7188  * Scale back
7189  */
7190  x->ptr.p_complex[1] = ae_c_mul_d(x->ptr.p_complex[1],s);
7191 }
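A hedged sketch of the complex variant, mirroring the real-valued example above: for X = (3, 4i) the resulting Beta is -5 and V(2) = 0.5i (up to rounding). The helper name is hypothetical.

/*
 * Hypothetical usage sketch -- not part of the library source.
 */
static void example_complexgeneratereflection(ae_state *_state)
{
    ae_vector x;
    ae_complex tau;

    ae_vector_init(&x, 0, DT_COMPLEX, _state, ae_false);
    ae_vector_set_length(&x, 2+1, _state);          /* entries 1..2 are used */
    x.ptr.p_complex[1] = ae_complex_from_d(3);
    x.ptr.p_complex[2].x = 0;
    x.ptr.p_complex[2].y = 4;                       /* X = (3, 4i) */

    complexgeneratereflection(&x, 2, &tau, _state);

    /*
     * Now x.ptr.p_complex[1] = Beta = -5 (real), x.ptr.p_complex[2] = V(2) = 0.5i,
     * and tau = 1.6 (up to rounding).
     */
    ae_vector_clear(&x);
}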
7192 
7193 
7194 /*************************************************************************
7195 Application of an elementary reflection to a rectangular matrix of size MxN
7196 
7197 The algorithm pre-multiplies the matrix by an elementary reflection
7198 transformation which is given by column V and scalar Tau (see the
7199 description of the GenerateReflection). Not the whole matrix but only a
7200 part of it is transformed (rows from M1 to M2, columns from N1 to N2). Only
7201 the elements of this submatrix are changed.
7202 
7203 Note: the matrix is multiplied by H, not by H'. If it is required to
7204 multiply the matrix by H', it is necessary to pass Conj(Tau) instead of Tau.
7205 
7206 Input parameters:
7207  C - matrix to be transformed.
7208  Tau - scalar defining transformation.
7209  V - column defining transformation.
7210  Array whose index ranges within [1..M2-M1+1]
7211  M1, M2 - range of rows to be transformed.
7212  N1, N2 - range of columns to be transformed.
7213  WORK - working array whose index goes from N1 to N2.
7214 
7215 Output parameters:
7216  C - the result of multiplying the input matrix C by the
7217  transformation matrix which is given by Tau and V.
7218  If N1>N2 or M1>M2, C is not modified.
7219 
7220  -- LAPACK auxiliary routine (version 3.0) --
7221  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
7222  Courant Institute, Argonne National Lab, and Rice University
7223  September 30, 1994
7224 *************************************************************************/
7225 void complexapplyreflectionfromtheleft(/* Complex */ ae_matrix* c,
7226  ae_complex tau,
7227  /* Complex */ ae_vector* v,
7228  ae_int_t m1,
7229  ae_int_t m2,
7230  ae_int_t n1,
7231  ae_int_t n2,
7232  /* Complex */ ae_vector* work,
7233  ae_state *_state)
7234 {
7235  ae_complex t;
7236  ae_int_t i;
7237 
7238 
7239  if( (ae_c_eq_d(tau,0)||n1>n2)||m1>m2 )
7240  {
7241  return;
7242  }
7243 
7244  /*
7245  * w := C^T * conj(v)
7246  */
7247  for(i=n1; i<=n2; i++)
7248  {
7249  work->ptr.p_complex[i] = ae_complex_from_d(0);
7250  }
7251  for(i=m1; i<=m2; i++)
7252  {
7253  t = ae_c_conj(v->ptr.p_complex[i+1-m1], _state);
7254  ae_v_caddc(&work->ptr.p_complex[n1], 1, &c->ptr.pp_complex[i][n1], 1, "N", ae_v_len(n1,n2), t);
7255  }
7256 
7257  /*
7258  * C := C - tau * v * w^T
7259  */
7260  for(i=m1; i<=m2; i++)
7261  {
7262  t = ae_c_mul(v->ptr.p_complex[i-m1+1],tau);
7263  ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &work->ptr.p_complex[n1], 1, "N", ae_v_len(n1,n2), t);
7264  }
7265 }
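Because this routine multiplies by H rather than H', annihilating a column requires passing Conj(Tau), as the note above states. A minimal sketch under that assumption (hypothetical helper, not library code):

/*
 * Hypothetical sketch: C := H'*C with H built from column 1 of C, so that the
 * column is annihilated below its first element.
 */
static void example_complex_eliminate_first_column(/* Complex */ ae_matrix* c,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_vector v;
    ae_vector work;
    ae_complex tau;
    ae_int_t i;

    ae_vector_init(&v, 0, DT_COMPLEX, _state, ae_false);
    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_false);
    ae_vector_set_length(&v, m+1, _state);          /* V ranges over [1..M] */
    ae_vector_set_length(&work, n+1, _state);       /* WORK ranges over [1..N] */

    for(i=1; i<=m; i++)
    {
        v.ptr.p_complex[i] = c->ptr.pp_complex[i][1];
    }
    complexgeneratereflection(&v, m, &tau, _state);
    v.ptr.p_complex[1] = ae_complex_from_d(1);

    /*
     * Pass Conj(Tau) to multiply by H' instead of H (see the note above);
     * afterwards column 1 of C is (Beta, 0, ..., 0) up to rounding.
     */
    complexapplyreflectionfromtheleft(c, ae_c_conj(tau, _state), &v, 1, m, 1, n, &work, _state);

    ae_vector_clear(&v);
    ae_vector_clear(&work);
}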
7266 
7267 
7268 /*************************************************************************
7269 Application of an elementary reflection to a rectangular matrix of size MxN
7270 
7271 The algorithm post-multiplies the matrix by an elementary reflection
7272 transformation which is given by column V and scalar Tau (see the
7273 description of the GenerateReflection). Not the whole matrix but only a
7274 part of it is transformed (rows from M1 to M2, columns from N1 to N2).
7275 Only the elements of this submatrix are changed.
7276 
7277 Input parameters:
7278  C - matrix to be transformed.
7279  Tau - scalar defining transformation.
7280  V - column defining transformation.
7281  Array whose index ranges within [1..N2-N1+1]
7282  M1, M2 - range of rows to be transformed.
7283  N1, N2 - range of columns to be transformed.
7284  WORK - working array whose index goes from M1 to M2.
7285 
7286 Output parameters:
7287  C - the result of multiplying the input matrix C by the
7288  transformation matrix which is given by Tau and V.
7289  If N1>N2 or M1>M2, C is not modified.
7290 
7291  -- LAPACK auxiliary routine (version 3.0) --
7292  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
7293  Courant Institute, Argonne National Lab, and Rice University
7294  September 30, 1994
7295 *************************************************************************/
7296 void complexapplyreflectionfromtheright(/* Complex */ ae_matrix* c,
7297  ae_complex tau,
7298  /* Complex */ ae_vector* v,
7299  ae_int_t m1,
7300  ae_int_t m2,
7301  ae_int_t n1,
7302  ae_int_t n2,
7303  /* Complex */ ae_vector* work,
7304  ae_state *_state)
7305 {
7306  ae_complex t;
7307  ae_int_t i;
7308  ae_int_t vm;
7309 
7310 
7311  if( (ae_c_eq_d(tau,0)||n1>n2)||m1>m2 )
7312  {
7313  return;
7314  }
7315 
7316  /*
7317  * w := C * v
7318  */
7319  vm = n2-n1+1;
7320  for(i=m1; i<=m2; i++)
7321  {
7322  t = ae_v_cdotproduct(&c->ptr.pp_complex[i][n1], 1, "N", &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2));
7323  work->ptr.p_complex[i] = t;
7324  }
7325 
7326  /*
7327  * C := C - w * conj(v^T)
7328  */
7329  ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
7330  for(i=m1; i<=m2; i++)
7331  {
7332  t = ae_c_mul(work->ptr.p_complex[i],tau);
7333  ae_v_csubc(&c->ptr.pp_complex[i][n1], 1, &v->ptr.p_complex[1], 1, "N", ae_v_len(n1,n2), t);
7334  }
7335  ae_v_cmove(&v->ptr.p_complex[1], 1, &v->ptr.p_complex[1], 1, "Conj", ae_v_len(1,vm));
7336 }
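A short sketch of a typical use of the right-hand variant: accumulating a reflection into a unitary matrix Q (Q := Q*H). The helper and its arguments are illustrative only.

/*
 * Hypothetical sketch: accumulate a reflection into a unitary MxM matrix Q,
 * Q := Q*H, with V and Tau coming from ComplexGenerateReflection and V(1)
 * already reset to 1.
 */
static void example_accumulate_reflection(/* Complex */ ae_matrix* q,
     ae_int_t m,
     /* Complex */ ae_vector* v,
     ae_complex tau,
     ae_state *_state)
{
    ae_vector work;

    ae_vector_init(&work, 0, DT_COMPLEX, _state, ae_false);
    ae_vector_set_length(&work, m+1, _state);       /* WORK ranges over [1..M] */

    complexapplyreflectionfromtheright(q, tau, v, 1, m, 1, m, &work, _state);

    ae_vector_clear(&work);
}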
7337 
7338 
7339 
7340 
7341 void symmetricmatrixvectormultiply(/* Real */ ae_matrix* a,
7342  ae_bool isupper,
7343  ae_int_t i1,
7344  ae_int_t i2,
7345  /* Real */ ae_vector* x,
7346  double alpha,
7347  /* Real */ ae_vector* y,
7348  ae_state *_state)
7349 {
7350  ae_int_t i;
7351  ae_int_t ba1;
7352  ae_int_t ba2;
7353  ae_int_t by1;
7354  ae_int_t by2;
7355  ae_int_t bx1;
7356  ae_int_t bx2;
7357  ae_int_t n;
7358  double v;
7359 
7360 
7361  n = i2-i1+1;
7362  if( n<=0 )
7363  {
7364  return;
7365  }
7366 
7367  /*
7368  * Let A = L + D + U, where
7369  * L is strictly lower triangular (main diagonal is zero)
7370  * D is diagonal
7371  * U is strictly upper triangular (main diagonal is zero)
7372  *
7373  * A*x = L*x + D*x + U*x
7374  *
7375  * Calculate D*x first
7376  */
7377  for(i=i1; i<=i2; i++)
7378  {
7379  y->ptr.p_double[i-i1+1] = a->ptr.pp_double[i][i]*x->ptr.p_double[i-i1+1];
7380  }
7381 
7382  /*
7383  * Add L*x + U*x
7384  */
7385  if( isupper )
7386  {
7387  for(i=i1; i<=i2-1; i++)
7388  {
7389 
7390  /*
7391  * Add L*x to the result
7392  */
7393  v = x->ptr.p_double[i-i1+1];
7394  by1 = i-i1+2;
7395  by2 = n;
7396  ba1 = i+1;
7397  ba2 = i2;
7398  ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
7399 
7400  /*
7401  * Add U*x to the result
7402  */
7403  bx1 = i-i1+2;
7404  bx2 = n;
7405  ba1 = i+1;
7406  ba2 = i2;
7407  v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
7408  y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
7409  }
7410  }
7411  else
7412  {
7413  for(i=i1+1; i<=i2; i++)
7414  {
7415 
7416  /*
7417  * Add L*x to the result
7418  */
7419  bx1 = 1;
7420  bx2 = i-i1;
7421  ba1 = i1;
7422  ba2 = i-1;
7423  v = ae_v_dotproduct(&x->ptr.p_double[bx1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(bx1,bx2));
7424  y->ptr.p_double[i-i1+1] = y->ptr.p_double[i-i1+1]+v;
7425 
7426  /*
7427  * Add U*x to the result
7428  */
7429  v = x->ptr.p_double[i-i1+1];
7430  by1 = 1;
7431  by2 = i-i1;
7432  ba1 = i1;
7433  ba2 = i-1;
7434  ae_v_addd(&y->ptr.p_double[by1], 1, &a->ptr.pp_double[i][ba1], 1, ae_v_len(by1,by2), v);
7435  }
7436  }
7437  ae_v_muld(&y->ptr.p_double[1], 1, ae_v_len(1,n), alpha);
7438  touchint(&ba2, _state);
7439 }
7440 
7441 
7442 void symmetricrank2update(/* Real */ ae_matrix* a,
7443  ae_bool isupper,
7444  ae_int_t i1,
7445  ae_int_t i2,
7446  /* Real */ ae_vector* x,
7447  /* Real */ ae_vector* y,
7448  /* Real */ ae_vector* t,
7449  double alpha,
7450  ae_state *_state)
7451 {
7452  ae_int_t i;
7453  ae_int_t tp1;
7454  ae_int_t tp2;
7455  double v;
7456 
7457 
7458  if( isupper )
7459  {
7460  for(i=i1; i<=i2; i++)
7461  {
7462  tp1 = i+1-i1;
7463  tp2 = i2-i1+1;
7464  v = x->ptr.p_double[i+1-i1];
7465  ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
7466  v = y->ptr.p_double[i+1-i1];
7467  ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
7468  ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
7469  ae_v_add(&a->ptr.pp_double[i][i], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i,i2));
7470  }
7471  }
7472  else
7473  {
7474  for(i=i1; i<=i2; i++)
7475  {
7476  tp1 = 1;
7477  tp2 = i+1-i1;
7478  v = x->ptr.p_double[i+1-i1];
7479  ae_v_moved(&t->ptr.p_double[tp1], 1, &y->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
7480  v = y->ptr.p_double[i+1-i1];
7481  ae_v_addd(&t->ptr.p_double[tp1], 1, &x->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), v);
7482  ae_v_muld(&t->ptr.p_double[tp1], 1, ae_v_len(tp1,tp2), alpha);
7483  ae_v_add(&a->ptr.pp_double[i][i1], 1, &t->ptr.p_double[tp1], 1, ae_v_len(i1,i));
7484  }
7485  }
7486 }
7487 
7488 
7489 
7490 
7491 /*************************************************************************
7492 Application of a sequence of elementary rotations to a matrix
7493 
7494 The algorithm pre-multiplies the matrix by a sequence of rotation
7495 transformations which is given by arrays C and S. Depending on the value
7496 of the IsForward parameter, the rotations are applied either in forward order, to row
7497 pairs (M1,M1+1), (M1+1,M1+2), ..., (M2-1,M2) (if IsForward=true), or in reverse order, to the same row pairs taken from the bottom up.
7498 
7499 Not the whole matrix but only a part of it is transformed (rows from M1 to
7500 M2, columns from N1 to N2). Only the elements of this submatrix are changed.
7501 
7502 Input parameters:
7503  IsForward - the sequence of the rotation application.
7504  M1,M2 - the range of rows to be transformed.
7505  N1, N2 - the range of columns to be transformed.
7506  C,S - transformation coefficients.
7507  Array whose index ranges within [1..M2-M1].
7508  A - processed matrix.
7509  WORK - working array whose index ranges within [N1..N2].
7510 
7511 Output parameters:
7512  A - transformed matrix.
7513 
7514 Utility subroutine.
7515 *************************************************************************/
7516 void applyrotationsfromtheleft(ae_bool isforward,
7517  ae_int_t m1,
7518  ae_int_t m2,
7519  ae_int_t n1,
7520  ae_int_t n2,
7521  /* Real */ ae_vector* c,
7522  /* Real */ ae_vector* s,
7523  /* Real */ ae_matrix* a,
7524  /* Real */ ae_vector* work,
7525  ae_state *_state)
7526 {
7527  ae_int_t j;
7528  ae_int_t jp1;
7529  double ctemp;
7530  double stemp;
7531  double temp;
7532 
7533 
7534  if( m1>m2||n1>n2 )
7535  {
7536  return;
7537  }
7538 
7539  /*
7540  * Form P * A
7541  */
7542  if( isforward )
7543  {
7544  if( n1!=n2 )
7545  {
7546 
7547  /*
7548  * Common case: N1<>N2
7549  */
7550  for(j=m1; j<=m2-1; j++)
7551  {
7552  ctemp = c->ptr.p_double[j-m1+1];
7553  stemp = s->ptr.p_double[j-m1+1];
7554  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7555  {
7556  jp1 = j+1;
7557  ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
7558  ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
7559  ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
7560  ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
7561  ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
7562  }
7563  }
7564  }
7565  else
7566  {
7567 
7568  /*
7569  * Special case: N1=N2
7570  */
7571  for(j=m1; j<=m2-1; j++)
7572  {
7573  ctemp = c->ptr.p_double[j-m1+1];
7574  stemp = s->ptr.p_double[j-m1+1];
7575  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7576  {
7577  temp = a->ptr.pp_double[j+1][n1];
7578  a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
7579  a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
7580  }
7581  }
7582  }
7583  }
7584  else
7585  {
7586  if( n1!=n2 )
7587  {
7588 
7589  /*
7590  * Common case: N1<>N2
7591  */
7592  for(j=m2-1; j>=m1; j--)
7593  {
7594  ctemp = c->ptr.p_double[j-m1+1];
7595  stemp = s->ptr.p_double[j-m1+1];
7596  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7597  {
7598  jp1 = j+1;
7599  ae_v_moved(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), ctemp);
7600  ae_v_subd(&work->ptr.p_double[n1], 1, &a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), stemp);
7601  ae_v_muld(&a->ptr.pp_double[j][n1], 1, ae_v_len(n1,n2), ctemp);
7602  ae_v_addd(&a->ptr.pp_double[j][n1], 1, &a->ptr.pp_double[jp1][n1], 1, ae_v_len(n1,n2), stemp);
7603  ae_v_move(&a->ptr.pp_double[jp1][n1], 1, &work->ptr.p_double[n1], 1, ae_v_len(n1,n2));
7604  }
7605  }
7606  }
7607  else
7608  {
7609 
7610  /*
7611  * Special case: N1=N2
7612  */
7613  for(j=m2-1; j>=m1; j--)
7614  {
7615  ctemp = c->ptr.p_double[j-m1+1];
7616  stemp = s->ptr.p_double[j-m1+1];
7617  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7618  {
7619  temp = a->ptr.pp_double[j+1][n1];
7620  a->ptr.pp_double[j+1][n1] = ctemp*temp-stemp*a->ptr.pp_double[j][n1];
7621  a->ptr.pp_double[j][n1] = stemp*temp+ctemp*a->ptr.pp_double[j][n1];
7622  }
7623  }
7624  }
7625  }
7626 }
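A hedged sketch of a call to the routine above, rotating consecutive row pairs of a 1-based MxN matrix with the fixed coefficients CS=0.6, SN=0.8 (so CS^2+SN^2=1). The helper is hypothetical and not part of the library.

/*
 * Hypothetical sketch: rotate consecutive row pairs of a 1-based MxN matrix
 * with the same coefficients CS=0.6, SN=0.8.
 */
static void example_rotate_rows(/* Real */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_vector c;
    ae_vector s;
    ae_vector work;
    ae_int_t i;

    ae_vector_init(&c, 0, DT_REAL, _state, ae_false);
    ae_vector_init(&s, 0, DT_REAL, _state, ae_false);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_false);
    ae_vector_set_length(&c, m, _state);            /* entries 1..M-1 are used */
    ae_vector_set_length(&s, m, _state);
    ae_vector_set_length(&work, n+1, _state);       /* WORK ranges over [1..N] */

    for(i=1; i<=m-1; i++)
    {
        c.ptr.p_double[i] = 0.6;
        s.ptr.p_double[i] = 0.8;
    }

    /*
     * Row pairs (1,2), (2,3), ..., (M-1,M) are rotated in forward order
     */
    applyrotationsfromtheleft(ae_true, 1, m, 1, n, &c, &s, a, &work, _state);

    ae_vector_clear(&c);
    ae_vector_clear(&s);
    ae_vector_clear(&work);
}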
7627 
7628 
7629 /*************************************************************************
7630 Application of a sequence of elementary rotations to a matrix
7631 
7632 The algorithm post-multiplies the matrix by a sequence of rotation
7633 transformations which is given by arrays C and S. Depending on the value
7634 of the IsForward parameter, the rotations are applied either in forward order, to column
7635 pairs (N1,N1+1), (N1+1,N1+2), ..., (N2-1,N2) (if IsForward=true), or in reverse order, to the same column pairs taken from the right.
7636 
7637 Not the whole matrix but only a part of it is transformed (rows from M1
7638 to M2, columns from N1 to N2). Only the elements of this submatrix are changed.
7639 
7640 Input parameters:
7641  IsForward - the sequence of the rotation application.
7642  M1,M2 - the range of rows to be transformed.
7643  N1, N2 - the range of columns to be transformed.
7644  C,S - transformation coefficients.
7645  Array whose index ranges within [1..N2-N1].
7646  A - processed matrix.
7647  WORK - working array whose index ranges within [M1..M2].
7648 
7649 Output parameters:
7650  A - transformed matrix.
7651 
7652 Utility subroutine.
7653 *************************************************************************/
7654 void applyrotationsfromtheright(ae_bool isforward,
7655  ae_int_t m1,
7656  ae_int_t m2,
7657  ae_int_t n1,
7658  ae_int_t n2,
7659  /* Real */ ae_vector* c,
7660  /* Real */ ae_vector* s,
7661  /* Real */ ae_matrix* a,
7662  /* Real */ ae_vector* work,
7663  ae_state *_state)
7664 {
7665  ae_int_t j;
7666  ae_int_t jp1;
7667  double ctemp;
7668  double stemp;
7669  double temp;
7670 
7671 
7672 
7673  /*
7674  * Form A * P'
7675  */
7676  if( isforward )
7677  {
7678  if( m1!=m2 )
7679  {
7680 
7681  /*
7682  * Common case: M1<>M2
7683  */
7684  for(j=n1; j<=n2-1; j++)
7685  {
7686  ctemp = c->ptr.p_double[j-n1+1];
7687  stemp = s->ptr.p_double[j-n1+1];
7688  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7689  {
7690  jp1 = j+1;
7691  ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
7692  ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
7693  ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
7694  ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
7695  ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
7696  }
7697  }
7698  }
7699  else
7700  {
7701 
7702  /*
7703  * Special case: M1=M2
7704  */
7705  for(j=n1; j<=n2-1; j++)
7706  {
7707  ctemp = c->ptr.p_double[j-n1+1];
7708  stemp = s->ptr.p_double[j-n1+1];
7709  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7710  {
7711  temp = a->ptr.pp_double[m1][j+1];
7712  a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
7713  a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
7714  }
7715  }
7716  }
7717  }
7718  else
7719  {
7720  if( m1!=m2 )
7721  {
7722 
7723  /*
7724  * Common case: M1<>M2
7725  */
7726  for(j=n2-1; j>=n1; j--)
7727  {
7728  ctemp = c->ptr.p_double[j-n1+1];
7729  stemp = s->ptr.p_double[j-n1+1];
7730  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7731  {
7732  jp1 = j+1;
7733  ae_v_moved(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), ctemp);
7734  ae_v_subd(&work->ptr.p_double[m1], 1, &a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), stemp);
7735  ae_v_muld(&a->ptr.pp_double[m1][j], a->stride, ae_v_len(m1,m2), ctemp);
7736  ae_v_addd(&a->ptr.pp_double[m1][j], a->stride, &a->ptr.pp_double[m1][jp1], a->stride, ae_v_len(m1,m2), stemp);
7737  ae_v_move(&a->ptr.pp_double[m1][jp1], a->stride, &work->ptr.p_double[m1], 1, ae_v_len(m1,m2));
7738  }
7739  }
7740  }
7741  else
7742  {
7743 
7744  /*
7745  * Special case: M1=M2
7746  */
7747  for(j=n2-1; j>=n1; j--)
7748  {
7749  ctemp = c->ptr.p_double[j-n1+1];
7750  stemp = s->ptr.p_double[j-n1+1];
7751  if( ae_fp_neq(ctemp,1)||ae_fp_neq(stemp,0) )
7752  {
7753  temp = a->ptr.pp_double[m1][j+1];
7754  a->ptr.pp_double[m1][j+1] = ctemp*temp-stemp*a->ptr.pp_double[m1][j];
7755  a->ptr.pp_double[m1][j] = stemp*temp+ctemp*a->ptr.pp_double[m1][j];
7756  }
7757  }
7758  }
7759  }
7760 }
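The column-side analogue is called the same way, with C and S sized against the column range and WORK spanning the rows; a minimal hypothetical sketch:

/*
 * Hypothetical sketch: same coefficients as above, applied to column pairs.
 */
static void example_rotate_columns(/* Real */ ae_matrix* a,
     ae_int_t m,
     ae_int_t n,
     ae_state *_state)
{
    ae_vector c;
    ae_vector s;
    ae_vector work;
    ae_int_t j;

    ae_vector_init(&c, 0, DT_REAL, _state, ae_false);
    ae_vector_init(&s, 0, DT_REAL, _state, ae_false);
    ae_vector_init(&work, 0, DT_REAL, _state, ae_false);
    ae_vector_set_length(&c, n, _state);            /* entries 1..N-1 are used */
    ae_vector_set_length(&s, n, _state);
    ae_vector_set_length(&work, m+1, _state);       /* WORK ranges over [1..M] */

    for(j=1; j<=n-1; j++)
    {
        c.ptr.p_double[j] = 0.6;
        s.ptr.p_double[j] = 0.8;
    }

    /*
     * Column pairs (1,2), (2,3), ..., (N-1,N) are rotated in forward order
     */
    applyrotationsfromtheright(ae_true, 1, m, 1, n, &c, &s, a, &work, _state);

    ae_vector_clear(&c);
    ae_vector_clear(&s);
    ae_vector_clear(&work);
}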
7761 
7762 
7763 /*************************************************************************
7764 The subroutine generates the elementary rotation, so that:
7765 
7766 [ CS SN ] . [ F ] = [ R ]
7767 [ -SN CS ] [ G ] [ 0 ]
7768 
7769 CS**2 + SN**2 = 1
7770 *************************************************************************/
7771 void generaterotation(double f,
7772  double g,
7773  double* cs,
7774  double* sn,
7775  double* r,
7776  ae_state *_state)
7777 {
7778  double f1;
7779  double g1;
7780 
7781  *cs = 0;
7782  *sn = 0;
7783  *r = 0;
7784 
7785  if( ae_fp_eq(g,0) )
7786  {
7787  *cs = 1;
7788  *sn = 0;
7789  *r = f;
7790  }
7791  else
7792  {
7793  if( ae_fp_eq(f,0) )
7794  {
7795  *cs = 0;
7796  *sn = 1;
7797  *r = g;
7798  }
7799  else
7800  {
7801  f1 = f;
7802  g1 = g;
7803  if( ae_fp_greater(ae_fabs(f1, _state),ae_fabs(g1, _state)) )
7804  {
7805  *r = ae_fabs(f1, _state)*ae_sqrt(1+ae_sqr(g1/f1, _state), _state);
7806  }
7807  else
7808  {
7809  *r = ae_fabs(g1, _state)*ae_sqrt(1+ae_sqr(f1/g1, _state), _state);
7810  }
7811  *cs = f1/(*r);
7812  *sn = g1/(*r);
7813  if( ae_fp_greater(ae_fabs(f, _state),ae_fabs(g, _state))&&ae_fp_less(*cs,0) )
7814  {
7815  *cs = -*cs;
7816  *sn = -*sn;
7817  *r = -*r;
7818  }
7819  }
7820  }
7821 }
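A worked example of the formulas above: for F=3, G=4 the routine returns CS=0.6, SN=0.8, R=5, since 0.6*3+0.8*4=5 and -0.8*3+0.6*4=0. Sketch (hypothetical helper, not library code):

/*
 * Hypothetical worked example -- not part of the library source.
 */
static void example_generaterotation(ae_state *_state)
{
    double cs;
    double sn;
    double r;

    generaterotation(3.0, 4.0, &cs, &sn, &r, _state);

    /*
     * cs = 0.6, sn = 0.8, r = 5 (up to rounding):
     * [  0.6  0.8 ] [ 3 ]   [ 5 ]
     * [ -0.8  0.6 ] [ 4 ] = [ 0 ]
     */
}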
7822 
7823 
7824 
7825 
7826 /*************************************************************************
7827 Subroutine performing the Schur decomposition of a matrix in upper
7828 Hessenberg form using the QR algorithm with multiple shifts.
7829 
7830 The source matrix H is represented as S'*H*S = T, where H - matrix in
7831 upper Hessenberg form, S - orthogonal matrix (Schur vectors), T - upper
7832 quasi-triangular matrix (with blocks of sizes 1x1 and 2x2 on the main
7833 diagonal).
7834 
7835 Input parameters:
7836  H - matrix to be decomposed.
7837  Array whose indexes range within [1..N, 1..N].
7838  N - size of H, N>=0.
7839 
7840 
7841 Output parameters:
7842  H - contains the matrix T.
7843  Array whose indexes range within [1..N, 1..N].
7844  All elements below the blocks on the main diagonal are equal
7845  to 0.
7846  S - contains Schur vectors.
7847  Array whose indexes range within [1..N, 1..N].
7848 
7849 Note 1:
7850  The block structure of matrix T could be easily recognized: since all
7851  the elements below the blocks are zeros, the elements a[i+1,i] which
7852  are equal to 0 show the block border.
7853 
7854 Note 2:
7855  the algorithm performance depends on the value of the internal
7856  parameter NS of InternalSchurDecomposition subroutine which defines
7857  the number of shifts in the QR algorithm (analog of the block width
7858  in block matrix algorithms in linear algebra). If you require maximum
7859  performance on your machine, it is recommended to adjust this
7860  parameter manually.
7861 
7862 Result:
7863  True, if the algorithm has converged and the parameters H and S contain
7864  the result.
7865  False, if the algorithm has not converged.
7866 
7867 Algorithm implemented on the basis of subroutine DHSEQR (LAPACK 3.0 library).
7868 *************************************************************************/
7869 ae_bool upperhessenbergschurdecomposition(/* Real */ ae_matrix* h,
7870  ae_int_t n,
7871  /* Real */ ae_matrix* s,
7872  ae_state *_state)
7873 {
7874  ae_frame _frame_block;
7875  ae_vector wi;
7876  ae_vector wr;
7877  ae_int_t info;
7878  ae_bool result;
7879 
7880  ae_frame_make(_state, &_frame_block);
7881  ae_matrix_clear(s);
7882  ae_vector_init(&wi, 0, DT_REAL, _state, ae_true);
7883  ae_vector_init(&wr, 0, DT_REAL, _state, ae_true);
7884 
7885  internalschurdecomposition(h, n, 1, 2, &wr, &wi, s, &info, _state);
7886  result = info==0;
7887  ae_frame_leave(_state);
7888  return result;
7889 }
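A hedged usage sketch for the routine above, Schur-decomposing a small matrix that is already in upper Hessenberg form (here a symmetric tridiagonal one), using the 1-based [1..N,1..N] layout this file expects. The helper is hypothetical.

/*
 * Hypothetical usage sketch -- not part of the library source.
 */
static void example_hessenbergschur(ae_state *_state)
{
    ae_matrix h;
    ae_matrix s;
    ae_bool converged;

    ae_matrix_init(&h, 0, 0, DT_REAL, _state, ae_false);
    ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_false);
    ae_matrix_set_length(&h, 3+1, 3+1, _state);

    /*
     * 3x3 upper Hessenberg matrix (only the first subdiagonal is nonzero)
     */
    h.ptr.pp_double[1][1] = 2; h.ptr.pp_double[1][2] = 1; h.ptr.pp_double[1][3] = 0;
    h.ptr.pp_double[2][1] = 1; h.ptr.pp_double[2][2] = 3; h.ptr.pp_double[2][3] = 1;
    h.ptr.pp_double[3][1] = 0; h.ptr.pp_double[3][2] = 1; h.ptr.pp_double[3][3] = 4;

    converged = upperhessenbergschurdecomposition(&h, 3, &s, _state);
    if( converged )
    {
        /*
         * H now holds the quasi-triangular T and S the Schur vectors
         */
    }

    ae_matrix_clear(&h);
    ae_matrix_clear(&s);
}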
7890 
7891 
7892 void internalschurdecomposition(/* Real */ ae_matrix* h,
7893  ae_int_t n,
7894  ae_int_t tneeded,
7895  ae_int_t zneeded,
7896  /* Real */ ae_vector* wr,
7897  /* Real */ ae_vector* wi,
7898  /* Real */ ae_matrix* z,
7899  ae_int_t* info,
7900  ae_state *_state)
7901 {
7902  ae_frame _frame_block;
7903  ae_vector work;
7904  ae_int_t i;
7905  ae_int_t i1;
7906  ae_int_t i2;
7907  ae_int_t ierr;
7908  ae_int_t ii;
7909  ae_int_t itemp;
7910  ae_int_t itn;
7911  ae_int_t its;
7912  ae_int_t j;
7913  ae_int_t k;
7914  ae_int_t l;
7915  ae_int_t maxb;
7916  ae_int_t nr;
7917  ae_int_t ns;
7918  ae_int_t nv;
7919  double absw;
7920  double smlnum;
7921  double tau;
7922  double temp;
7923  double tst1;
7924  double ulp;
7925  double unfl;
7926  ae_matrix s;
7927  ae_vector v;
7928  ae_vector vv;
7929  ae_vector workc1;
7930  ae_vector works1;
7931  ae_vector workv3;
7932  ae_vector tmpwr;
7933  ae_vector tmpwi;
7934  ae_bool initz;
7935  ae_bool wantt;
7936  ae_bool wantz;
7937  double cnst;
7938  ae_bool failflag;
7939  ae_int_t p1;
7940  ae_int_t p2;
7941  double vt;
7942 
7943  ae_frame_make(_state, &_frame_block);
7944  ae_vector_clear(wr);
7945  ae_vector_clear(wi);
7946  *info = 0;
7947  ae_vector_init(&work, 0, DT_REAL, _state, ae_true);
7948  ae_matrix_init(&s, 0, 0, DT_REAL, _state, ae_true);
7949  ae_vector_init(&v, 0, DT_REAL, _state, ae_true);
7950  ae_vector_init(&vv, 0, DT_REAL, _state, ae_true);
7951  ae_vector_init(&workc1, 0, DT_REAL, _state, ae_true);
7952  ae_vector_init(&works1, 0, DT_REAL, _state, ae_true);
7953  ae_vector_init(&workv3, 0, DT_REAL, _state, ae_true);
7954  ae_vector_init(&tmpwr, 0, DT_REAL, _state, ae_true);
7955  ae_vector_init(&tmpwi, 0, DT_REAL, _state, ae_true);
7956 
7957 
7958  /*
7959  * Set the order of the multi-shift QR algorithm to be used.
7960  * If you want to tune the algorithm, change these values
7961  */
7962  ns = 12;
7963  maxb = 50;
7964 
7965  /*
7966  * Now 2 < NS <= MAXB < NH.
7967  */
7968  maxb = ae_maxint(3, maxb, _state);
7969  ns = ae_minint(maxb, ns, _state);
7970 
7971  /*
7972  * Initialize
7973  */
7974  cnst = 1.5;
7975  ae_vector_set_length(&work, ae_maxint(n, 1, _state)+1, _state);
7976  ae_matrix_set_length(&s, ns+1, ns+1, _state);
7977  ae_vector_set_length(&v, ns+1+1, _state);
7978  ae_vector_set_length(&vv, ns+1+1, _state);
7979  ae_vector_set_length(wr, ae_maxint(n, 1, _state)+1, _state);
7980  ae_vector_set_length(wi, ae_maxint(n, 1, _state)+1, _state);
7981  ae_vector_set_length(&workc1, 1+1, _state);
7982  ae_vector_set_length(&works1, 1+1, _state);
7983  ae_vector_set_length(&workv3, 3+1, _state);
7984  ae_vector_set_length(&tmpwr, ae_maxint(n, 1, _state)+1, _state);
7985  ae_vector_set_length(&tmpwi, ae_maxint(n, 1, _state)+1, _state);
7986  ae_assert(n>=0, "InternalSchurDecomposition: incorrect N!", _state);
7987  ae_assert(tneeded==0||tneeded==1, "InternalSchurDecomposition: incorrect TNeeded!", _state);
7988  ae_assert((zneeded==0||zneeded==1)||zneeded==2, "InternalSchurDecomposition: incorrect ZNeeded!", _state);
7989  wantt = tneeded==1;
7990  initz = zneeded==2;
7991  wantz = zneeded!=0;
7992  *info = 0;
7993 
7994  /*
7995  * Initialize Z, if necessary
7996  */
7997  if( initz )
7998  {
7999  ae_matrix_set_length(z, n+1, n+1, _state);
8000  for(i=1; i<=n; i++)
8001  {
8002  for(j=1; j<=n; j++)
8003  {
8004  if( i==j )
8005  {
8006  z->ptr.pp_double[i][j] = 1;
8007  }
8008  else
8009  {
8010  z->ptr.pp_double[i][j] = 0;
8011  }
8012  }
8013  }
8014  }
8015 
8016  /*
8017  * Quick return if possible
8018  */
8019  if( n==0 )
8020  {
8021  ae_frame_leave(_state);
8022  return;
8023  }
8024  if( n==1 )
8025  {
8026  wr->ptr.p_double[1] = h->ptr.pp_double[1][1];
8027  wi->ptr.p_double[1] = 0;
8028  ae_frame_leave(_state);
8029  return;
8030  }
8031 
8032  /*
8033  * Set rows and columns 1 to N to zero below the first
8034  * subdiagonal.
8035  */
8036  for(j=1; j<=n-2; j++)
8037  {
8038  for(i=j+2; i<=n; i++)
8039  {
8040  h->ptr.pp_double[i][j] = 0;
8041  }
8042  }
8043 
8044  /*
8045  * Test if N is sufficiently small
8046  */
8047  if( (ns<=2||ns>n)||maxb>=n )
8048  {
8049 
8050  /*
8051  * Use the standard double-shift algorithm
8052  */
8053  hsschur_internalauxschur(wantt, wantz, n, 1, n, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
8054 
8055  /*
8056  * fill entries under diagonal blocks of T with zeros
8057  */
8058  if( wantt )
8059  {
8060  j = 1;
8061  while(j<=n)
8062  {
8063  if( ae_fp_eq(wi->ptr.p_double[j],0) )
8064  {
8065  for(i=j+1; i<=n; i++)
8066  {
8067  h->ptr.pp_double[i][j] = 0;
8068  }
8069  j = j+1;
8070  }
8071  else
8072  {
8073  for(i=j+2; i<=n; i++)
8074  {
8075  h->ptr.pp_double[i][j] = 0;
8076  h->ptr.pp_double[i][j+1] = 0;
8077  }
8078  j = j+2;
8079  }
8080  }
8081  }
8082  ae_frame_leave(_state);
8083  return;
8084  }
8085  unfl = ae_minrealnumber;
8086  ulp = 2*ae_machineepsilon;
8087  smlnum = unfl*(n/ulp);
8088 
8089  /*
8090  * I1 and I2 are the indices of the first row and last column of H
8091  * to which transformations must be applied. If eigenvalues only are
8092  * being computed, I1 and I2 are set inside the main loop.
8093  */
8094  i1 = 1;
8095  i2 = n;
8096 
8097  /*
8098  * ITN is the total number of multiple-shift QR iterations allowed.
8099  */
8100  itn = 30*n;
8101 
8102  /*
8103  * The main loop begins here. I is the loop index and decreases from
8104  * IHI to ILO in steps of at most MAXB. Each iteration of the loop
8105  * works with the active submatrix in rows and columns L to I.
8106  * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
8107  * H(L,L-1) is negligible so that the matrix splits.
8108  */
8109  i = n;
8110  for(;;)
8111  {
8112  l = 1;
8113  if( i<1 )
8114  {
8115 
8116  /*
8117  * fill entries under diagonal blocks of T with zeros
8118  */
8119  if( wantt )
8120  {
8121  j = 1;
8122  while(j<=n)
8123  {
8124  if( ae_fp_eq(wi->ptr.p_double[j],0) )
8125  {
8126  for(i=j+1; i<=n; i++)
8127  {
8128  h->ptr.pp_double[i][j] = 0;
8129  }
8130  j = j+1;
8131  }
8132  else
8133  {
8134  for(i=j+2; i<=n; i++)
8135  {
8136  h->ptr.pp_double[i][j] = 0;
8137  h->ptr.pp_double[i][j+1] = 0;
8138  }
8139  j = j+2;
8140  }
8141  }
8142  }
8143 
8144  /*
8145  * Exit
8146  */
8147  ae_frame_leave(_state);
8148  return;
8149  }
8150 
8151  /*
8152  * Perform multiple-shift QR iterations on rows and columns ILO to I
8153  * until a submatrix of order at most MAXB splits off at the bottom
8154  * because a subdiagonal element has become negligible.
8155  */
8156  failflag = ae_true;
8157  for(its=0; its<=itn; its++)
8158  {
8159 
8160  /*
8161  * Look for a single small subdiagonal element.
8162  */
8163  for(k=i; k>=l+1; k--)
8164  {
8165  tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
8166  if( ae_fp_eq(tst1,0) )
8167  {
8168  tst1 = upperhessenberg1norm(h, l, i, l, i, &work, _state);
8169  }
8170  if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) )
8171  {
8172  break;
8173  }
8174  }
8175  l = k;
8176  if( l>1 )
8177  {
8178 
8179  /*
8180  * H(L,L-1) is negligible.
8181  */
8182  h->ptr.pp_double[l][l-1] = 0;
8183  }
8184 
8185  /*
8186  * Exit from loop if a submatrix of order <= MAXB has split off.
8187  */
8188  if( l>=i-maxb+1 )
8189  {
8190  failflag = ae_false;
8191  break;
8192  }
8193 
8194  /*
8195  * Now the active submatrix is in rows and columns L to I. If
8196  * eigenvalues only are being computed, only the active submatrix
8197  * need be transformed.
8198  */
8199  if( its==20||its==30 )
8200  {
8201 
8202  /*
8203  * Exceptional shifts.
8204  */
8205  for(ii=i-ns+1; ii<=i; ii++)
8206  {
8207  wr->ptr.p_double[ii] = cnst*(ae_fabs(h->ptr.pp_double[ii][ii-1], _state)+ae_fabs(h->ptr.pp_double[ii][ii], _state));
8208  wi->ptr.p_double[ii] = 0;
8209  }
8210  }
8211  else
8212  {
8213 
8214  /*
8215  * Use eigenvalues of trailing submatrix of order NS as shifts.
8216  */
8217  copymatrix(h, i-ns+1, i, i-ns+1, i, &s, 1, ns, 1, ns, _state);
8218  hsschur_internalauxschur(ae_false, ae_false, ns, 1, ns, &s, &tmpwr, &tmpwi, 1, ns, z, &work, &workv3, &workc1, &works1, &ierr, _state);
8219  for(p1=1; p1<=ns; p1++)
8220  {
8221  wr->ptr.p_double[i-ns+p1] = tmpwr.ptr.p_double[p1];
8222  wi->ptr.p_double[i-ns+p1] = tmpwi.ptr.p_double[p1];
8223  }
8224  if( ierr>0 )
8225  {
8226 
8227  /*
8228  * If DLAHQR failed to compute all NS eigenvalues, use the
8229  * unconverged diagonal elements as the remaining shifts.
8230  */
8231  for(ii=1; ii<=ierr; ii++)
8232  {
8233  wr->ptr.p_double[i-ns+ii] = s.ptr.pp_double[ii][ii];
8234  wi->ptr.p_double[i-ns+ii] = 0;
8235  }
8236  }
8237  }
8238 
8239  /*
8240  * Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
8241  * where G is the Hessenberg submatrix H(L:I,L:I) and w is
8242  * the vector of shifts (stored in WR and WI). The result is
8243  * stored in the local array V.
8244  */
8245  v.ptr.p_double[1] = 1;
8246  for(ii=2; ii<=ns+1; ii++)
8247  {
8248  v.ptr.p_double[ii] = 0;
8249  }
8250  nv = 1;
8251  for(j=i-ns+1; j<=i; j++)
8252  {
8253  if( ae_fp_greater_eq(wi->ptr.p_double[j],0) )
8254  {
8255  if( ae_fp_eq(wi->ptr.p_double[j],0) )
8256  {
8257 
8258  /*
8259  * real shift
8260  */
8261  p1 = nv+1;
8262  ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
8263  matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &vv, 1, nv, 1.0, &v, 1, nv+1, -wr->ptr.p_double[j], _state);
8264  nv = nv+1;
8265  }
8266  else
8267  {
8268  if( ae_fp_greater(wi->ptr.p_double[j],0) )
8269  {
8270 
8271  /*
8272  * complex conjugate pair of shifts
8273  */
8274  p1 = nv+1;
8275  ae_v_move(&vv.ptr.p_double[1], 1, &v.ptr.p_double[1], 1, ae_v_len(1,p1));
8276  matrixvectormultiply(h, l, l+nv, l, l+nv-1, ae_false, &v, 1, nv, 1.0, &vv, 1, nv+1, -2*wr->ptr.p_double[j], _state);
8277  itemp = vectoridxabsmax(&vv, 1, nv+1, _state);
8278  temp = 1/ae_maxreal(ae_fabs(vv.ptr.p_double[itemp], _state), smlnum, _state);
8279  p1 = nv+1;
8280  ae_v_muld(&vv.ptr.p_double[1], 1, ae_v_len(1,p1), temp);
8281  absw = pythag2(wr->ptr.p_double[j], wi->ptr.p_double[j], _state);
8282  temp = temp*absw*absw;
8283  matrixvectormultiply(h, l, l+nv+1, l, l+nv, ae_false, &vv, 1, nv+1, 1.0, &v, 1, nv+2, temp, _state);
8284  nv = nv+2;
8285  }
8286  }
8287 
8288  /*
8289  * Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
8290  * reset it to the unit vector.
8291  */
8292  itemp = vectoridxabsmax(&v, 1, nv, _state);
8293  temp = ae_fabs(v.ptr.p_double[itemp], _state);
8294  if( ae_fp_eq(temp,0) )
8295  {
8296  v.ptr.p_double[1] = 1;
8297  for(ii=2; ii<=nv; ii++)
8298  {
8299  v.ptr.p_double[ii] = 0;
8300  }
8301  }
8302  else
8303  {
8304  temp = ae_maxreal(temp, smlnum, _state);
8305  vt = 1/temp;
8306  ae_v_muld(&v.ptr.p_double[1], 1, ae_v_len(1,nv), vt);
8307  }
8308  }
8309  }
8310 
8311  /*
8312  * Multiple-shift QR step
8313  */
8314  for(k=l; k<=i-1; k++)
8315  {
8316 
8317  /*
8318  * The first iteration of this loop determines a reflection G
8319  * from the vector V and applies it from left and right to H,
8320  * thus creating a nonzero bulge below the subdiagonal.
8321  *
8322  * Each subsequent iteration determines a reflection G to
8323  * restore the Hessenberg form in the (K-1)th column, and thus
8324  * chases the bulge one step toward the bottom of the active
8325  * submatrix. NR is the order of G.
8326  */
8327  nr = ae_minint(ns+1, i-k+1, _state);
8328  if( k>l )
8329  {
8330  p1 = k-1;
8331  p2 = k+nr-1;
8332  ae_v_move(&v.ptr.p_double[1], 1, &h->ptr.pp_double[k][p1], h->stride, ae_v_len(1,nr));
8333  touchint(&p2, _state);
8334  }
8335  generatereflection(&v, nr, &tau, _state);
8336  if( k>l )
8337  {
8338  h->ptr.pp_double[k][k-1] = v.ptr.p_double[1];
8339  for(ii=k+1; ii<=i; ii++)
8340  {
8341  h->ptr.pp_double[ii][k-1] = 0;
8342  }
8343  }
8344  v.ptr.p_double[1] = 1;
8345 
8346  /*
8347  * Apply G from the left to transform the rows of the matrix in
8348  * columns K to I2.
8349  */
8350  applyreflectionfromtheleft(h, tau, &v, k, k+nr-1, k, i2, &work, _state);
8351 
8352  /*
8353  * Apply G from the right to transform the columns of the
8354  * matrix in rows I1 to min(K+NR,I).
8355  */
8356  applyreflectionfromtheright(h, tau, &v, i1, ae_minint(k+nr, i, _state), k, k+nr-1, &work, _state);
8357  if( wantz )
8358  {
8359 
8360  /*
8361  * Accumulate transformations in the matrix Z
8362  */
8363  applyreflectionfromtheright(z, tau, &v, 1, n, k, k+nr-1, &work, _state);
8364  }
8365  }
8366  }
8367 
8368  /*
8369  * Failure to converge in remaining number of iterations
8370  */
8371  if( failflag )
8372  {
8373  *info = i;
8374  ae_frame_leave(_state);
8375  return;
8376  }
8377 
8378  /*
8379  * A submatrix of order <= MAXB in rows and columns L to I has split
8380  * off. Use the double-shift QR algorithm to handle it.
8381  */
8382  hsschur_internalauxschur(wantt, wantz, n, l, i, h, wr, wi, 1, n, z, &work, &workv3, &workc1, &works1, info, _state);
8383  if( *info>0 )
8384  {
8385  ae_frame_leave(_state);
8386  return;
8387  }
8388 
8389  /*
8390  * Decrement number of remaining iterations, and return to start of
8391  * the main loop with a new value of I.
8392  */
8393  itn = itn-its;
8394  i = l-1;
8395  }
8396  ae_frame_leave(_state);
8397 }
8398 
8399 
8400 static void hsschur_internalauxschur(ae_bool wantt,
8401  ae_bool wantz,
8402  ae_int_t n,
8403  ae_int_t ilo,
8404  ae_int_t ihi,
8405  /* Real */ ae_matrix* h,
8406  /* Real */ ae_vector* wr,
8407  /* Real */ ae_vector* wi,
8408  ae_int_t iloz,
8409  ae_int_t ihiz,
8410  /* Real */ ae_matrix* z,
8411  /* Real */ ae_vector* work,
8412  /* Real */ ae_vector* workv3,
8413  /* Real */ ae_vector* workc1,
8414  /* Real */ ae_vector* works1,
8415  ae_int_t* info,
8416  ae_state *_state)
8417 {
8418  ae_int_t i;
8419  ae_int_t i1;
8420  ae_int_t i2;
8421  ae_int_t itn;
8422  ae_int_t its;
8423  ae_int_t j;
8424  ae_int_t k;
8425  ae_int_t l;
8426  ae_int_t m;
8427  ae_int_t nh;
8428  ae_int_t nr;
8429  ae_int_t nz;
8430  double ave;
8431  double cs;
8432  double disc;
8433  double h00;
8434  double h10;
8435  double h11;
8436  double h12;
8437  double h21;
8438  double h22;
8439  double h33;
8440  double h33s;
8441  double h43h34;
8442  double h44;
8443  double h44s;
8444  double s;
8445  double smlnum;
8446  double sn;
8447  double sum;
8448  double t1;
8449  double t2;
8450  double t3;
8451  double tst1;
8452  double unfl;
8453  double v1;
8454  double v2;
8455  double v3;
8456  ae_bool failflag;
8457  double dat1;
8458  double dat2;
8459  ae_int_t p1;
8460  double him1im1;
8461  double him1i;
8462  double hiim1;
8463  double hii;
8464  double wrim1;
8465  double wri;
8466  double wiim1;
8467  double wii;
8468  double ulp;
8469 
8470  *info = 0;
8471 
8472  *info = 0;
8473  dat1 = 0.75;
8474  dat2 = -0.4375;
8475  ulp = ae_machineepsilon;
8476 
8477  /*
8478  * Quick return if possible
8479  */
8480  if( n==0 )
8481  {
8482  return;
8483  }
8484  if( ilo==ihi )
8485  {
8486  wr->ptr.p_double[ilo] = h->ptr.pp_double[ilo][ilo];
8487  wi->ptr.p_double[ilo] = 0;
8488  return;
8489  }
8490  nh = ihi-ilo+1;
8491  nz = ihiz-iloz+1;
8492 
8493  /*
8494  * Set machine-dependent constants for the stopping criterion.
8495  * If norm(H) <= sqrt(MaxRealNumber), overflow should not occur.
8496  */
8497  unfl = ae_minrealnumber;
8498  smlnum = unfl*(nh/ulp);
8499 
8500  /*
8501  * I1 and I2 are the indices of the first row and last column of H
8502  * to which transformations must be applied. If eigenvalues only are
8503  * being computed, I1 and I2 are set inside the main loop.
8504  */
8505  i1 = 1;
8506  i2 = n;
8507 
8508  /*
8509  * ITN is the total number of QR iterations allowed.
8510  */
8511  itn = 30*nh;
8512 
8513  /*
8514  * The main loop begins here. I is the loop index and decreases from
8515  * IHI to ILO in steps of 1 or 2. Each iteration of the loop works
8516  * with the active submatrix in rows and columns L to I.
8517  * Eigenvalues I+1 to IHI have already converged. Either L = ILO or
8518  * H(L,L-1) is negligible so that the matrix splits.
8519  */
8520  i = ihi;
8521  for(;;)
8522  {
8523  l = ilo;
8524  if( i<ilo )
8525  {
8526  return;
8527  }
8528 
8529  /*
8530  * Perform QR iterations on rows and columns ILO to I until a
8531  * submatrix of order 1 or 2 splits off at the bottom because a
8532  * subdiagonal element has become negligible.
8533  */
8534  failflag = ae_true;
8535  for(its=0; its<=itn; its++)
8536  {
8537 
8538  /*
8539  * Look for a single small subdiagonal element.
8540  */
8541  for(k=i; k>=l+1; k--)
8542  {
8543  tst1 = ae_fabs(h->ptr.pp_double[k-1][k-1], _state)+ae_fabs(h->ptr.pp_double[k][k], _state);
8544  if( ae_fp_eq(tst1,0) )
8545  {
8546  tst1 = upperhessenberg1norm(h, l, i, l, i, work, _state);
8547  }
8548  if( ae_fp_less_eq(ae_fabs(h->ptr.pp_double[k][k-1], _state),ae_maxreal(ulp*tst1, smlnum, _state)) )
8549  {
8550  break;
8551  }
8552  }
8553  l = k;
8554  if( l>ilo )
8555  {
8556 
8557  /*
8558  * H(L,L-1) is negligible
8559  */
8560  h->ptr.pp_double[l][l-1] = 0;
8561  }
8562 
8563  /*
8564  * Exit from loop if a submatrix of order 1 or 2 has split off.
8565  */
8566  if( l>=i-1 )
8567  {
8568  failflag = ae_false;
8569  break;
8570  }
8571 
8572  /*
8573  * Now the active submatrix is in rows and columns L to I. If
8574  * eigenvalues only are being computed, only the active submatrix
8575  * need be transformed.
8576  */
8577  if( its==10||its==20 )
8578  {
8579 
8580  /*
8581  * Exceptional shift.
8582  */
8583  s = ae_fabs(h->ptr.pp_double[i][i-1], _state)+ae_fabs(h->ptr.pp_double[i-1][i-2], _state);
8584  h44 = dat1*s+h->ptr.pp_double[i][i];
8585  h33 = h44;
8586  h43h34 = dat2*s*s;
8587  }
8588  else
8589  {
8590 
8591  /*
8592  * Prepare to use Francis' double shift
8593  * (i.e. 2nd degree generalized Rayleigh quotient)
8594  */
8595  h44 = h->ptr.pp_double[i][i];
8596  h33 = h->ptr.pp_double[i-1][i-1];
8597  h43h34 = h->ptr.pp_double[i][i-1]*h->ptr.pp_double[i-1][i];
8598  s = h->ptr.pp_double[i-1][i-2]*h->ptr.pp_double[i-1][i-2];
8599  disc = (h33-h44)*0.5;
8600  disc = disc*disc+h43h34;
8601  if( ae_fp_greater(disc,0) )
8602  {
8603 
8604  /*
8605  * Real roots: use Wilkinson's shift twice
8606  */
8607  disc = ae_sqrt(disc, _state);
8608  ave = 0.5*(h33+h44);
8609  if( ae_fp_greater(ae_fabs(h33, _state)-ae_fabs(h44, _state),0) )
8610  {
8611  h33 = h33*h44-h43h34;
8612  h44 = h33/(hsschur_extschursign(disc, ave, _state)+ave);
8613  }
8614  else
8615  {
8616  h44 = hsschur_extschursign(disc, ave, _state)+ave;
8617  }
8618  h33 = h44;
8619  h43h34 = 0;
8620  }
8621  }
8622 
8623  /*
8624  * Look for two consecutive small subdiagonal elements.
8625  */
8626  for(m=i-2; m>=l; m--)
8627  {
8628 
8629  /*
8630  * Determine the effect of starting the double-shift QR
8631  * iteration at row M, and see if this would make H(M,M-1)
8632  * negligible.
8633  */
8634  h11 = h->ptr.pp_double[m][m];
8635  h22 = h->ptr.pp_double[m+1][m+1];
8636  h21 = h->ptr.pp_double[m+1][m];
8637  h12 = h->ptr.pp_double[m][m+1];
8638  h44s = h44-h11;
8639  h33s = h33-h11;
8640  v1 = (h33s*h44s-h43h34)/h21+h12;
8641  v2 = h22-h11-h33s-h44s;
8642  v3 = h->ptr.pp_double[m+2][m+1];
8643  s = ae_fabs(v1, _state)+ae_fabs(v2, _state)+ae_fabs(v3, _state);
8644  v1 = v1/s;
8645  v2 = v2/s;
8646  v3 = v3/s;
8647  workv3->ptr.p_double[1] = v1;
8648  workv3->ptr.p_double[2] = v2;
8649  workv3->ptr.p_double[3] = v3;
8650  if( m==l )
8651  {
8652  break;
8653  }
8654  h00 = h->ptr.pp_double[m-1][m-1];
8655  h10 = h->ptr.pp_double[m][m-1];
8656  tst1 = ae_fabs(v1, _state)*(ae_fabs(h00, _state)+ae_fabs(h11, _state)+ae_fabs(h22, _state));
8657  if( ae_fp_less_eq(ae_fabs(h10, _state)*(ae_fabs(v2, _state)+ae_fabs(v3, _state)),ulp*tst1) )
8658  {
8659  break;
8660  }
8661  }
8662 
8663  /*
8664  * Double-shift QR step
8665  */
8666  for(k=m; k<=i-1; k++)
8667  {
8668 
8669  /*
8670  * The first iteration of this loop determines a reflection G
8671  * from the vector V and applies it from left and right to H,
8672  * thus creating a nonzero bulge below the subdiagonal.
8673  *
8674  * Each subsequent iteration determines a reflection G to
8675  * restore the Hessenberg form in the (K-1)th column, and thus
8676  * chases the bulge one step toward the bottom of the active
8677  * submatrix. NR is the order of G.
8678  */
8679  nr = ae_minint(3, i-k+1, _state);
8680  if( k>m )
8681  {
8682  for(p1=1; p1<=nr; p1++)
8683  {
8684  workv3->ptr.p_double[p1] = h->ptr.pp_double[k+p1-1][k-1];
8685  }
8686  }
8687  generatereflection(workv3, nr, &t1, _state);
8688  if( k>m )
8689  {
8690  h->ptr.pp_double[k][k-1] = workv3->ptr.p_double[1];
8691  h->ptr.pp_double[k+1][k-1] = 0;
8692  if( k<i-1 )
8693  {
8694  h->ptr.pp_double[k+2][k-1] = 0;
8695  }
8696  }
8697  else
8698  {
8699  if( m>l )
8700  {
8701  h->ptr.pp_double[k][k-1] = -h->ptr.pp_double[k][k-1];
8702  }
8703  }
8704  v2 = workv3->ptr.p_double[2];
8705  t2 = t1*v2;
8706  if( nr==3 )
8707  {
8708  v3 = workv3->ptr.p_double[3];
8709  t3 = t1*v3;
8710 
8711  /*
8712  * Apply G from the left to transform the rows of the matrix
8713  * in columns K to I2.
8714  */
8715  for(j=k; j<=i2; j++)
8716  {
8717  sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j]+v3*h->ptr.pp_double[k+2][j];
8718  h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
8719  h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
8720  h->ptr.pp_double[k+2][j] = h->ptr.pp_double[k+2][j]-sum*t3;
8721  }
8722 
8723  /*
8724  * Apply G from the right to transform the columns of the
8725  * matrix in rows I1 to min(K+3,I).
8726  */
8727  for(j=i1; j<=ae_minint(k+3, i, _state); j++)
8728  {
8729  sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1]+v3*h->ptr.pp_double[j][k+2];
8730  h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
8731  h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
8732  h->ptr.pp_double[j][k+2] = h->ptr.pp_double[j][k+2]-sum*t3;
8733  }
8734  if( wantz )
8735  {
8736 
8737  /*
8738  * Accumulate transformations in the matrix Z
8739  */
8740  for(j=iloz; j<=ihiz; j++)
8741  {
8742  sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1]+v3*z->ptr.pp_double[j][k+2];
8743  z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
8744  z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
8745  z->ptr.pp_double[j][k+2] = z->ptr.pp_double[j][k+2]-sum*t3;
8746  }
8747  }
8748  }
8749  else
8750  {
8751  if( nr==2 )
8752  {
8753 
8754  /*
8755  * Apply G from the left to transform the rows of the matrix
8756  * in columns K to I2.
8757  */
8758  for(j=k; j<=i2; j++)
8759  {
8760  sum = h->ptr.pp_double[k][j]+v2*h->ptr.pp_double[k+1][j];
8761  h->ptr.pp_double[k][j] = h->ptr.pp_double[k][j]-sum*t1;
8762  h->ptr.pp_double[k+1][j] = h->ptr.pp_double[k+1][j]-sum*t2;
8763  }
8764 
8765  /*
8766  * Apply G from the right to transform the columns of the
8767  * matrix in rows I1 to min(K+3,I).
8768  */
8769  for(j=i1; j<=i; j++)
8770  {
8771  sum = h->ptr.pp_double[j][k]+v2*h->ptr.pp_double[j][k+1];
8772  h->ptr.pp_double[j][k] = h->ptr.pp_double[j][k]-sum*t1;
8773  h->ptr.pp_double[j][k+1] = h->ptr.pp_double[j][k+1]-sum*t2;
8774  }
8775  if( wantz )
8776  {
8777 
8778  /*
8779  * Accumulate transformations in the matrix Z
8780  */
8781  for(j=iloz; j<=ihiz; j++)
8782  {
8783  sum = z->ptr.pp_double[j][k]+v2*z->ptr.pp_double[j][k+1];
8784  z->ptr.pp_double[j][k] = z->ptr.pp_double[j][k]-sum*t1;
8785  z->ptr.pp_double[j][k+1] = z->ptr.pp_double[j][k+1]-sum*t2;
8786  }
8787  }
8788  }
8789  }
8790  }
8791  }
8792  if( failflag )
8793  {
8794 
8795  /*
8796  * Failure to converge in remaining number of iterations
8797  */
8798  *info = i;
8799  return;
8800  }
8801  if( l==i )
8802  {
8803 
8804  /*
8805  * H(I,I-1) is negligible: one eigenvalue has converged.
8806  */
8807  wr->ptr.p_double[i] = h->ptr.pp_double[i][i];
8808  wi->ptr.p_double[i] = 0;
8809  }
8810  else
8811  {
8812  if( l==i-1 )
8813  {
8814 
8815  /*
8816  * H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
8817  *
8818  * Transform the 2-by-2 submatrix to standard Schur form,
8819  * and compute and store the eigenvalues.
8820  */
8821  him1im1 = h->ptr.pp_double[i-1][i-1];
8822  him1i = h->ptr.pp_double[i-1][i];
8823  hiim1 = h->ptr.pp_double[i][i-1];
8824  hii = h->ptr.pp_double[i][i];
8825  hsschur_aux2x2schur(&him1im1, &him1i, &hiim1, &hii, &wrim1, &wiim1, &wri, &wii, &cs, &sn, _state);
8826  wr->ptr.p_double[i-1] = wrim1;
8827  wi->ptr.p_double[i-1] = wiim1;
8828  wr->ptr.p_double[i] = wri;
8829  wi->ptr.p_double[i] = wii;
8830  h->ptr.pp_double[i-1][i-1] = him1im1;
8831  h->ptr.pp_double[i-1][i] = him1i;
8832  h->ptr.pp_double[i][i-1] = hiim1;
8833  h->ptr.pp_double[i][i] = hii;
8834  if( wantt )
8835  {
8836 
8837  /*
8838  * Apply the transformation to the rest of H.
8839  */
8840  if( i2>i )
8841  {
8842  workc1->ptr.p_double[1] = cs;
8843  works1->ptr.p_double[1] = sn;
8844  applyrotationsfromtheleft(ae_true, i-1, i, i+1, i2, workc1, works1, h, work, _state);
8845  }
8846  workc1->ptr.p_double[1] = cs;
8847  works1->ptr.p_double[1] = sn;
8848  applyrotationsfromtheright(ae_true, i1, i-2, i-1, i, workc1, works1, h, work, _state);
8849  }
8850  if( wantz )
8851  {
8852 
8853  /*
8854  * Apply the transformation to Z.
8855  */
8856  workc1->ptr.p_double[1] = cs;
8857  works1->ptr.p_double[1] = sn;
8858  applyrotationsfromtheright(ae_true, iloz, iloz+nz-1, i-1, i, workc1, works1, z, work, _state);
8859  }
8860  }
8861  }
8862 
8863  /*
8864  * Decrement number of remaining iterations, and return to start of
8865  * the main loop with new value of I.
8866  */
8867  itn = itn-its;
8868  i = l-1;
8869  }
8870 }
8871 
8872 
8873 static void hsschur_aux2x2schur(double* a,
8874  double* b,
8875  double* c,
8876  double* d,
8877  double* rt1r,
8878  double* rt1i,
8879  double* rt2r,
8880  double* rt2i,
8881  double* cs,
8882  double* sn,
8883  ae_state *_state)
8884 {
8885  double multpl;
8886  double aa;
8887  double bb;
8888  double bcmax;
8889  double bcmis;
8890  double cc;
8891  double cs1;
8892  double dd;
8893  double eps;
8894  double p;
8895  double sab;
8896  double sac;
8897  double scl;
8898  double sigma;
8899  double sn1;
8900  double tau;
8901  double temp;
8902  double z;
8903 
8904  *rt1r = 0;
8905  *rt1i = 0;
8906  *rt2r = 0;
8907  *rt2i = 0;
8908  *cs = 0;
8909  *sn = 0;
8910 
8911  multpl = 4.0;
8912  eps = ae_machineepsilon;
8913  if( ae_fp_eq(*c,0) )
8914  {
8915  *cs = 1;
8916  *sn = 0;
8917  }
8918  else
8919  {
8920  if( ae_fp_eq(*b,0) )
8921  {
8922 
8923  /*
8924  * Swap rows and columns
8925  */
8926  *cs = 0;
8927  *sn = 1;
8928  temp = *d;
8929  *d = *a;
8930  *a = temp;
8931  *b = -*c;
8932  *c = 0;
8933  }
8934  else
8935  {
8936  if( ae_fp_eq(*a-(*d),0)&&hsschur_extschursigntoone(*b, _state)!=hsschur_extschursigntoone(*c, _state) )
8937  {
8938  *cs = 1;
8939  *sn = 0;
8940  }
8941  else
8942  {
8943  temp = *a-(*d);
8944  p = 0.5*temp;
8945  bcmax = ae_maxreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state);
8946  bcmis = ae_minreal(ae_fabs(*b, _state), ae_fabs(*c, _state), _state)*hsschur_extschursigntoone(*b, _state)*hsschur_extschursigntoone(*c, _state);
8947  scl = ae_maxreal(ae_fabs(p, _state), bcmax, _state);
8948  z = p/scl*p+bcmax/scl*bcmis;
8949 
8950  /*
8951  * If Z is of the order of the machine accuracy, postpone the
8952  * decision on the nature of eigenvalues
8953  */
8954  if( ae_fp_greater_eq(z,multpl*eps) )
8955  {
8956 
8957  /*
8958  * Real eigenvalues. Compute A and D.
8959  */
8960  z = p+hsschur_extschursign(ae_sqrt(scl, _state)*ae_sqrt(z, _state), p, _state);
8961  *a = *d+z;
8962  *d = *d-bcmax/z*bcmis;
8963 
8964  /*
8965  * Compute B and the rotation matrix
8966  */
8967  tau = pythag2(*c, z, _state);
8968  *cs = z/tau;
8969  *sn = *c/tau;
8970  *b = *b-(*c);
8971  *c = 0;
8972  }
8973  else
8974  {
8975 
8976  /*
8977  * Complex eigenvalues, or real (almost) equal eigenvalues.
8978  * Make diagonal elements equal.
8979  */
8980  sigma = *b+(*c);
8981  tau = pythag2(sigma, temp, _state);
8982  *cs = ae_sqrt(0.5*(1+ae_fabs(sigma, _state)/tau), _state);
8983  *sn = -p/(tau*(*cs))*hsschur_extschursign(1, sigma, _state);
8984 
8985  /*
8986  * Compute [ AA BB ] = [ A B ] [ CS -SN ]
8987  * [ CC DD ] [ C D ] [ SN CS ]
8988  */
8989  aa = *a*(*cs)+*b*(*sn);
8990  bb = -*a*(*sn)+*b*(*cs);
8991  cc = *c*(*cs)+*d*(*sn);
8992  dd = -*c*(*sn)+*d*(*cs);
8993 
8994  /*
8995  * Compute [ A B ] = [ CS SN ] [ AA BB ]
8996  * [ C D ] [-SN CS ] [ CC DD ]
8997  */
8998  *a = aa*(*cs)+cc*(*sn);
8999  *b = bb*(*cs)+dd*(*sn);
9000  *c = -aa*(*sn)+cc*(*cs);
9001  *d = -bb*(*sn)+dd*(*cs);
9002  temp = 0.5*(*a+(*d));
9003  *a = temp;
9004  *d = temp;
9005  if( ae_fp_neq(*c,0) )
9006  {
9007  if( ae_fp_neq(*b,0) )
9008  {
9009  if( hsschur_extschursigntoone(*b, _state)==hsschur_extschursigntoone(*c, _state) )
9010  {
9011 
9012  /*
9013  * Real eigenvalues: reduce to upper triangular form
9014  */
9015  sab = ae_sqrt(ae_fabs(*b, _state), _state);
9016  sac = ae_sqrt(ae_fabs(*c, _state), _state);
9017  p = hsschur_extschursign(sab*sac, *c, _state);
9018  tau = 1/ae_sqrt(ae_fabs(*b+(*c), _state), _state);
9019  *a = temp+p;
9020  *d = temp-p;
9021  *b = *b-(*c);
9022  *c = 0;
9023  cs1 = sab*tau;
9024  sn1 = sac*tau;
9025  temp = *cs*cs1-*sn*sn1;
9026  *sn = *cs*sn1+*sn*cs1;
9027  *cs = temp;
9028  }
9029  }
9030  else
9031  {
9032  *b = -*c;
9033  *c = 0;
9034  temp = *cs;
9035  *cs = -*sn;
9036  *sn = temp;
9037  }
9038  }
9039  }
9040  }
9041  }
9042  }
9043 
9044  /*
9045  * Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I).
9046  */
9047  *rt1r = *a;
9048  *rt2r = *d;
9049  if( ae_fp_eq(*c,0) )
9050  {
9051  *rt1i = 0;
9052  *rt2i = 0;
9053  }
9054  else
9055  {
9056  *rt1i = ae_sqrt(ae_fabs(*b, _state), _state)*ae_sqrt(ae_fabs(*c, _state), _state);
9057  *rt2i = -*rt1i;
9058  }
9059 }
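
/*************************************************************************
Illustrative sketch: calling the static 2x2 helper above on the block
[[0,1],[-1,0]], whose eigenvalues are +i and -i. The helper name
example_aux2x2schur below is hypothetical; only types and routines already
present in this file are used.
*************************************************************************/
static void example_aux2x2schur(ae_state *_state)   /* hypothetical, illustration only */
{
    double a, b, c, d;
    double rt1r, rt1i, rt2r, rt2i;
    double cs, sn;

    a = 0.0;  b = 1.0;    /* block [[ 0, 1],   */
    c = -1.0; d = 0.0;    /*        [-1, 0]]   */
    hsschur_aux2x2schur(&a, &b, &c, &d, &rt1r, &rt1i, &rt2r, &rt2i, &cs, &sn, _state);

    /*
     * Since the diagonal entries are equal and the off-diagonal entries
     * have opposite signs, the branch with cs=1, sn=0 is taken, the block
     * is already a standard Schur block, and the eigenvalues come out as
     * rt1r=0, rt1i=+1 and rt2r=0, rt2i=-1, i.e. +i and -i.
     */
}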
9060 
9061 
9062 static double hsschur_extschursign(double a, double b, ae_state *_state)
9063 {
9064  double result;
9065 
9066 
9067  if( ae_fp_greater_eq(b,0) )
9068  {
9069  result = ae_fabs(a, _state);
9070  }
9071  else
9072  {
9073  result = -ae_fabs(a, _state);
9074  }
9075  return result;
9076 }
9077 
9078 
9079 static ae_int_t hsschur_extschursigntoone(double b, ae_state *_state)
9080 {
9081  ae_int_t result;
9082 
9083 
9084  if( ae_fp_greater_eq(b,0) )
9085  {
9086  result = 1;
9087  }
9088  else
9089  {
9090  result = -1;
9091  }
9092  return result;
9093 }
9094 
9095 
9096 
9097 
9098 /*************************************************************************
9099 Utility subroutine performing the "safe" solution of system of linear
9100 equations with triangular coefficient matrices.
9101 
9102 The subroutine uses scaling and solves the scaled system A*x=s*b (where s
9103 is a scalar value) instead of A*x=b, choosing s so that x can be
9104 represented by a floating-point number. The closer the system is to being
9105 singular, the smaller s becomes. If the system is singular, s=0 and x
9106 contains a non-trivial solution of the equation A*x=0.
9107 
9108 A feature of the algorithm is that it cannot cause overflow or division by
9109 zero, regardless of the input matrix.
9110 
9111 The algorithm can solve systems of equations with upper/lower triangular
9112 matrices, with/without unit diagonal, and systems of type A*x=b or A'*x=b
9113 (where A' is the transpose of A).
9114 
9115 Input parameters:
9116  A - system matrix. Array whose indexes range within [0..N-1, 0..N-1].
9117  N - size of matrix A.
9118  X - right-hand member of a system.
9119  Array whose index ranges within [0..N-1].
9120  IsUpper - matrix type. If it is True, the system matrix is the upper
9121  triangular and is located in the corresponding part of
9122  matrix A.
9123  Trans - problem type. If it is True, the problem to be solved is
9124  A'*x=b, otherwise it is A*x=b.
9125  Isunit - matrix type. If it is True, the system matrix has a unit
9126  diagonal (the elements on the main diagonal are not used
9127  in the calculation process), otherwise the matrix is considered
9128  to be a general triangular matrix.
9129 
9130 Output parameters:
9131  X - solution. Array whose index ranges within [0..N-1].
9132  S - scaling factor.
9133 
9134  -- LAPACK auxiliary routine (version 3.0) --
9135  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
9136  Courant Institute, Argonne National Lab, and Rice University
9137  June 30, 1992
9138 *************************************************************************/
9139 void rmatrixtrsafesolve(/* Real */ ae_matrix* a,
9140  ae_int_t n,
9141  /* Real */ ae_vector* x,
9142  double* s,
9143  ae_bool isupper,
9144  ae_bool istrans,
9145  ae_bool isunit,
9146  ae_state *_state)
9147 {
9148  ae_frame _frame_block;
9149  ae_bool normin;
9150  ae_vector cnorm;
9151  ae_matrix a1;
9152  ae_vector x1;
9153  ae_int_t i;
9154 
9155  ae_frame_make(_state, &_frame_block);
9156  *s = 0;
9157  ae_vector_init(&cnorm, 0, DT_REAL, _state, ae_true);
9158  ae_matrix_init(&a1, 0, 0, DT_REAL, _state, ae_true);
9159  ae_vector_init(&x1, 0, DT_REAL, _state, ae_true);
9160 
9161 
9162  /*
9163  * From 0-based to 1-based
9164  */
9165  normin = ae_false;
9166  ae_matrix_set_length(&a1, n+1, n+1, _state);
9167  ae_vector_set_length(&x1, n+1, _state);
9168  for(i=1; i<=n; i++)
9169  {
9170  ae_v_move(&a1.ptr.pp_double[i][1], 1, &a->ptr.pp_double[i-1][0], 1, ae_v_len(1,n));
9171  }
9172  ae_v_move(&x1.ptr.p_double[1], 1, &x->ptr.p_double[0], 1, ae_v_len(1,n));
9173 
9174  /*
9175  * Solve 1-based
9176  */
9177  safesolvetriangular(&a1, n, &x1, s, isupper, istrans, isunit, normin, &cnorm, _state);
9178 
9179  /*
9180  * From 1-based to 0-based
9181  */
9182  ae_v_move(&x->ptr.p_double[0], 1, &x1.ptr.p_double[1], 1, ae_v_len(0,n-1));
9183  ae_frame_leave(_state);
9184 }
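
/*************************************************************************
Illustrative usage sketch for the routine above: solving the upper
triangular system A*x=b, given an already initialized ae_state. The helper
name example_rmatrixtrsafesolve is hypothetical.
*************************************************************************/
static void example_rmatrixtrsafesolve(ae_state *_state)   /* hypothetical, illustration only */
{
    ae_frame _frame_block;
    ae_matrix a;
    ae_vector x;
    double s;

    ae_frame_make(_state, &_frame_block);
    ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);

    /* A = [2 1; 0 4] (upper triangular), right part b = [3, 8] stored in X */
    ae_matrix_set_length(&a, 2, 2, _state);
    ae_vector_set_length(&x, 2, _state);
    a.ptr.pp_double[0][0] = 2.0;
    a.ptr.pp_double[0][1] = 1.0;
    a.ptr.pp_double[1][0] = 0.0;
    a.ptr.pp_double[1][1] = 4.0;
    x.ptr.p_double[0] = 3.0;
    x.ptr.p_double[1] = 8.0;

    /*
     * On exit X holds the solution of A*x = s*b. For this well conditioned
     * system s==1 and X = [0.5, 2.0]; for a (nearly) singular A the scaling
     * factor s shrinks towards 0 instead of the solver overflowing.
     */
    rmatrixtrsafesolve(&a, 2, &x, &s, ae_true, ae_false, ae_false, _state);

    ae_frame_leave(_state);
}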
9185 
9186 
9187 /*************************************************************************
9188 Obsolete 1-based subroutine.
9189 See RMatrixTRSafeSolve for 0-based replacement.
9190 *************************************************************************/
9191 void safesolvetriangular(/* Real */ ae_matrix* a,
9192  ae_int_t n,
9193  /* Real */ ae_vector* x,
9194  double* s,
9195  ae_bool isupper,
9196  ae_bool istrans,
9197  ae_bool isunit,
9198  ae_bool normin,
9199  /* Real */ ae_vector* cnorm,
9200  ae_state *_state)
9201 {
9202  ae_int_t i;
9203  ae_int_t imax;
9204  ae_int_t j;
9205  ae_int_t jfirst;
9206  ae_int_t jinc;
9207  ae_int_t jlast;
9208  ae_int_t jm1;
9209  ae_int_t jp1;
9210  ae_int_t ip1;
9211  ae_int_t im1;
9212  ae_int_t k;
9213  ae_int_t flg;
9214  double v;
9215  double vd;
9216  double bignum;
9217  double grow;
9218  double rec;
9219  double smlnum;
9220  double sumj;
9221  double tjj;
9222  double tjjs;
9223  double tmax;
9224  double tscal;
9225  double uscal;
9226  double xbnd;
9227  double xj;
9228  double xmax;
9229  ae_bool notran;
9230  ae_bool upper;
9231  ae_bool nounit;
9232 
9233  *s = 0;
9234 
9235  upper = isupper;
9236  notran = !istrans;
9237  nounit = !isunit;
9238 
9239  /*
9240  * these initializers are not really necessary,
9241  * but without them the compiler complains about uninitialized locals
9242  */
9243  tjjs = 0;
9244 
9245  /*
9246  * Quick return if possible
9247  */
9248  if( n==0 )
9249  {
9250  return;
9251  }
9252 
9253  /*
9254  * Determine machine dependent parameters to control overflow.
9255  */
9256  smlnum = ae_minrealnumber/(ae_machineepsilon*2);
9257  bignum = 1/smlnum;
9258  *s = 1;
9259  if( !normin )
9260  {
9261  ae_vector_set_length(cnorm, n+1, _state);
9262 
9263  /*
9264  * Compute the 1-norm of each column, not including the diagonal.
9265  */
9266  if( upper )
9267  {
9268 
9269  /*
9270  * A is upper triangular.
9271  */
9272  for(j=1; j<=n; j++)
9273  {
9274  v = 0;
9275  for(k=1; k<=j-1; k++)
9276  {
9277  v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
9278  }
9279  cnorm->ptr.p_double[j] = v;
9280  }
9281  }
9282  else
9283  {
9284 
9285  /*
9286  * A is lower triangular.
9287  */
9288  for(j=1; j<=n-1; j++)
9289  {
9290  v = 0;
9291  for(k=j+1; k<=n; k++)
9292  {
9293  v = v+ae_fabs(a->ptr.pp_double[k][j], _state);
9294  }
9295  cnorm->ptr.p_double[j] = v;
9296  }
9297  cnorm->ptr.p_double[n] = 0;
9298  }
9299  }
9300 
9301  /*
9302  * Scale the column norms by TSCAL if the maximum element in CNORM is
9303  * greater than BIGNUM.
9304  */
9305  imax = 1;
9306  for(k=2; k<=n; k++)
9307  {
9308  if( ae_fp_greater(cnorm->ptr.p_double[k],cnorm->ptr.p_double[imax]) )
9309  {
9310  imax = k;
9311  }
9312  }
9313  tmax = cnorm->ptr.p_double[imax];
9314  if( ae_fp_less_eq(tmax,bignum) )
9315  {
9316  tscal = 1;
9317  }
9318  else
9319  {
9320  tscal = 1/(smlnum*tmax);
9321  ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), tscal);
9322  }
9323 
9324  /*
9325  * Compute a bound on the computed solution vector to see if the
9326  * Level 2 BLAS routine DTRSV can be used.
9327  */
9328  j = 1;
9329  for(k=2; k<=n; k++)
9330  {
9331  if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[j], _state)) )
9332  {
9333  j = k;
9334  }
9335  }
9336  xmax = ae_fabs(x->ptr.p_double[j], _state);
9337  xbnd = xmax;
9338  if( notran )
9339  {
9340 
9341  /*
9342  * Compute the growth in A * x = b.
9343  */
9344  if( upper )
9345  {
9346  jfirst = n;
9347  jlast = 1;
9348  jinc = -1;
9349  }
9350  else
9351  {
9352  jfirst = 1;
9353  jlast = n;
9354  jinc = 1;
9355  }
9356  if( ae_fp_neq(tscal,1) )
9357  {
9358  grow = 0;
9359  }
9360  else
9361  {
9362  if( nounit )
9363  {
9364 
9365  /*
9366  * A is non-unit triangular.
9367  *
9368  * Compute GROW = 1/G(j) and XBND = 1/M(j).
9369  * Initially, G(0) = max{x(i), i=1,...,n}.
9370  */
9371  grow = 1/ae_maxreal(xbnd, smlnum, _state);
9372  xbnd = grow;
9373  j = jfirst;
9374  while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9375  {
9376 
9377  /*
9378  * Exit the loop if the growth factor is too small.
9379  */
9380  if( ae_fp_less_eq(grow,smlnum) )
9381  {
9382  break;
9383  }
9384 
9385  /*
9386  * M(j) = G(j-1) / abs(A(j,j))
9387  */
9388  tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
9389  xbnd = ae_minreal(xbnd, ae_minreal(1, tjj, _state)*grow, _state);
9390  if( ae_fp_greater_eq(tjj+cnorm->ptr.p_double[j],smlnum) )
9391  {
9392 
9393  /*
9394  * G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) )
9395  */
9396  grow = grow*(tjj/(tjj+cnorm->ptr.p_double[j]));
9397  }
9398  else
9399  {
9400 
9401  /*
9402  * G(j) could overflow, set GROW to 0.
9403  */
9404  grow = 0;
9405  }
9406  if( j==jlast )
9407  {
9408  grow = xbnd;
9409  }
9410  j = j+jinc;
9411  }
9412  }
9413  else
9414  {
9415 
9416  /*
9417  * A is unit triangular.
9418  *
9419  * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
9420  */
9421  grow = ae_minreal(1, 1/ae_maxreal(xbnd, smlnum, _state), _state);
9422  j = jfirst;
9423  while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9424  {
9425 
9426  /*
9427  * Exit the loop if the growth factor is too small.
9428  */
9429  if( ae_fp_less_eq(grow,smlnum) )
9430  {
9431  break;
9432  }
9433 
9434  /*
9435  * G(j) = G(j-1)*( 1 + CNORM(j) )
9436  */
9437  grow = grow*(1/(1+cnorm->ptr.p_double[j]));
9438  j = j+jinc;
9439  }
9440  }
9441  }
9442  }
9443  else
9444  {
9445 
9446  /*
9447  * Compute the growth in A' * x = b.
9448  */
9449  if( upper )
9450  {
9451  jfirst = 1;
9452  jlast = n;
9453  jinc = 1;
9454  }
9455  else
9456  {
9457  jfirst = n;
9458  jlast = 1;
9459  jinc = -1;
9460  }
9461  if( ae_fp_neq(tscal,1) )
9462  {
9463  grow = 0;
9464  }
9465  else
9466  {
9467  if( nounit )
9468  {
9469 
9470  /*
9471  * A is non-unit triangular.
9472  *
9473  * Compute GROW = 1/G(j) and XBND = 1/M(j).
9474  * Initially, M(0) = max{x(i), i=1,...,n}.
9475  */
9476  grow = 1/ae_maxreal(xbnd, smlnum, _state);
9477  xbnd = grow;
9478  j = jfirst;
9479  while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9480  {
9481 
9482  /*
9483  * Exit the loop if the growth factor is too small.
9484  */
9485  if( ae_fp_less_eq(grow,smlnum) )
9486  {
9487  break;
9488  }
9489 
9490  /*
9491  * G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) )
9492  */
9493  xj = 1+cnorm->ptr.p_double[j];
9494  grow = ae_minreal(grow, xbnd/xj, _state);
9495 
9496  /*
9497  * M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j))
9498  */
9499  tjj = ae_fabs(a->ptr.pp_double[j][j], _state);
9500  if( ae_fp_greater(xj,tjj) )
9501  {
9502  xbnd = xbnd*(tjj/xj);
9503  }
9504  if( j==jlast )
9505  {
9506  grow = ae_minreal(grow, xbnd, _state);
9507  }
9508  j = j+jinc;
9509  }
9510  }
9511  else
9512  {
9513 
9514  /*
9515  * A is unit triangular.
9516  *
9517  * Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
9518  */
9519  grow = ae_minreal(1, 1/ae_maxreal(xbnd, smlnum, _state), _state);
9520  j = jfirst;
9521  while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9522  {
9523 
9524  /*
9525  * Exit the loop if the growth factor is too small.
9526  */
9527  if( ae_fp_less_eq(grow,smlnum) )
9528  {
9529  break;
9530  }
9531 
9532  /*
9533  * G(j) = ( 1 + CNORM(j) )*G(j-1)
9534  */
9535  xj = 1+cnorm->ptr.p_double[j];
9536  grow = grow/xj;
9537  j = j+jinc;
9538  }
9539  }
9540  }
9541  }
9542  if( ae_fp_greater(grow*tscal,smlnum) )
9543  {
9544 
9545  /*
9546  * Use the Level 2 BLAS solve if the reciprocal of the bound on
9547  * elements of X is not too small.
9548  */
9549  if( (upper&&notran)||(!upper&&!notran) )
9550  {
9551  if( nounit )
9552  {
9553  vd = a->ptr.pp_double[n][n];
9554  }
9555  else
9556  {
9557  vd = 1;
9558  }
9559  x->ptr.p_double[n] = x->ptr.p_double[n]/vd;
9560  for(i=n-1; i>=1; i--)
9561  {
9562  ip1 = i+1;
9563  if( upper )
9564  {
9565  v = ae_v_dotproduct(&a->ptr.pp_double[i][ip1], 1, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
9566  }
9567  else
9568  {
9569  v = ae_v_dotproduct(&a->ptr.pp_double[ip1][i], a->stride, &x->ptr.p_double[ip1], 1, ae_v_len(ip1,n));
9570  }
9571  if( nounit )
9572  {
9573  vd = a->ptr.pp_double[i][i];
9574  }
9575  else
9576  {
9577  vd = 1;
9578  }
9579  x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
9580  }
9581  }
9582  else
9583  {
9584  if( nounit )
9585  {
9586  vd = a->ptr.pp_double[1][1];
9587  }
9588  else
9589  {
9590  vd = 1;
9591  }
9592  x->ptr.p_double[1] = x->ptr.p_double[1]/vd;
9593  for(i=2; i<=n; i++)
9594  {
9595  im1 = i-1;
9596  if( upper )
9597  {
9598  v = ae_v_dotproduct(&a->ptr.pp_double[1][i], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
9599  }
9600  else
9601  {
9602  v = ae_v_dotproduct(&a->ptr.pp_double[i][1], 1, &x->ptr.p_double[1], 1, ae_v_len(1,im1));
9603  }
9604  if( nounit )
9605  {
9606  vd = a->ptr.pp_double[i][i];
9607  }
9608  else
9609  {
9610  vd = 1;
9611  }
9612  x->ptr.p_double[i] = (x->ptr.p_double[i]-v)/vd;
9613  }
9614  }
9615  }
9616  else
9617  {
9618 
9619  /*
9620  * Use a Level 1 BLAS solve, scaling intermediate results.
9621  */
9622  if( ae_fp_greater(xmax,bignum) )
9623  {
9624 
9625  /*
9626  * Scale X so that its components are less than or equal to
9627  * BIGNUM in absolute value.
9628  */
9629  *s = bignum/xmax;
9630  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), *s);
9631  xmax = bignum;
9632  }
9633  if( notran )
9634  {
9635 
9636  /*
9637  * Solve A * x = b
9638  */
9639  j = jfirst;
9640  while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9641  {
9642 
9643  /*
9644  * Compute x(j) = b(j) / A(j,j), scaling x if necessary.
9645  */
9646  xj = ae_fabs(x->ptr.p_double[j], _state);
9647  flg = 0;
9648  if( nounit )
9649  {
9650  tjjs = a->ptr.pp_double[j][j]*tscal;
9651  }
9652  else
9653  {
9654  tjjs = tscal;
9655  if( ae_fp_eq(tscal,1) )
9656  {
9657  flg = 100;
9658  }
9659  }
9660  if( flg!=100 )
9661  {
9662  tjj = ae_fabs(tjjs, _state);
9663  if( ae_fp_greater(tjj,smlnum) )
9664  {
9665 
9666  /*
9667  * abs(A(j,j)) > SMLNUM:
9668  */
9669  if( ae_fp_less(tjj,1) )
9670  {
9671  if( ae_fp_greater(xj,tjj*bignum) )
9672  {
9673 
9674  /*
9675  * Scale x by 1/b(j).
9676  */
9677  rec = 1/xj;
9678  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
9679  *s = *s*rec;
9680  xmax = xmax*rec;
9681  }
9682  }
9683  x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
9684  xj = ae_fabs(x->ptr.p_double[j], _state);
9685  }
9686  else
9687  {
9688  if( ae_fp_greater(tjj,0) )
9689  {
9690 
9691  /*
9692  * 0 < abs(A(j,j)) <= SMLNUM:
9693  */
9694  if( ae_fp_greater(xj,tjj*bignum) )
9695  {
9696 
9697  /*
9698  * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
9699  * to avoid overflow when dividing by A(j,j).
9700  */
9701  rec = tjj*bignum/xj;
9702  if( ae_fp_greater(cnorm->ptr.p_double[j],1) )
9703  {
9704 
9705  /*
9706  * Scale by 1/CNORM(j) to avoid overflow when
9707  * multiplying x(j) times column j.
9708  */
9709  rec = rec/cnorm->ptr.p_double[j];
9710  }
9711  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
9712  *s = *s*rec;
9713  xmax = xmax*rec;
9714  }
9715  x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
9716  xj = ae_fabs(x->ptr.p_double[j], _state);
9717  }
9718  else
9719  {
9720 
9721  /*
9722  * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and
9723  * scale = 0, and compute a solution to A*x = 0.
9724  */
9725  for(i=1; i<=n; i++)
9726  {
9727  x->ptr.p_double[i] = 0;
9728  }
9729  x->ptr.p_double[j] = 1;
9730  xj = 1;
9731  *s = 0;
9732  xmax = 0;
9733  }
9734  }
9735  }
9736 
9737  /*
9738  * Scale x if necessary to avoid overflow when adding a
9739  * multiple of column j of A.
9740  */
9741  if( ae_fp_greater(xj,1) )
9742  {
9743  rec = 1/xj;
9744  if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xmax)*rec) )
9745  {
9746 
9747  /*
9748  * Scale x by 1/(2*abs(x(j))).
9749  */
9750  rec = rec*0.5;
9751  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
9752  *s = *s*rec;
9753  }
9754  }
9755  else
9756  {
9757  if( ae_fp_greater(xj*cnorm->ptr.p_double[j],bignum-xmax) )
9758  {
9759 
9760  /*
9761  * Scale x by 1/2.
9762  */
9763  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), 0.5);
9764  *s = *s*0.5;
9765  }
9766  }
9767  if( upper )
9768  {
9769  if( j>1 )
9770  {
9771 
9772  /*
9773  * Compute the update
9774  * x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
9775  */
9776  v = x->ptr.p_double[j]*tscal;
9777  jm1 = j-1;
9778  ae_v_subd(&x->ptr.p_double[1], 1, &a->ptr.pp_double[1][j], a->stride, ae_v_len(1,jm1), v);
9779  i = 1;
9780  for(k=2; k<=j-1; k++)
9781  {
9782  if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
9783  {
9784  i = k;
9785  }
9786  }
9787  xmax = ae_fabs(x->ptr.p_double[i], _state);
9788  }
9789  }
9790  else
9791  {
9792  if( j<n )
9793  {
9794 
9795  /*
9796  * Compute the update
9797  * x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
9798  */
9799  jp1 = j+1;
9800  v = x->ptr.p_double[j]*tscal;
9801  ae_v_subd(&x->ptr.p_double[jp1], 1, &a->ptr.pp_double[jp1][j], a->stride, ae_v_len(jp1,n), v);
9802  i = j+1;
9803  for(k=j+2; k<=n; k++)
9804  {
9805  if( ae_fp_greater(ae_fabs(x->ptr.p_double[k], _state),ae_fabs(x->ptr.p_double[i], _state)) )
9806  {
9807  i = k;
9808  }
9809  }
9810  xmax = ae_fabs(x->ptr.p_double[i], _state);
9811  }
9812  }
9813  j = j+jinc;
9814  }
9815  }
9816  else
9817  {
9818 
9819  /*
9820  * Solve A' * x = b
9821  */
9822  j = jfirst;
9823  while((jinc>0&&j<=jlast)||(jinc<0&&j>=jlast))
9824  {
9825 
9826  /*
9827  * Compute x(j) = b(j) - sum A(k,j)*x(k).
9828  * k<>j
9829  */
9830  xj = ae_fabs(x->ptr.p_double[j], _state);
9831  uscal = tscal;
9832  rec = 1/ae_maxreal(xmax, 1, _state);
9833  if( ae_fp_greater(cnorm->ptr.p_double[j],(bignum-xj)*rec) )
9834  {
9835 
9836  /*
9837  * If x(j) could overflow, scale x by 1/(2*XMAX).
9838  */
9839  rec = rec*0.5;
9840  if( nounit )
9841  {
9842  tjjs = a->ptr.pp_double[j][j]*tscal;
9843  }
9844  else
9845  {
9846  tjjs = tscal;
9847  }
9848  tjj = ae_fabs(tjjs, _state);
9849  if( ae_fp_greater(tjj,1) )
9850  {
9851 
9852  /*
9853  * Divide by A(j,j) when scaling x if A(j,j) > 1.
9854  */
9855  rec = ae_minreal(1, rec*tjj, _state);
9856  uscal = uscal/tjjs;
9857  }
9858  if( ae_fp_less(rec,1) )
9859  {
9860  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
9861  *s = *s*rec;
9862  xmax = xmax*rec;
9863  }
9864  }
9865  sumj = 0;
9866  if( ae_fp_eq(uscal,1) )
9867  {
9868 
9869  /*
9870  * If the scaling needed for A in the dot product is 1,
9871  * call DDOT to perform the dot product.
9872  */
9873  if( upper )
9874  {
9875  if( j>1 )
9876  {
9877  jm1 = j-1;
9878  sumj = ae_v_dotproduct(&a->ptr.pp_double[1][j], a->stride, &x->ptr.p_double[1], 1, ae_v_len(1,jm1));
9879  }
9880  else
9881  {
9882  sumj = 0;
9883  }
9884  }
9885  else
9886  {
9887  if( j<n )
9888  {
9889  jp1 = j+1;
9890  sumj = ae_v_dotproduct(&a->ptr.pp_double[jp1][j], a->stride, &x->ptr.p_double[jp1], 1, ae_v_len(jp1,n));
9891  }
9892  }
9893  }
9894  else
9895  {
9896 
9897  /*
9898  * Otherwise, use in-line code for the dot product.
9899  */
9900  if( upper )
9901  {
9902  for(i=1; i<=j-1; i++)
9903  {
9904  v = a->ptr.pp_double[i][j]*uscal;
9905  sumj = sumj+v*x->ptr.p_double[i];
9906  }
9907  }
9908  else
9909  {
9910  if( j<n )
9911  {
9912  for(i=j+1; i<=n; i++)
9913  {
9914  v = a->ptr.pp_double[i][j]*uscal;
9915  sumj = sumj+v*x->ptr.p_double[i];
9916  }
9917  }
9918  }
9919  }
9920  if( ae_fp_eq(uscal,tscal) )
9921  {
9922 
9923  /*
9924  * Compute x(j) := ( x(j) - sumj ) / A(j,j) if 1/A(j,j)
9925  * was not used to scale the dotproduct.
9926  */
9927  x->ptr.p_double[j] = x->ptr.p_double[j]-sumj;
9928  xj = ae_fabs(x->ptr.p_double[j], _state);
9929  flg = 0;
9930  if( nounit )
9931  {
9932  tjjs = a->ptr.pp_double[j][j]*tscal;
9933  }
9934  else
9935  {
9936  tjjs = tscal;
9937  if( ae_fp_eq(tscal,1) )
9938  {
9939  flg = 150;
9940  }
9941  }
9942 
9943  /*
9944  * Compute x(j) = x(j) / A(j,j), scaling if necessary.
9945  */
9946  if( flg!=150 )
9947  {
9948  tjj = ae_fabs(tjjs, _state);
9949  if( ae_fp_greater(tjj,smlnum) )
9950  {
9951 
9952  /*
9953  * abs(A(j,j)) > SMLNUM:
9954  */
9955  if( ae_fp_less(tjj,1) )
9956  {
9957  if( ae_fp_greater(xj,tjj*bignum) )
9958  {
9959 
9960  /*
9961  * Scale X by 1/abs(x(j)).
9962  */
9963  rec = 1/xj;
9964  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
9965  *s = *s*rec;
9966  xmax = xmax*rec;
9967  }
9968  }
9969  x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
9970  }
9971  else
9972  {
9973  if( ae_fp_greater(tjj,0) )
9974  {
9975 
9976  /*
9977  * 0 < abs(A(j,j)) <= SMLNUM:
9978  */
9979  if( ae_fp_greater(xj,tjj*bignum) )
9980  {
9981 
9982  /*
9983  * Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM.
9984  */
9985  rec = tjj*bignum/xj;
9986  ae_v_muld(&x->ptr.p_double[1], 1, ae_v_len(1,n), rec);
9987  *s = *s*rec;
9988  xmax = xmax*rec;
9989  }
9990  x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs;
9991  }
9992  else
9993  {
9994 
9995  /*
9996  * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and
9997  * scale = 0, and compute a solution to A'*x = 0.
9998  */
9999  for(i=1; i<=n; i++)
10000  {
10001  x->ptr.p_double[i] = 0;
10002  }
10003  x->ptr.p_double[j] = 1;
10004  *s = 0;
10005  xmax = 0;
10006  }
10007  }
10008  }
10009  }
10010  else
10011  {
10012 
10013  /*
10014  * Compute x(j) := x(j) / A(j,j) - sumj if the dot
10015  * product has already been divided by 1/A(j,j).
10016  */
10017  x->ptr.p_double[j] = x->ptr.p_double[j]/tjjs-sumj;
10018  }
10019  xmax = ae_maxreal(xmax, ae_fabs(x->ptr.p_double[j], _state), _state);
10020  j = j+jinc;
10021  }
10022  }
10023  *s = *s/tscal;
10024  }
10025 
10026  /*
10027  * Scale the column norms by 1/TSCAL for return.
10028  */
10029  if( ae_fp_neq(tscal,1) )
10030  {
10031  v = 1/tscal;
10032  ae_v_muld(&cnorm->ptr.p_double[1], 1, ae_v_len(1,n), v);
10033  }
10034 }
10035 
10036 
10037 
10038 
10039 /*************************************************************************
10040 Real implementation of CMatrixScaledTRSafeSolve
10041 
10042  -- ALGLIB routine --
10043  21.01.2010
10044  Bochkanov Sergey
10045 *************************************************************************/
10046 ae_bool rmatrixscaledtrsafesolve(/* Real */ ae_matrix* a,
10047  double sa,
10048  ae_int_t n,
10049  /* Real */ ae_vector* x,
10050  ae_bool isupper,
10051  ae_int_t trans,
10052  ae_bool isunit,
10053  double maxgrowth,
10054  ae_state *_state)
10055 {
10056  ae_frame _frame_block;
10057  double lnmax;
10058  double nrmb;
10059  double nrmx;
10060  ae_int_t i;
10061  ae_complex alpha;
10062  ae_complex beta;
10063  double vr;
10064  ae_complex cx;
10065  ae_vector tmp;
10066  ae_bool result;
10067 
10068  ae_frame_make(_state, &_frame_block);
10069  ae_vector_init(&tmp, 0, DT_REAL, _state, ae_true);
10070 
10071  ae_assert(n>0, "RMatrixTRSafeSolve: incorrect N!", _state);
10072  ae_assert(trans==0||trans==1, "RMatrixTRSafeSolve: incorrect Trans!", _state);
10073  result = ae_true;
10074  lnmax = ae_log(ae_maxrealnumber, _state);
10075 
10076  /*
10077  * Quick return if possible
10078  */
10079  if( n<=0 )
10080  {
10081  ae_frame_leave(_state);
10082  return result;
10083  }
10084 
10085  /*
10086  * Load norms: right part and X
10087  */
10088  nrmb = 0;
10089  for(i=0; i<=n-1; i++)
10090  {
10091  nrmb = ae_maxreal(nrmb, ae_fabs(x->ptr.p_double[i], _state), _state);
10092  }
10093  nrmx = 0;
10094 
10095  /*
10096  * Solve
10097  */
10098  ae_vector_set_length(&tmp, n, _state);
10099  result = ae_true;
10100  if( isupper&&trans==0 )
10101  {
10102 
10103  /*
10104  * U*x = b
10105  */
10106  for(i=n-1; i>=0; i--)
10107  {
10108 
10109  /*
10110  * Task is reduced to alpha*x[i] = beta
10111  */
10112  if( isunit )
10113  {
10114  alpha = ae_complex_from_d(sa);
10115  }
10116  else
10117  {
10118  alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
10119  }
10120  if( i<n-1 )
10121  {
10122  ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
10123  vr = ae_v_dotproduct(&tmp.ptr.p_double[i+1], 1, &x->ptr.p_double[i+1], 1, ae_v_len(i+1,n-1));
10124  beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
10125  }
10126  else
10127  {
10128  beta = ae_complex_from_d(x->ptr.p_double[i]);
10129  }
10130 
10131  /*
10132  * solve alpha*x[i] = beta
10133  */
10134  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10135  if( !result )
10136  {
10137  ae_frame_leave(_state);
10138  return result;
10139  }
10140  x->ptr.p_double[i] = cx.x;
10141  }
10142  ae_frame_leave(_state);
10143  return result;
10144  }
10145  if( !isupper&&trans==0 )
10146  {
10147 
10148  /*
10149  * L*x = b
10150  */
10151  for(i=0; i<=n-1; i++)
10152  {
10153 
10154  /*
10155  * Task is reduced to alpha*x[i] = beta
10156  */
10157  if( isunit )
10158  {
10159  alpha = ae_complex_from_d(sa);
10160  }
10161  else
10162  {
10163  alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
10164  }
10165  if( i>0 )
10166  {
10167  ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
10168  vr = ae_v_dotproduct(&tmp.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,i-1));
10169  beta = ae_complex_from_d(x->ptr.p_double[i]-vr);
10170  }
10171  else
10172  {
10173  beta = ae_complex_from_d(x->ptr.p_double[i]);
10174  }
10175 
10176  /*
10177  * solve alpha*x[i] = beta
10178  */
10179  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10180  if( !result )
10181  {
10182  ae_frame_leave(_state);
10183  return result;
10184  }
10185  x->ptr.p_double[i] = cx.x;
10186  }
10187  ae_frame_leave(_state);
10188  return result;
10189  }
10190  if( isupper&&trans==1 )
10191  {
10192 
10193  /*
10194  * U^T*x = b
10195  */
10196  for(i=0; i<=n-1; i++)
10197  {
10198 
10199  /*
10200  * Task is reduced to alpha*x[i] = beta
10201  */
10202  if( isunit )
10203  {
10204  alpha = ae_complex_from_d(sa);
10205  }
10206  else
10207  {
10208  alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
10209  }
10210  beta = ae_complex_from_d(x->ptr.p_double[i]);
10211 
10212  /*
10213  * solve alpha*x[i] = beta
10214  */
10215  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10216  if( !result )
10217  {
10218  ae_frame_leave(_state);
10219  return result;
10220  }
10221  x->ptr.p_double[i] = cx.x;
10222 
10223  /*
10224  * update the rest of right part
10225  */
10226  if( i<n-1 )
10227  {
10228  vr = cx.x;
10229  ae_v_moved(&tmp.ptr.p_double[i+1], 1, &a->ptr.pp_double[i][i+1], 1, ae_v_len(i+1,n-1), sa);
10230  ae_v_subd(&x->ptr.p_double[i+1], 1, &tmp.ptr.p_double[i+1], 1, ae_v_len(i+1,n-1), vr);
10231  }
10232  }
10233  ae_frame_leave(_state);
10234  return result;
10235  }
10236  if( !isupper&&trans==1 )
10237  {
10238 
10239  /*
10240  * L^T*x = b
10241  */
10242  for(i=n-1; i>=0; i--)
10243  {
10244 
10245  /*
10246  * Task is reduced to alpha*x[i] = beta
10247  */
10248  if( isunit )
10249  {
10250  alpha = ae_complex_from_d(sa);
10251  }
10252  else
10253  {
10254  alpha = ae_complex_from_d(a->ptr.pp_double[i][i]*sa);
10255  }
10256  beta = ae_complex_from_d(x->ptr.p_double[i]);
10257 
10258  /*
10259  * solve alpha*x[i] = beta
10260  */
10261  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &cx, _state);
10262  if( !result )
10263  {
10264  ae_frame_leave(_state);
10265  return result;
10266  }
10267  x->ptr.p_double[i] = cx.x;
10268 
10269  /*
10270  * update the rest of right part
10271  */
10272  if( i>0 )
10273  {
10274  vr = cx.x;
10275  ae_v_moved(&tmp.ptr.p_double[0], 1, &a->ptr.pp_double[i][0], 1, ae_v_len(0,i-1), sa);
10276  ae_v_subd(&x->ptr.p_double[0], 1, &tmp.ptr.p_double[0], 1, ae_v_len(0,i-1), vr);
10277  }
10278  }
10279  ae_frame_leave(_state);
10280  return result;
10281  }
10282  result = ae_false;
10283  ae_frame_leave(_state);
10284  return result;
10285 }
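
/*************************************************************************
Illustrative usage sketch for the routine above: growth-limited solve of
U*x=b with SA=1, given an already initialized ae_state. The helper name
example_rmatrixscaledtrsafesolve is hypothetical.
*************************************************************************/
static void example_rmatrixscaledtrsafesolve(ae_state *_state)   /* hypothetical, illustration only */
{
    ae_frame _frame_block;
    ae_matrix a;
    ae_vector x;
    ae_bool ok;

    ae_frame_make(_state, &_frame_block);
    ae_matrix_init(&a, 0, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&x, 0, DT_REAL, _state, ae_true);

    /* U = [1.0 0.5; 0.0 0.25], right part b = [1, 1] stored in X */
    ae_matrix_set_length(&a, 2, 2, _state);
    ae_vector_set_length(&x, 2, _state);
    a.ptr.pp_double[0][0] = 1.0;
    a.ptr.pp_double[0][1] = 0.5;
    a.ptr.pp_double[1][0] = 0.0;
    a.ptr.pp_double[1][1] = 0.25;
    x.ptr.p_double[0] = 1.0;
    x.ptr.p_double[1] = 1.0;

    /*
     * IsUpper=true, Trans=0 (solve U*x=b), non-unit diagonal, and the
     * solution may grow at most 1000 times in inf-norm relative to b.
     * Here X becomes [-1, 4] and the growth 4 is well below 1000.
     */
    ok = rmatrixscaledtrsafesolve(&a, 1.0, 2, &x, ae_true, 0, ae_false, 1000.0, _state);
    if( !ok )
    {
        /* U was (nearly) singular or the solution grew too much */
    }

    ae_frame_leave(_state);
}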
10286 
10287 
10288 /*************************************************************************
10289 Internal subroutine for safe solution of
10290 
10291  SA*op(A)*x=b
10292 
10293 where A is NxN upper/lower triangular/unitriangular matrix, op(A) is
10294 either identity transform, transposition or Hermitian transposition, SA is
10295 a scaling factor such that max(|SA*A[i,j]|) is close to 1.0 in magnitude.
10296 
10297 This subroutine limits relative growth of solution (in inf-norm) by
10298 MaxGrowth, returning False if growth exceeds MaxGrowth. Degenerate or
10299 near-degenerate matrices are handled correctly (False is returned) as long
10300 as MaxGrowth is significantly less than MaxRealNumber/norm(b).
10301 
10302  -- ALGLIB routine --
10303  21.01.2010
10304  Bochkanov Sergey
10305 *************************************************************************/
10306 ae_bool cmatrixscaledtrsafesolve(/* Complex */ ae_matrix* a,
10307  double sa,
10308  ae_int_t n,
10309  /* Complex */ ae_vector* x,
10310  ae_bool isupper,
10311  ae_int_t trans,
10312  ae_bool isunit,
10313  double maxgrowth,
10314  ae_state *_state)
10315 {
10316  ae_frame _frame_block;
10317  double lnmax;
10318  double nrmb;
10319  double nrmx;
10320  ae_int_t i;
10321  ae_complex alpha;
10322  ae_complex beta;
10323  ae_complex vc;
10324  ae_vector tmp;
10325  ae_bool result;
10326 
10327  ae_frame_make(_state, &_frame_block);
10328  ae_vector_init(&tmp, 0, DT_COMPLEX, _state, ae_true);
10329 
10330  ae_assert(n>0, "CMatrixTRSafeSolve: incorrect N!", _state);
10331  ae_assert((trans==0||trans==1)||trans==2, "CMatrixTRSafeSolve: incorrect Trans!", _state);
10332  result = ae_true;
10333  lnmax = ae_log(ae_maxrealnumber, _state);
10334 
10335  /*
10336  * Quick return if possible
10337  */
10338  if( n<=0 )
10339  {
10340  ae_frame_leave(_state);
10341  return result;
10342  }
10343 
10344  /*
10345  * Load norms: right part and X
10346  */
10347  nrmb = 0;
10348  for(i=0; i<=n-1; i++)
10349  {
10350  nrmb = ae_maxreal(nrmb, ae_c_abs(x->ptr.p_complex[i], _state), _state);
10351  }
10352  nrmx = 0;
10353 
10354  /*
10355  * Solve
10356  */
10357  ae_vector_set_length(&tmp, n, _state);
10358  result = ae_true;
10359  if( isupper&&trans==0 )
10360  {
10361 
10362  /*
10363  * U*x = b
10364  */
10365  for(i=n-1; i>=0; i--)
10366  {
10367 
10368  /*
10369  * Task is reduced to alpha*x[i] = beta
10370  */
10371  if( isunit )
10372  {
10373  alpha = ae_complex_from_d(sa);
10374  }
10375  else
10376  {
10377  alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
10378  }
10379  if( i<n-1 )
10380  {
10381  ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
10382  vc = ae_v_cdotproduct(&tmp.ptr.p_complex[i+1], 1, "N", &x->ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1));
10383  beta = ae_c_sub(x->ptr.p_complex[i],vc);
10384  }
10385  else
10386  {
10387  beta = x->ptr.p_complex[i];
10388  }
10389 
10390  /*
10391  * solve alpha*x[i] = beta
10392  */
10393  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10394  if( !result )
10395  {
10396  ae_frame_leave(_state);
10397  return result;
10398  }
10399  x->ptr.p_complex[i] = vc;
10400  }
10401  ae_frame_leave(_state);
10402  return result;
10403  }
10404  if( !isupper&&trans==0 )
10405  {
10406 
10407  /*
10408  * L*x = b
10409  */
10410  for(i=0; i<=n-1; i++)
10411  {
10412 
10413  /*
10414  * Task is reduced to alpha*x[i] = beta
10415  */
10416  if( isunit )
10417  {
10418  alpha = ae_complex_from_d(sa);
10419  }
10420  else
10421  {
10422  alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
10423  }
10424  if( i>0 )
10425  {
10426  ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
10427  vc = ae_v_cdotproduct(&tmp.ptr.p_complex[0], 1, "N", &x->ptr.p_complex[0], 1, "N", ae_v_len(0,i-1));
10428  beta = ae_c_sub(x->ptr.p_complex[i],vc);
10429  }
10430  else
10431  {
10432  beta = x->ptr.p_complex[i];
10433  }
10434 
10435  /*
10436  * solve alpha*x[i] = beta
10437  */
10438  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10439  if( !result )
10440  {
10441  ae_frame_leave(_state);
10442  return result;
10443  }
10444  x->ptr.p_complex[i] = vc;
10445  }
10446  ae_frame_leave(_state);
10447  return result;
10448  }
10449  if( isupper&&trans==1 )
10450  {
10451 
10452  /*
10453  * U^T*x = b
10454  */
10455  for(i=0; i<=n-1; i++)
10456  {
10457 
10458  /*
10459  * Task is reduced to alpha*x[i] = beta
10460  */
10461  if( isunit )
10462  {
10463  alpha = ae_complex_from_d(sa);
10464  }
10465  else
10466  {
10467  alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
10468  }
10469  beta = x->ptr.p_complex[i];
10470 
10471  /*
10472  * solve alpha*x[i] = beta
10473  */
10474  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10475  if( !result )
10476  {
10477  ae_frame_leave(_state);
10478  return result;
10479  }
10480  x->ptr.p_complex[i] = vc;
10481 
10482  /*
10483  * update the rest of right part
10484  */
10485  if( i<n-1 )
10486  {
10487  ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "N", ae_v_len(i+1,n-1), sa);
10488  ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
10489  }
10490  }
10491  ae_frame_leave(_state);
10492  return result;
10493  }
10494  if( !isupper&&trans==1 )
10495  {
10496 
10497  /*
10498  * L^T*x = b
10499  */
10500  for(i=n-1; i>=0; i--)
10501  {
10502 
10503  /*
10504  * Task is reduced to alpha*x[i] = beta
10505  */
10506  if( isunit )
10507  {
10508  alpha = ae_complex_from_d(sa);
10509  }
10510  else
10511  {
10512  alpha = ae_c_mul_d(a->ptr.pp_complex[i][i],sa);
10513  }
10514  beta = x->ptr.p_complex[i];
10515 
10516  /*
10517  * solve alpha*x[i] = beta
10518  */
10519  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10520  if( !result )
10521  {
10522  ae_frame_leave(_state);
10523  return result;
10524  }
10525  x->ptr.p_complex[i] = vc;
10526 
10527  /*
10528  * update the rest of right part
10529  */
10530  if( i>0 )
10531  {
10532  ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "N", ae_v_len(0,i-1), sa);
10533  ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
10534  }
10535  }
10536  ae_frame_leave(_state);
10537  return result;
10538  }
10539  if( isupper&&trans==2 )
10540  {
10541 
10542  /*
10543  * U^H*x = b
10544  */
10545  for(i=0; i<=n-1; i++)
10546  {
10547 
10548  /*
10549  * Task is reduced to alpha*x[i] = beta
10550  */
10551  if( isunit )
10552  {
10553  alpha = ae_complex_from_d(sa);
10554  }
10555  else
10556  {
10557  alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
10558  }
10559  beta = x->ptr.p_complex[i];
10560 
10561  /*
10562  * solve alpha*x[i] = beta
10563  */
10564  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10565  if( !result )
10566  {
10567  ae_frame_leave(_state);
10568  return result;
10569  }
10570  x->ptr.p_complex[i] = vc;
10571 
10572  /*
10573  * update the rest of right part
10574  */
10575  if( i<n-1 )
10576  {
10577  ae_v_cmoved(&tmp.ptr.p_complex[i+1], 1, &a->ptr.pp_complex[i][i+1], 1, "Conj", ae_v_len(i+1,n-1), sa);
10578  ae_v_csubc(&x->ptr.p_complex[i+1], 1, &tmp.ptr.p_complex[i+1], 1, "N", ae_v_len(i+1,n-1), vc);
10579  }
10580  }
10581  ae_frame_leave(_state);
10582  return result;
10583  }
10584  if( !isupper&&trans==2 )
10585  {
10586 
10587  /*
10588  * L^H*x = b
10589  */
10590  for(i=n-1; i>=0; i--)
10591  {
10592 
10593  /*
10594  * Task is reduced to alpha*x[i] = beta
10595  */
10596  if( isunit )
10597  {
10598  alpha = ae_complex_from_d(sa);
10599  }
10600  else
10601  {
10602  alpha = ae_c_mul_d(ae_c_conj(a->ptr.pp_complex[i][i], _state),sa);
10603  }
10604  beta = x->ptr.p_complex[i];
10605 
10606  /*
10607  * solve alpha*x[i] = beta
10608  */
10609  result = safesolve_cbasicsolveandupdate(alpha, beta, lnmax, nrmb, maxgrowth, &nrmx, &vc, _state);
10610  if( !result )
10611  {
10612  ae_frame_leave(_state);
10613  return result;
10614  }
10615  x->ptr.p_complex[i] = vc;
10616 
10617  /*
10618  * update the rest of right part
10619  */
10620  if( i>0 )
10621  {
10622  ae_v_cmoved(&tmp.ptr.p_complex[0], 1, &a->ptr.pp_complex[i][0], 1, "Conj", ae_v_len(0,i-1), sa);
10623  ae_v_csubc(&x->ptr.p_complex[0], 1, &tmp.ptr.p_complex[0], 1, "N", ae_v_len(0,i-1), vc);
10624  }
10625  }
10626  ae_frame_leave(_state);
10627  return result;
10628  }
10629  result = ae_false;
10630  ae_frame_leave(_state);
10631  return result;
10632 }
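
/*************************************************************************
Illustrative usage sketch for the routine above: solving the conjugate
transposed system U^H*x=b (Trans=2) with SA=1, given an already initialized
ae_state. The helper name example_cmatrixscaledtrsafesolve is hypothetical.
*************************************************************************/
static void example_cmatrixscaledtrsafesolve(ae_state *_state)   /* hypothetical, illustration only */
{
    ae_frame _frame_block;
    ae_matrix a;
    ae_vector x;
    ae_bool ok;

    ae_frame_make(_state, &_frame_block);
    ae_matrix_init(&a, 0, 0, DT_COMPLEX, _state, ae_true);
    ae_vector_init(&x, 0, DT_COMPLEX, _state, ae_true);

    /* U = [1 i; 0 2], right part b = [1, 2] stored in X */
    ae_matrix_set_length(&a, 2, 2, _state);
    ae_vector_set_length(&x, 2, _state);
    a.ptr.pp_complex[0][0] = ae_complex_from_d(1.0);
    a.ptr.pp_complex[0][1].x = 0.0;
    a.ptr.pp_complex[0][1].y = 1.0;              /* the entry i */
    a.ptr.pp_complex[1][0] = ae_complex_from_d(0.0);
    a.ptr.pp_complex[1][1] = ae_complex_from_d(2.0);
    x.ptr.p_complex[0] = ae_complex_from_d(1.0);
    x.ptr.p_complex[1] = ae_complex_from_d(2.0);

    /*
     * Trans=2 selects U^H*x=b, i.e. [1 0; -i 2]*x = b, whose solution is
     * x = [1, 1+0.5i]; the growth stays far below MaxGrowth=1000.
     */
    ok = cmatrixscaledtrsafesolve(&a, 1.0, 2, &x, ae_true, 2, ae_false, 1000.0, _state);
    if( !ok )
    {
        /* U was (nearly) singular or the solution grew too much */
    }

    ae_frame_leave(_state);
}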
10633 
10634 
10635 /*************************************************************************
10636 complex basic solver-updater for reduced linear system
10637 
10638  alpha*x[i] = beta
10639 
10640 solves this equation and updates it in an overflow-safe manner (keeping track
10641 of the relative growth of the solution).
10642 
10643 Parameters:
10644  Alpha - alpha
10645  Beta - beta
10646  LnMax - precomputed Ln(MaxRealNumber)
10647  BNorm - inf-norm of b (right part of original system)
10648  MaxGrowth- maximum growth of norm(x) relative to norm(b)
10649  XNorm - inf-norm of the other components of X (which are already processed);
10650  it is updated by CBasicSolveAndUpdate.
10651  X - solution
10652 
10653  -- ALGLIB routine --
10654  26.01.2009
10655  Bochkanov Sergey
10656 *************************************************************************/
10657 static ae_bool safesolve_cbasicsolveandupdate(ae_complex alpha,
10658  ae_complex beta,
10659  double lnmax,
10660  double bnorm,
10661  double maxgrowth,
10662  double* xnorm,
10663  ae_complex* x,
10664  ae_state *_state)
10665 {
10666  double v;
10667  ae_bool result;
10668 
10669  x->x = 0;
10670  x->y = 0;
10671 
10672  result = ae_false;
10673  if( ae_c_eq_d(alpha,0) )
10674  {
10675  return result;
10676  }
10677  if( ae_c_neq_d(beta,0) )
10678  {
10679 
10680  /*
10681  * alpha*x[i]=beta
10682  */
10683  v = ae_log(ae_c_abs(beta, _state), _state)-ae_log(ae_c_abs(alpha, _state), _state);
10684  if( ae_fp_greater(v,lnmax) )
10685  {
10686  return result;
10687  }
10688  *x = ae_c_div(beta,alpha);
10689  }
10690  else
10691  {
10692 
10693  /*
10694  * alpha*x[i]=0
10695  */
10696  *x = ae_complex_from_d(0);
10697  }
10698 
10699  /*
10700  * update NrmX, test growth limit
10701  */
10702  *xnorm = ae_maxreal(*xnorm, ae_c_abs(*x, _state), _state);
10703  if( ae_fp_greater(*xnorm,maxgrowth*bnorm) )
10704  {
10705  return result;
10706  }
10707  result = ae_true;
10708  return result;
10709 }
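
/*************************************************************************
Illustrative sketch of the overflow guard above: with Alpha=1e-300 and
Beta=1e+100 the quantity ln|Beta|-ln|Alpha| is about 921, which exceeds
ln(MaxRealNumber) (about 709), so ae_false is returned before Beta/Alpha is
ever formed. The helper name example_cbasicsolveandupdate is hypothetical.
*************************************************************************/
static void example_cbasicsolveandupdate(ae_state *_state)   /* hypothetical, illustration only */
{
    double xnorm;
    ae_complex x;
    ae_bool ok;

    xnorm = 0.0;
    ok = safesolve_cbasicsolveandupdate(
        ae_complex_from_d(1.0E-300),          /* alpha */
        ae_complex_from_d(1.0E+100),          /* beta  */
        ae_log(ae_maxrealnumber, _state),     /* LnMax */
        1.0E+100,                             /* BNorm */
        10.0,                                 /* MaxGrowth */
        &xnorm, &x, _state);
    if( !ok )
    {
        /* the solution component would have exceeded MaxRealNumber */
    }
}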
10710 
10711 
10712 
10713 
10714 /*************************************************************************
10715 Prepares HPC computations of chunked gradient with HPCChunkedGradient().
10716 You have to call this function before calling HPCChunkedGradient() for
10717 a new set of weights. You have to call it only once; see the example below:
10718 
10719 HOW TO PROCESS DATASET WITH THIS FUNCTION:
10720  Grad:=0
10721  HPCPrepareChunkedGradient(Weights, WCount, NTotal, NIn, NOut, Buf)
10722  foreach chunk-of-dataset do
10723  HPCChunkedGradient(...)
10724  HPCFinalizeChunkedGradient(Buf, Grad)
10725 
10726 *************************************************************************/
10727 void hpcpreparechunkedgradient(/* Real */ ae_vector* weights,
10728  ae_int_t wcount,
10729  ae_int_t ntotal,
10730  ae_int_t nin,
10731  ae_int_t nout,
10732  mlpbuffers* buf,
10733  ae_state *_state)
10734 {
10735  ae_int_t i;
10736  ae_int_t batch4size;
10737  ae_int_t chunksize;
10738 
10739 
10740  chunksize = 4;
10741  batch4size = 3*chunksize*ntotal+chunksize*(2*nout+1);
10742  if( buf->xy.rows<chunksize||buf->xy.cols<nin+nout )
10743  {
10744  ae_matrix_set_length(&buf->xy, chunksize, nin+nout, _state);
10745  }
10746  if( buf->xy2.rows<chunksize||buf->xy2.cols<nin+nout )
10747  {
10748  ae_matrix_set_length(&buf->xy2, chunksize, nin+nout, _state);
10749  }
10750  if( buf->xyrow.cnt<nin+nout )
10751  {
10752  ae_vector_set_length(&buf->xyrow, nin+nout, _state);
10753  }
10754  if( buf->x.cnt<nin )
10755  {
10756  ae_vector_set_length(&buf->x, nin, _state);
10757  }
10758  if( buf->y.cnt<nout )
10759  {
10760  ae_vector_set_length(&buf->y, nout, _state);
10761  }
10762  if( buf->desiredy.cnt<nout )
10763  {
10764  ae_vector_set_length(&buf->desiredy, nout, _state);
10765  }
10766  if( buf->batch4buf.cnt<batch4size )
10767  {
10768  ae_vector_set_length(&buf->batch4buf, batch4size, _state);
10769  }
10770  if( buf->hpcbuf.cnt<wcount )
10771  {
10772  ae_vector_set_length(&buf->hpcbuf, wcount, _state);
10773  }
10774  if( buf->g.cnt<wcount )
10775  {
10776  ae_vector_set_length(&buf->g, wcount, _state);
10777  }
10778  if( !hpccores_hpcpreparechunkedgradientx(weights, wcount, &buf->hpcbuf, _state) )
10779  {
10780  for(i=0; i<=wcount-1; i++)
10781  {
10782  buf->hpcbuf.ptr.p_double[i] = 0.0;
10783  }
10784  }
10785  buf->wcount = wcount;
10786  buf->ntotal = ntotal;
10787  buf->nin = nin;
10788  buf->nout = nout;
10789  buf->chunksize = chunksize;
10790 }
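
/*************************************************************************
Illustrative sketch (in C) of the prepare / process / finalize pattern
described above. Filling Buf.XY with the next chunk of samples depends on
MLP internals (StructInfo, column means/sigmas) and is only indicated by a
comment; the helper name example_chunked_gradient_pattern and its parameter
list are hypothetical.
*************************************************************************/
static void example_chunked_gradient_pattern(/* Real */ ae_vector* weights,
     ae_int_t wcount,
     ae_int_t ntotal,
     ae_int_t nin,
     ae_int_t nout,
     /* Real */ ae_vector* grad,
     ae_state *_state)
{
    ae_frame _frame_block;
    mlpbuffers buf;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    _mlpbuffers_init(&buf, _state, ae_true);

    /* Grad := 0 */
    for(i=0; i<=wcount-1; i++)
    {
        grad->ptr.p_double[i] = 0.0;
    }

    /* one-time preparation for this set of weights */
    hpcpreparechunkedgradient(weights, wcount, ntotal, nin, nout, &buf, _state);

    /*
     * foreach chunk of at most Buf.ChunkSize samples:
     *     copy the chunk into Buf.XY and call HPCChunkedGradient(...),
     *     which accumulates the partial gradient into Buf.HPCBuf and the
     *     error into E (it returns ae_false when the SSE2 kernel is not
     *     compiled in, in which case generic gradient code must be used).
     */

    /* fold the accumulated partial gradient into Grad */
    hpcfinalizechunkedgradient(&buf, grad, _state);

    ae_frame_leave(_state);
}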
10791 
10792 
10793 /*************************************************************************
10794 Finalizes HPC computations of chunked gradient with HPCChunkedGradient().
10795 You have to call this function after calling HPCChunkedGradient() for
10796 a new set of weights. You have to call it only once; see the example below:
10797 
10798 HOW TO PROCESS DATASET WITH THIS FUNCTION:
10799  Grad:=0
10800  HPCPrepareChunkedGradient(Weights, WCount, NTotal, NIn, NOut, Buf)
10801  foreach chunk-of-dataset do
10802  HPCChunkedGradient(...)
10803  HPCFinalizeChunkedGradient(Buf, Grad)
10804 
10805 *************************************************************************/
10806 void hpcfinalizechunkedgradient(mlpbuffers* buf,
10807  /* Real */ ae_vector* grad,
10808  ae_state *_state)
10809 {
10810  ae_int_t i;
10811 
10812 
10813  if( !hpccores_hpcfinalizechunkedgradientx(&buf->hpcbuf, buf->wcount, grad, _state) )
10814  {
10815  for(i=0; i<=buf->wcount-1; i++)
10816  {
10817  grad->ptr.p_double[i] = grad->ptr.p_double[i]+buf->hpcbuf.ptr.p_double[i];
10818  }
10819  }
10820 }
10821 
10822 
10823 /*************************************************************************
10824 Fast kernel for chunked gradient.
10825 
10826 *************************************************************************/
10827 ae_bool hpcchunkedgradient(/* Real */ ae_vector* weights,
10828  /* Integer */ ae_vector* structinfo,
10829  /* Real */ ae_vector* columnmeans,
10830  /* Real */ ae_vector* columnsigmas,
10831  /* Real */ ae_matrix* xy,
10832  ae_int_t cstart,
10833  ae_int_t csize,
10834  /* Real */ ae_vector* batch4buf,
10835  /* Real */ ae_vector* hpcbuf,
10836  double* e,
10837  ae_bool naturalerrorfunc,
10838  ae_state *_state)
10839 {
10840 #ifndef ALGLIB_INTERCEPTS_SSE2
10841  ae_bool result;
10842 
10843 
10844  result = ae_false;
10845  return result;
10846 #else
10847  return _ialglib_i_hpcchunkedgradient(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf, e, naturalerrorfunc);
10848 #endif
10849 }
10850 
10851 
10852 /*************************************************************************
10853 Fast kernel for chunked processing.
10854 
10855 *************************************************************************/
10856 ae_bool hpcchunkedprocess(/* Real */ ae_vector* weights,
10857  /* Integer */ ae_vector* structinfo,
10858  /* Real */ ae_vector* columnmeans,
10859  /* Real */ ae_vector* columnsigmas,
10860  /* Real */ ae_matrix* xy,
10861  ae_int_t cstart,
10862  ae_int_t csize,
10863  /* Real */ ae_vector* batch4buf,
10864  /* Real */ ae_vector* hpcbuf,
10865  ae_state *_state)
10866 {
10867 #ifndef ALGLIB_INTERCEPTS_SSE2
10868  ae_bool result;
10869 
10870 
10871  result = ae_false;
10872  return result;
10873 #else
10874  return _ialglib_i_hpcchunkedprocess(weights, structinfo, columnmeans, columnsigmas, xy, cstart, csize, batch4buf, hpcbuf);
10875 #endif
10876 }
10877 
10878 
10879 /*************************************************************************
10880 Stub function.
10881 
10882  -- ALGLIB routine --
10883  14.06.2013
10884  Bochkanov Sergey
10885 *************************************************************************/
10886 static ae_bool hpccores_hpcpreparechunkedgradientx(/* Real */ ae_vector* weights,
10887  ae_int_t wcount,
10888  /* Real */ ae_vector* hpcbuf,
10889  ae_state *_state)
10890 {
10891 #ifndef ALGLIB_INTERCEPTS_SSE2
10892  ae_bool result;
10893 
10894 
10895  result = ae_false;
10896  return result;
10897 #else
10898  return _ialglib_i_hpcpreparechunkedgradientx(weights, wcount, hpcbuf);
10899 #endif
10900 }
10901 
10902 
10903 /*************************************************************************
10904 Stub function.
10905 
10906  -- ALGLIB routine --
10907  14.06.2013
10908  Bochkanov Sergey
10909 *************************************************************************/
10910 static ae_bool hpccores_hpcfinalizechunkedgradientx(/* Real */ ae_vector* buf,
10911  ae_int_t wcount,
10912  /* Real */ ae_vector* grad,
10913  ae_state *_state)
10914 {
10915 #ifndef ALGLIB_INTERCEPTS_SSE2
10916  ae_bool result;
10917 
10918 
10919  result = ae_false;
10920  return result;
10921 #else
10922  return _ialglib_i_hpcfinalizechunkedgradientx(buf, wcount, grad);
10923 #endif
10924 }
10925 
10926 
10927 ae_bool _mlpbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic)
10928 {
10929  mlpbuffers *p = (mlpbuffers*)_p;
10930  ae_touch_ptr((void*)p);
10931  if( !ae_vector_init(&p->batch4buf, 0, DT_REAL, _state, make_automatic) )
10932  return ae_false;
10933  if( !ae_vector_init(&p->hpcbuf, 0, DT_REAL, _state, make_automatic) )
10934  return ae_false;
10935  if( !ae_matrix_init(&p->xy, 0, 0, DT_REAL, _state, make_automatic) )
10936  return ae_false;
10937  if( !ae_matrix_init(&p->xy2, 0, 0, DT_REAL, _state, make_automatic) )
10938  return ae_false;
10939  if( !ae_vector_init(&p->xyrow, 0, DT_REAL, _state, make_automatic) )
10940  return ae_false;
10941  if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
10942  return ae_false;
10943  if( !ae_vector_init(&p->y, 0, DT_REAL, _state, make_automatic) )
10944  return ae_false;
10945  if( !ae_vector_init(&p->desiredy, 0, DT_REAL, _state, make_automatic) )
10946  return ae_false;
10947  if( !ae_vector_init(&p->g, 0, DT_REAL, _state, make_automatic) )
10948  return ae_false;
10949  if( !ae_vector_init(&p->tmp0, 0, DT_REAL, _state, make_automatic) )
10950  return ae_false;
10951  return ae_true;
10952 }
10953 
10954 
10955 ae_bool _mlpbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
10956 {
10957  mlpbuffers *dst = (mlpbuffers*)_dst;
10958  mlpbuffers *src = (mlpbuffers*)_src;
10959  dst->chunksize = src->chunksize;
10960  dst->ntotal = src->ntotal;
10961  dst->nin = src->nin;
10962  dst->nout = src->nout;
10963  dst->wcount = src->wcount;
10964  if( !ae_vector_init_copy(&dst->batch4buf, &src->batch4buf, _state, make_automatic) )
10965  return ae_false;
10966  if( !ae_vector_init_copy(&dst->hpcbuf, &src->hpcbuf, _state, make_automatic) )
10967  return ae_false;
10968  if( !ae_matrix_init_copy(&dst->xy, &src->xy, _state, make_automatic) )
10969  return ae_false;
10970  if( !ae_matrix_init_copy(&dst->xy2, &src->xy2, _state, make_automatic) )
10971  return ae_false;
10972  if( !ae_vector_init_copy(&dst->xyrow, &src->xyrow, _state, make_automatic) )
10973  return ae_false;
10974  if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
10975  return ae_false;
10976  if( !ae_vector_init_copy(&dst->y, &src->y, _state, make_automatic) )
10977  return ae_false;
10978  if( !ae_vector_init_copy(&dst->desiredy, &src->desiredy, _state, make_automatic) )
10979  return ae_false;
10980  dst->e = src->e;
10981  if( !ae_vector_init_copy(&dst->g, &src->g, _state, make_automatic) )
10982  return ae_false;
10983  if( !ae_vector_init_copy(&dst->tmp0, &src->tmp0, _state, make_automatic) )
10984  return ae_false;
10985  return ae_true;
10986 }
10987 
10988 
10989 void _mlpbuffers_clear(void* _p)
10990 {
10991  mlpbuffers *p = (mlpbuffers*)_p;
10992  ae_touch_ptr((void*)p);
10993  ae_vector_clear(&p->batch4buf);
10994  ae_vector_clear(&p->hpcbuf);
10995  ae_matrix_clear(&p->xy);
10996  ae_matrix_clear(&p->xy2);
10997  ae_vector_clear(&p->xyrow);
10998  ae_vector_clear(&p->x);
10999  ae_vector_clear(&p->y);
11000  ae_vector_clear(&p->desiredy);
11001  ae_vector_clear(&p->g);
11002  ae_vector_clear(&p->tmp0);
11003 }
11004 
11005 
11006 void _mlpbuffers_destroy(void* _p)
11007 {
11008  mlpbuffers *p = (mlpbuffers*)_p;
11009  ae_touch_ptr((void*)p);
11010  ae_vector_destroy(&p->batch4buf);
11011  ae_vector_destroy(&p->hpcbuf);
11012  ae_matrix_destroy(&p->xy);
11013  ae_matrix_destroy(&p->xy2);
11014  ae_vector_destroy(&p->xyrow);
11015  ae_vector_destroy(&p->x);
11016  ae_vector_destroy(&p->y);
11017  ae_vector_destroy(&p->desiredy);
11018  ae_vector_destroy(&p->g);
11019  ae_vector_destroy(&p->tmp0);
11020 }
11021 
11022 
11023 
11024 
11025 /*************************************************************************
11026 More precise dot-product. Absolute error of subroutine result is about
11027 1 ulp of max(MX,V), where:
11028  MX = max( |a[i]*b[i]| )
11029  V = |(a,b)|
11030 
11031 INPUT PARAMETERS
11032  A - array[0..N-1], vector 1
11033  B - array[0..N-1], vector 2
11034  N - vectors length, N<2^29.
11035  Temp - array[0..N-1], pre-allocated temporary storage
11036 
11037 OUTPUT PARAMETERS
11038  R - (A,B)
11039  RErr - estimate of error. This estimate accounts for both errors
11040  during calculation of (A,B) and errors introduced by
11041  rounding of A and B to fit in double (about 1 ulp).
11042 
11043  -- ALGLIB --
11044  Copyright 24.08.2009 by Bochkanov Sergey
11045 *************************************************************************/
11046 void xdot(/* Real */ ae_vector* a,
11047  /* Real */ ae_vector* b,
11048  ae_int_t n,
11049  /* Real */ ae_vector* temp,
11050  double* r,
11051  double* rerr,
11052  ae_state *_state)
11053 {
11054  ae_int_t i;
11055  double mx;
11056  double v;
11057 
11058  *r = 0;
11059  *rerr = 0;
11060 
11061 
11062  /*
11063  * special cases:
11064  * * N=0
11065  */
11066  if( n==0 )
11067  {
11068  *r = 0;
11069  *rerr = 0;
11070  return;
11071  }
11072  mx = 0;
11073  for(i=0; i<=n-1; i++)
11074  {
11075  v = a->ptr.p_double[i]*b->ptr.p_double[i];
11076  temp->ptr.p_double[i] = v;
11077  mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
11078  }
11079  if( ae_fp_eq(mx,0) )
11080  {
11081  *r = 0;
11082  *rerr = 0;
11083  return;
11084  }
11085  xblas_xsum(temp, mx, n, r, rerr, _state);
11086 }
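
/*************************************************************************
Illustrative usage sketch for XDot above, given an already initialized
ae_state: Temp must be preallocated with at least N elements. The helper
name example_xdot is hypothetical.
*************************************************************************/
static void example_xdot(ae_state *_state)   /* hypothetical, illustration only */
{
    ae_frame _frame_block;
    ae_vector a;
    ae_vector b;
    ae_vector temp;
    double r;
    double rerr;
    ae_int_t i;

    ae_frame_make(_state, &_frame_block);
    ae_vector_init(&a, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&b, 0, DT_REAL, _state, ae_true);
    ae_vector_init(&temp, 0, DT_REAL, _state, ae_true);
    ae_vector_set_length(&a, 3, _state);
    ae_vector_set_length(&b, 3, _state);
    ae_vector_set_length(&temp, 3, _state);
    for(i=0; i<=2; i++)
    {
        a.ptr.p_double[i] = i+1;     /* a = [1,2,3] */
        b.ptr.p_double[i] = i+4;     /* b = [4,5,6] */
    }

    /*
     * R becomes (a,b)=32 and RErr is an estimate of its absolute error,
     * covering both the summation and the rounding of a and b to double.
     */
    xdot(&a, &b, 3, &temp, &r, &rerr, _state);

    ae_frame_leave(_state);
}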
11087 
11088 
11089 /*************************************************************************
11090 More precise complex dot-product. Absolute error of subroutine result is
11091 about 1 ulp of max(MX,V), where:
11092  MX = max( |a[i]*b[i]| )
11093  V = |(a,b)|
11094 
11095 INPUT PARAMETERS
11096  A - array[0..N-1], vector 1
11097  B - array[0..N-1], vector 2
11098  N - vectors length, N<2^29.
11099  Temp - array[0..2*N-1], pre-allocated temporary storage
11100 
11101 OUTPUT PARAMETERS
11102  R - (A,B)
11103  RErr - estimate of error. This estimate accounts for both errors
11104  during calculation of (A,B) and errors introduced by
11105  rounding of A and B to fit in double (about 1 ulp).
11106 
11107  -- ALGLIB --
11108  Copyright 27.01.2010 by Bochkanov Sergey
11109 *************************************************************************/
11110 void xcdot(/* Complex */ ae_vector* a,
11111  /* Complex */ ae_vector* b,
11112  ae_int_t n,
11113  /* Real */ ae_vector* temp,
11114  ae_complex* r,
11115  double* rerr,
11116  ae_state *_state)
11117 {
11118  ae_int_t i;
11119  double mx;
11120  double v;
11121  double rerrx;
11122  double rerry;
11123 
11124  r->x = 0;
11125  r->y = 0;
11126  *rerr = 0;
11127 
11128 
11129  /*
11130  * special cases:
11131  * * N=0
11132  */
11133  if( n==0 )
11134  {
11135  *r = ae_complex_from_d(0);
11136  *rerr = 0;
11137  return;
11138  }
11139 
11140  /*
11141  * calculate real part
11142  */
11143  mx = 0;
11144  for(i=0; i<=n-1; i++)
11145  {
11146  v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].x;
11147  temp->ptr.p_double[2*i+0] = v;
11148  mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
11149  v = -a->ptr.p_complex[i].y*b->ptr.p_complex[i].y;
11150  temp->ptr.p_double[2*i+1] = v;
11151  mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
11152  }
11153  if( ae_fp_eq(mx,0) )
11154  {
11155  r->x = 0;
11156  rerrx = 0;
11157  }
11158  else
11159  {
11160  xblas_xsum(temp, mx, 2*n, &r->x, &rerrx, _state);
11161  }
11162 
11163  /*
11164  * calculate imaginary part
11165  */
11166  mx = 0;
11167  for(i=0; i<=n-1; i++)
11168  {
11169  v = a->ptr.p_complex[i].x*b->ptr.p_complex[i].y;
11170  temp->ptr.p_double[2*i+0] = v;
11171  mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
11172  v = a->ptr.p_complex[i].y*b->ptr.p_complex[i].x;
11173  temp->ptr.p_double[2*i+1] = v;
11174  mx = ae_maxreal(mx, ae_fabs(v, _state), _state);
11175  }
11176  if( ae_fp_eq(mx,0) )
11177  {
11178  r->y = 0;
11179  rerry = 0;
11180  }
11181  else
11182  {
11183  xblas_xsum(temp, mx, 2*n, &r->y, &rerry, _state);
11184  }
11185 
11186  /*
11187  * total error
11188  */
11189  if( ae_fp_eq(rerrx,0)&&ae_fp_eq(rerry,0) )
11190  {
11191  *rerr = 0;
11192  }
11193  else
11194  {
11195  *rerr = ae_maxreal(rerrx, rerry, _state)*ae_sqrt(1+ae_sqr(ae_minreal(rerrx, rerry, _state)/ae_maxreal(rerrx, rerry, _state), _state), _state);
11196  }
11197 }
11198 
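/*
 * Editorial note (standalone sketch, not part of the ALGLIB source): the total
 * error above combines the real and imaginary error estimates as their
 * Euclidean norm, written in the overflow-safe form
 * max(ex,ey)*sqrt(1+(min(ex,ey)/max(ex,ey))^2).  Equivalent standalone code
 * for nonnegative inputs:
 */
#include <math.h>
static double error_norm_demo(double ex, double ey)
{
    double mx = ex>ey ? ex : ey;
    double mn = ex>ey ? ey : ex;
    if( mx==0.0 )
        return 0.0;                          /* both error estimates are zero */
    return mx*sqrt(1.0+(mn/mx)*(mn/mx));     /* equals sqrt(ex*ex+ey*ey) */
}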
11199 
11200 /*************************************************************************
11201 Internal subroutine for extra-precise calculation of SUM(w[i]).
11202 
11203 INPUT PARAMETERS:
11204  W - array[0..N-1], values to be added
11205  W is modified during calculations.
11206  MX - max(W[i])
11207  N - array size
11208 
11209 OUTPUT PARAMETERS:
11210  R - SUM(w[i])
11211  RErr- error estimate for R
11212 
11213  -- ALGLIB --
11214  Copyright 24.08.2009 by Bochkanov Sergey
11215 *************************************************************************/
11216 static void xblas_xsum(/* Real */ ae_vector* w,
11217  double mx,
11218  ae_int_t n,
11219  double* r,
11220  double* rerr,
11221  ae_state *_state)
11222 {
11223  ae_int_t i;
11224  ae_int_t k;
11225  ae_int_t ks;
11226  double v;
11227  double s;
11228  double ln2;
11229  double chunk;
11230  double invchunk;
11231  ae_bool allzeros;
11232 
11233  *r = 0;
11234  *rerr = 0;
11235 
11236 
11237  /*
11238  * special cases:
11239  * * N=0
11240  * * N is too large to use integer arithmetic
11241  */
11242  if( n==0 )
11243  {
11244  *r = 0;
11245  *rerr = 0;
11246  return;
11247  }
11248  if( ae_fp_eq(mx,0) )
11249  {
11250  *r = 0;
11251  *rerr = 0;
11252  return;
11253  }
11254  ae_assert(n<536870912, "XDot: N is too large!", _state);
11255 
11256  /*
11257  * Prepare
11258  */
11259  ln2 = ae_log(2, _state);
11260  *rerr = mx*ae_machineepsilon;
11261 
11262  /*
11263  * 1. find S such that 0.5<=S*MX<1
11264  * 2. multiply W by S, so task is normalized in some sense
11265  * 3. S:=1/S so we can obtain original vector multiplying by S
11266  */
11267  k = ae_round(ae_log(mx, _state)/ln2, _state);
11268  s = xblas_xfastpow(2, -k, _state);
11269  while(ae_fp_greater_eq(s*mx,1))
11270  {
11271  s = 0.5*s;
11272  }
11273  while(ae_fp_less(s*mx,0.5))
11274  {
11275  s = 2*s;
11276  }
11277  ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
11278  s = 1/s;
11279 
11280  /*
11281  * find Chunk=2^M such that N*Chunk<2^29
11282  *
11283  * we have chosen upper limit (2^29) with enough space left
11284  * to tolerate possible problems with rounding and N's close
11285  * to the limit, so we don't want to be very strict here.
11286  */
11287  k = ae_trunc(ae_log((double)536870912/(double)n, _state)/ln2, _state);
11288  chunk = xblas_xfastpow(2, k, _state);
11289  if( ae_fp_less(chunk,2) )
11290  {
11291  chunk = 2;
11292  }
11293  invchunk = 1/chunk;
11294 
11295  /*
11296  * calculate result
11297  */
11298  *r = 0;
11299  ae_v_muld(&w->ptr.p_double[0], 1, ae_v_len(0,n-1), chunk);
11300  for(;;)
11301  {
11302  s = s*invchunk;
11303  allzeros = ae_true;
11304  ks = 0;
11305  for(i=0; i<=n-1; i++)
11306  {
11307  v = w->ptr.p_double[i];
11308  k = ae_trunc(v, _state);
11309  if( ae_fp_neq(v,k) )
11310  {
11311  allzeros = ae_false;
11312  }
11313  w->ptr.p_double[i] = chunk*(v-k);
11314  ks = ks+k;
11315  }
11316  *r = *r+s*ks;
11317  v = ae_fabs(*r, _state);
11318  if( allzeros||ae_fp_eq(s*n+mx,mx) )
11319  {
11320  break;
11321  }
11322  }
11323 
11324  /*
11325  * correct error
11326  */
11327  *rerr = ae_maxreal(*rerr, ae_fabs(*r, _state)*ae_machineepsilon, _state);
11328 }
11329 
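/*
 * Editorial sketch (standalone, not part of the ALGLIB source): the core idea
 * of xblas_xsum() above, stripped of its scaling and termination refinements.
 * Assuming |w[i]|<1, each pass multiplies by a power-of-two "chunk", sums the
 * integer parts exactly, and keeps the exactly representable remainders for
 * the next pass, so every pass recovers one more base-Chunk "digit" of the
 * true sum.
 */
#include <math.h>
static double chunked_sum_demo(double *w, int n)
{
    /* assumes |w[i]|<1 for all i and n*chunk < 2^53, so per-pass sums are exact */
    double chunk = 1024.0, s = 1.0, r = 0.0;
    int i, pass;
    for(i=0; i<n; i++)
        w[i] *= chunk;
    for(pass=0; pass<6; pass++)              /* 6*log2(chunk) = 60 bits > 53-bit mantissa */
    {
        double ks = 0.0;
        s /= chunk;
        for(i=0; i<n; i++)
        {
            double k = trunc(w[i]);          /* integer "digit"; w[i]-k is exact */
            w[i] = chunk*(w[i]-k);           /* remainder, rescaled for the next pass */
            ks += k;
        }
        r += s*ks;                           /* add this digit's contribution */
    }
    return r;
}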
11330 
11331 /*************************************************************************
11332 Fast Pow
11333 
11334  -- ALGLIB --
11335  Copyright 24.08.2009 by Bochkanov Sergey
11336 *************************************************************************/
11337 static double xblas_xfastpow(double r, ae_int_t n, ae_state *_state)
11338 {
11339  double result;
11340 
11341 
11342  result = 0;
11343  if( n>0 )
11344  {
11345  if( n%2==0 )
11346  {
11347  result = ae_sqr(xblas_xfastpow(r, n/2, _state), _state);
11348  }
11349  else
11350  {
11351  result = r*xblas_xfastpow(r, n-1, _state);
11352  }
11353  return result;
11354  }
11355  if( n==0 )
11356  {
11357  result = 1;
11358  }
11359  if( n<0 )
11360  {
11361  result = xblas_xfastpow(1/r, -n, _state);
11362  }
11363  return result;
11364 }
11365 
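/*
 * Editorial sketch (standalone): the same square-and-multiply recursion as
 * xblas_xfastpow() above; for r=2, as in the calls from xblas_xsum(), the
 * result is an exact power of two.
 */
static double fastpow_demo(double r, long n)
{
    if( n==0 )
        return 1.0;
    if( n<0 )
        return fastpow_demo(1.0/r, -n);      /* r^n = (1/r)^(-n) */
    if( n%2==0 )
    {
        double h = fastpow_demo(r, n/2);
        return h*h;                          /* r^n = (r^(n/2))^2 for even n */
    }
    return r*fastpow_demo(r, n-1);           /* peel off one factor for odd n */
}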
11366 
11367 
11368 
11369 /*************************************************************************
11370 Normalizes direction/step pair: makes |D|=1, scales Stp.
11371 If |D|=0, it returns, leaving D/Stp unchanged.
11372 
11373  -- ALGLIB --
11374  Copyright 01.04.2010 by Bochkanov Sergey
11375 *************************************************************************/
11376 void linminnormalized(/* Real */ ae_vector* d,
11377  double* stp,
11378  ae_int_t n,
11379  ae_state *_state)
11380 {
11381  double mx;
11382  double s;
11383  ae_int_t i;
11384 
11385 
11386 
11387  /*
11388  * first, scale D to avoid underflow/overflow during squaring
11389  */
11390  mx = 0;
11391  for(i=0; i<=n-1; i++)
11392  {
11393  mx = ae_maxreal(mx, ae_fabs(d->ptr.p_double[i], _state), _state);
11394  }
11395  if( ae_fp_eq(mx,0) )
11396  {
11397  return;
11398  }
11399  s = 1/mx;
11400  ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
11401  *stp = *stp/s;
11402 
11403  /*
11404  * normalize D
11405  */
11406  s = ae_v_dotproduct(&d->ptr.p_double[0], 1, &d->ptr.p_double[0], 1, ae_v_len(0,n-1));
11407  s = 1/ae_sqrt(s, _state);
11408  ae_v_muld(&d->ptr.p_double[0], 1, ae_v_len(0,n-1), s);
11409  *stp = *stp/s;
11410 }
11411 
11412 
11413 /*************************************************************************
11414 THE PURPOSE OF MCSRCH IS TO FIND A STEP WHICH SATISFIES A SUFFICIENT
11415 DECREASE CONDITION AND A CURVATURE CONDITION.
11416 
11417 AT EACH STAGE THE SUBROUTINE UPDATES AN INTERVAL OF UNCERTAINTY WITH
11418 ENDPOINTS STX AND STY. THE INTERVAL OF UNCERTAINTY IS INITIALLY CHOSEN
11419 SO THAT IT CONTAINS A MINIMIZER OF THE MODIFIED FUNCTION
11420 
11421  F(X+STP*S) - F(X) - FTOL*STP*(GRADF(X)'S).
11422 
11423 IF A STEP IS OBTAINED FOR WHICH THE MODIFIED FUNCTION HAS A NONPOSITIVE
11424 FUNCTION VALUE AND NONNEGATIVE DERIVATIVE, THEN THE INTERVAL OF
11425 UNCERTAINTY IS CHOSEN SO THAT IT CONTAINS A MINIMIZER OF F(X+STP*S).
11426 
11427 THE ALGORITHM IS DESIGNED TO FIND A STEP WHICH SATISFIES THE SUFFICIENT
11428 DECREASE CONDITION
11429 
11430  F(X+STP*S) .LE. F(X) + FTOL*STP*(GRADF(X)'S),
11431 
11432 AND THE CURVATURE CONDITION
11433 
11434  ABS(GRADF(X+STP*S)'S) .LE. GTOL*ABS(GRADF(X)'S).
11435 
11436 IF FTOL IS LESS THAN GTOL AND IF, FOR EXAMPLE, THE FUNCTION IS BOUNDED
11437 BELOW, THEN THERE IS ALWAYS A STEP WHICH SATISFIES BOTH CONDITIONS.
11438 IF NO STEP CAN BE FOUND WHICH SATISFIES BOTH CONDITIONS, THEN THE
11439 ALGORITHM USUALLY STOPS WHEN ROUNDING ERRORS PREVENT FURTHER PROGRESS.
11440 IN THIS CASE STP ONLY SATISFIES THE SUFFICIENT DECREASE CONDITION.
11441 
11442 
11443 :::::::::::::IMPORTANT NOTES:::::::::::::
11444 
11445 NOTE 1:
11446 
11447 This routine guarantees that it will stop at the last point where the function
11448 value was calculated. It won't make several additional function evaluations
11449 after finding a good point. So if you store the function evaluations requested
11450 by this routine, you can be sure that the last one is the point where we stopped.
11451 
11452 NOTE 2:
11453 
11454 when 0<StpMax<StpMin, the algorithm terminates with INFO=5 and Stp=0.0
11455 :::::::::::::::::::::::::::::::::::::::::
11456 
11457 
11458 PARAMETERS DESCRIPTION
11459 
11460 STAGE MUST BE ZERO ON THE FIRST CALL AND IS SET BACK TO ZERO ON FINAL EXIT.
11461 
11462 N IS A POSITIVE INTEGER INPUT VARIABLE SET TO THE NUMBER OF VARIABLES.
11463 
11464 X IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE BASE POINT FOR
11465 THE LINE SEARCH. ON OUTPUT IT CONTAINS X+STP*S.
11466 
11467 F IS A VARIABLE. ON INPUT IT MUST CONTAIN THE VALUE OF F AT X. ON OUTPUT
11468 IT CONTAINS THE VALUE OF F AT X + STP*S.
11469 
11470 G IS AN ARRAY OF LENGTH N. ON INPUT IT MUST CONTAIN THE GRADIENT OF F AT X.
11471 ON OUTPUT IT CONTAINS THE GRADIENT OF F AT X + STP*S.
11472 
11473 S IS AN INPUT ARRAY OF LENGTH N WHICH SPECIFIES THE SEARCH DIRECTION.
11474 
11475 STP IS A NONNEGATIVE VARIABLE. ON INPUT STP CONTAINS AN INITIAL ESTIMATE
11476 OF A SATISFACTORY STEP. ON OUTPUT STP CONTAINS THE FINAL ESTIMATE.
11477 
11478 FTOL AND GTOL ARE NONNEGATIVE INPUT VARIABLES. TERMINATION OCCURS WHEN THE
11479 SUFFICIENT DECREASE CONDITION AND THE DIRECTIONAL DERIVATIVE CONDITION ARE
11480 SATISFIED.
11481 
11482 XTOL IS A NONNEGATIVE INPUT VARIABLE. TERMINATION OCCURS WHEN THE RELATIVE
11483 WIDTH OF THE INTERVAL OF UNCERTAINTY IS AT MOST XTOL.
11484 
11485 STPMIN AND STPMAX ARE NONNEGATIVE INPUT VARIABLES WHICH SPECIFY LOWER AND
11486 UPPER BOUNDS FOR THE STEP.
11487 
11488 MAXFEV IS A POSITIVE INTEGER INPUT VARIABLE. TERMINATION OCCURS WHEN THE
11489 NUMBER OF CALLS TO FCN IS AT LEAST MAXFEV BY THE END OF AN ITERATION.
11490 
11491 INFO IS AN INTEGER OUTPUT VARIABLE SET AS FOLLOWS:
11492  INFO = 0 IMPROPER INPUT PARAMETERS.
11493 
11494  INFO = 1 THE SUFFICIENT DECREASE CONDITION AND THE
11495  DIRECTIONAL DERIVATIVE CONDITION HOLD.
11496 
11497  INFO = 2 RELATIVE WIDTH OF THE INTERVAL OF UNCERTAINTY
11498  IS AT MOST XTOL.
11499 
11500  INFO = 3 NUMBER OF CALLS TO FCN HAS REACHED MAXFEV.
11501 
11502  INFO = 4 THE STEP IS AT THE LOWER BOUND STPMIN.
11503 
11504  INFO = 5 THE STEP IS AT THE UPPER BOUND STPMAX.
11505 
11506  INFO = 6 ROUNDING ERRORS PREVENT FURTHER PROGRESS.
11507  THERE MAY NOT BE A STEP WHICH SATISFIES THE
11508  SUFFICIENT DECREASE AND CURVATURE CONDITIONS.
11509  TOLERANCES MAY BE TOO SMALL.
11510 
11511 NFEV IS AN INTEGER OUTPUT VARIABLE SET TO THE NUMBER OF CALLS TO FCN.
11512 
11513 WA IS A WORK ARRAY OF LENGTH N.
11514 
11515 ARGONNE NATIONAL LABORATORY. MINPACK PROJECT. JUNE 1983
11516 JORGE J. MORE', DAVID J. THUENTE
11517 *************************************************************************/
11518 void mcsrch(ae_int_t n,
11519  /* Real */ ae_vector* x,
11520  double* f,
11521  /* Real */ ae_vector* g,
11522  /* Real */ ae_vector* s,
11523  double* stp,
11524  double stpmax,
11525  double gtol,
11526  ae_int_t* info,
11527  ae_int_t* nfev,
11528  /* Real */ ae_vector* wa,
11529  linminstate* state,
11530  ae_int_t* stage,
11531  ae_state *_state)
11532 {
11533  double v;
11534  double p5;
11535  double p66;
11536  double zero;
11537 
11538 
11539 
11540  /*
11541  * init
11542  */
11543  p5 = 0.5;
11544  p66 = 0.66;
11545  state->xtrapf = 4.0;
11546  zero = 0;
11547  if( ae_fp_eq(stpmax,0) )
11548  {
11549  stpmax = linmin_defstpmax;
11550  }
11551  if( ae_fp_less(*stp,linmin_stpmin) )
11552  {
11553  *stp = linmin_stpmin;
11554  }
11555  if( ae_fp_greater(*stp,stpmax) )
11556  {
11557  *stp = stpmax;
11558  }
11559 
11560  /*
11561  * Main cycle
11562  */
11563  for(;;)
11564  {
11565  if( *stage==0 )
11566  {
11567 
11568  /*
11569  * NEXT
11570  */
11571  *stage = 2;
11572  continue;
11573  }
11574  if( *stage==2 )
11575  {
11576  state->infoc = 1;
11577  *info = 0;
11578 
11579  /*
11580  * CHECK THE INPUT PARAMETERS FOR ERRORS.
11581  */
11582  if( ae_fp_less(stpmax,linmin_stpmin)&&ae_fp_greater(stpmax,0) )
11583  {
11584  *info = 5;
11585  *stp = 0.0;
11586  return;
11587  }
11588  if( ((((((n<=0||ae_fp_less_eq(*stp,0))||ae_fp_less(linmin_ftol,0))||ae_fp_less(gtol,zero))||ae_fp_less(linmin_xtol,zero))||ae_fp_less(linmin_stpmin,zero))||ae_fp_less(stpmax,linmin_stpmin))||linmin_maxfev<=0 )
11589  {
11590  *stage = 0;
11591  return;
11592  }
11593 
11594  /*
11595  * COMPUTE THE INITIAL GRADIENT IN THE SEARCH DIRECTION
11596  * AND CHECK THAT S IS A DESCENT DIRECTION.
11597  */
11598  v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
11599  state->dginit = v;
11600  if( ae_fp_greater_eq(state->dginit,0) )
11601  {
11602  *stage = 0;
11603  return;
11604  }
11605 
11606  /*
11607  * INITIALIZE LOCAL VARIABLES.
11608  */
11609  state->brackt = ae_false;
11610  state->stage1 = ae_true;
11611  *nfev = 0;
11612  state->finit = *f;
11613  state->dgtest = linmin_ftol*state->dginit;
11614  state->width = stpmax-linmin_stpmin;
11615  state->width1 = state->width/p5;
11616  ae_v_move(&wa->ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
11617 
11618  /*
11619  * THE VARIABLES STX, FX, DGX CONTAIN THE VALUES OF THE STEP,
11620  * FUNCTION, AND DIRECTIONAL DERIVATIVE AT THE BEST STEP.
11621  * THE VARIABLES STY, FY, DGY CONTAIN THE VALUE OF THE STEP,
11622  * FUNCTION, AND DERIVATIVE AT THE OTHER ENDPOINT OF
11623  * THE INTERVAL OF UNCERTAINTY.
11624  * THE VARIABLES STP, F, DG CONTAIN THE VALUES OF THE STEP,
11625  * FUNCTION, AND DERIVATIVE AT THE CURRENT STEP.
11626  */
11627  state->stx = 0;
11628  state->fx = state->finit;
11629  state->dgx = state->dginit;
11630  state->sty = 0;
11631  state->fy = state->finit;
11632  state->dgy = state->dginit;
11633 
11634  /*
11635  * NEXT
11636  */
11637  *stage = 3;
11638  continue;
11639  }
11640  if( *stage==3 )
11641  {
11642 
11643  /*
11644  * START OF ITERATION.
11645  *
11646  * SET THE MINIMUM AND MAXIMUM STEPS TO CORRESPOND
11647  * TO THE PRESENT INTERVAL OF UNCERTAINTY.
11648  */
11649  if( state->brackt )
11650  {
11651  if( ae_fp_less(state->stx,state->sty) )
11652  {
11653  state->stmin = state->stx;
11654  state->stmax = state->sty;
11655  }
11656  else
11657  {
11658  state->stmin = state->sty;
11659  state->stmax = state->stx;
11660  }
11661  }
11662  else
11663  {
11664  state->stmin = state->stx;
11665  state->stmax = *stp+state->xtrapf*(*stp-state->stx);
11666  }
11667 
11668  /*
11669  * FORCE THE STEP TO BE WITHIN THE BOUNDS STPMAX AND STPMIN.
11670  */
11671  if( ae_fp_greater(*stp,stpmax) )
11672  {
11673  *stp = stpmax;
11674  }
11675  if( ae_fp_less(*stp,linmin_stpmin) )
11676  {
11677  *stp = linmin_stpmin;
11678  }
11679 
11680  /*
11681  * IF AN UNUSUAL TERMINATION IS TO OCCUR THEN LET
11682  * STP BE THE LOWEST POINT OBTAINED SO FAR.
11683  */
11684  if( (((state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||*nfev>=linmin_maxfev-1)||state->infoc==0)||(state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax)) )
11685  {
11686  *stp = state->stx;
11687  }
11688 
11689  /*
11690  * EVALUATE THE FUNCTION AND GRADIENT AT STP
11691  * AND COMPUTE THE DIRECTIONAL DERIVATIVE.
11692  */
11693  ae_v_move(&x->ptr.p_double[0], 1, &wa->ptr.p_double[0], 1, ae_v_len(0,n-1));
11694  ae_v_addd(&x->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1), *stp);
11695 
11696  /*
11697  * NEXT
11698  */
11699  *stage = 4;
11700  return;
11701  }
11702  if( *stage==4 )
11703  {
11704  *info = 0;
11705  *nfev = *nfev+1;
11706  v = ae_v_dotproduct(&g->ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
11707  state->dg = v;
11708  state->ftest1 = state->finit+*stp*state->dgtest;
11709 
11710  /*
11711  * TEST FOR CONVERGENCE.
11712  */
11713  if( (state->brackt&&(ae_fp_less_eq(*stp,state->stmin)||ae_fp_greater_eq(*stp,state->stmax)))||state->infoc==0 )
11714  {
11715  *info = 6;
11716  }
11717  if( (ae_fp_eq(*stp,stpmax)&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_less_eq(state->dg,state->dgtest) )
11718  {
11719  *info = 5;
11720  }
11721  if( ae_fp_eq(*stp,linmin_stpmin)&&(ae_fp_greater(*f,state->ftest1)||ae_fp_greater_eq(state->dg,state->dgtest)) )
11722  {
11723  *info = 4;
11724  }
11725  if( *nfev>=linmin_maxfev )
11726  {
11727  *info = 3;
11728  }
11729  if( state->brackt&&ae_fp_less_eq(state->stmax-state->stmin,linmin_xtol*state->stmax) )
11730  {
11731  *info = 2;
11732  }
11733  if( ae_fp_less_eq(*f,state->ftest1)&&ae_fp_less_eq(ae_fabs(state->dg, _state),-gtol*state->dginit) )
11734  {
11735  *info = 1;
11736  }
11737 
11738  /*
11739  * CHECK FOR TERMINATION.
11740  */
11741  if( *info!=0 )
11742  {
11743  *stage = 0;
11744  return;
11745  }
11746 
11747  /*
11748  * IN THE FIRST STAGE WE SEEK A STEP FOR WHICH THE MODIFIED
11749  * FUNCTION HAS A NONPOSITIVE VALUE AND NONNEGATIVE DERIVATIVE.
11750  */
11751  if( (state->stage1&&ae_fp_less_eq(*f,state->ftest1))&&ae_fp_greater_eq(state->dg,ae_minreal(linmin_ftol, gtol, _state)*state->dginit) )
11752  {
11753  state->stage1 = ae_false;
11754  }
11755 
11756  /*
11757  * A MODIFIED FUNCTION IS USED TO PREDICT THE STEP ONLY IF
11758  * WE HAVE NOT OBTAINED A STEP FOR WHICH THE MODIFIED
11759  * FUNCTION HAS A NONPOSITIVE FUNCTION VALUE AND NONNEGATIVE
11760  * DERIVATIVE, AND IF A LOWER FUNCTION VALUE HAS BEEN
11761  * OBTAINED BUT THE DECREASE IS NOT SUFFICIENT.
11762  */
11763  if( (state->stage1&&ae_fp_less_eq(*f,state->fx))&&ae_fp_greater(*f,state->ftest1) )
11764  {
11765 
11766  /*
11767  * DEFINE THE MODIFIED FUNCTION AND DERIVATIVE VALUES.
11768  */
11769  state->fm = *f-*stp*state->dgtest;
11770  state->fxm = state->fx-state->stx*state->dgtest;
11771  state->fym = state->fy-state->sty*state->dgtest;
11772  state->dgm = state->dg-state->dgtest;
11773  state->dgxm = state->dgx-state->dgtest;
11774  state->dgym = state->dgy-state->dgtest;
11775 
11776  /*
11777  * CALL CSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
11778  * AND TO COMPUTE THE NEW STEP.
11779  */
11780  linmin_mcstep(&state->stx, &state->fxm, &state->dgxm, &state->sty, &state->fym, &state->dgym, stp, state->fm, state->dgm, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
11781 
11782  /*
11783  * RESET THE FUNCTION AND GRADIENT VALUES FOR F.
11784  */
11785  state->fx = state->fxm+state->stx*state->dgtest;
11786  state->fy = state->fym+state->sty*state->dgtest;
11787  state->dgx = state->dgxm+state->dgtest;
11788  state->dgy = state->dgym+state->dgtest;
11789  }
11790  else
11791  {
11792 
11793  /*
11794  * CALL MCSTEP TO UPDATE THE INTERVAL OF UNCERTAINTY
11795  * AND TO COMPUTE THE NEW STEP.
11796  */
11797  linmin_mcstep(&state->stx, &state->fx, &state->dgx, &state->sty, &state->fy, &state->dgy, stp, *f, state->dg, &state->brackt, state->stmin, state->stmax, &state->infoc, _state);
11798  }
11799 
11800  /*
11801  * FORCE A SUFFICIENT DECREASE IN THE SIZE OF THE
11802  * INTERVAL OF UNCERTAINTY.
11803  */
11804  if( state->brackt )
11805  {
11806  if( ae_fp_greater_eq(ae_fabs(state->sty-state->stx, _state),p66*state->width1) )
11807  {
11808  *stp = state->stx+p5*(state->sty-state->stx);
11809  }
11810  state->width1 = state->width;
11811  state->width = ae_fabs(state->sty-state->stx, _state);
11812  }
11813 
11814  /*
11815  * NEXT.
11816  */
11817  *stage = 3;
11818  continue;
11819  }
11820  }
11821 }
11822 
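/*
 * Editorial usage sketch (hedged, not part of the ALGLIB source): the routine
 * above is driven through reverse communication via the STAGE parameter.  The
 * caller evaluates F and its gradient between calls; user_func/user_grad are
 * hypothetical callbacks, every other name is a parameter documented above.
 *
 *     f = user_func(&x);                  // F and G at the base point X
 *     user_grad(&x, &g);
 *     stage = 0;                          // zero on the first call
 *     for(;;)
 *     {
 *         mcsrch(n, &x, &f, &g, &s, &stp, stpmax, gtol,
 *                &info, &nfev, &wa, &state, &stage, _state);
 *         if( stage==0 )
 *             break;                      // zero again on final exit, INFO is set
 *         f = user_func(&x);              // routine placed the trial point into X
 *         user_grad(&x, &g);
 *     }
 */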
11823 
11824 /*************************************************************************
11825 These functions perform Armijo line search using at most FMAX function
11826 evaluations. It doesn't enforce any kind of "sufficient decrease"
11827 criterion - it just tries different Armijo steps and returns the best point
11828 found so far.
11829 
11830 Optimization is done using F-rcomm interface:
11831 * ArmijoCreate initializes State structure
11832  (reusing previously allocated buffers)
11833 * ArmijoIteration is subsequently called
11834 * ArmijoResults returns results
11835 
11836 INPUT PARAMETERS:
11837  N - problem size
11838  X - array[N], starting point
11839  F - F(X+S*STP)
11840  S - step direction, S>0
11841  STP - step length
11842  STPMAX - maximum value for STP or zero (if no limit is imposed)
11843  FMAX - maximum number of function evaluations
11844  State - optimization state
11845 
11846  -- ALGLIB --
11847  Copyright 05.10.2010 by Bochkanov Sergey
11848 *************************************************************************/
11849 void armijocreate(ae_int_t n,
11850  /* Real */ ae_vector* x,
11851  double f,
11852  /* Real */ ae_vector* s,
11853  double stp,
11854  double stpmax,
11855  ae_int_t fmax,
11856  armijostate* state,
11857  ae_state *_state)
11858 {
11859 
11860 
11861  if( state->x.cnt<n )
11862  {
11863  ae_vector_set_length(&state->x, n, _state);
11864  }
11865  if( state->xbase.cnt<n )
11866  {
11867  ae_vector_set_length(&state->xbase, n, _state);
11868  }
11869  if( state->s.cnt<n )
11870  {
11871  ae_vector_set_length(&state->s, n, _state);
11872  }
11873  state->stpmax = stpmax;
11874  state->fmax = fmax;
11875  state->stplen = stp;
11876  state->fcur = f;
11877  state->n = n;
11878  ae_v_move(&state->xbase.ptr.p_double[0], 1, &x->ptr.p_double[0], 1, ae_v_len(0,n-1));
11879  ae_v_move(&state->s.ptr.p_double[0], 1, &s->ptr.p_double[0], 1, ae_v_len(0,n-1));
11880  ae_vector_set_length(&state->rstate.ia, 0+1, _state);
11881  ae_vector_set_length(&state->rstate.ra, 0+1, _state);
11882  state->rstate.stage = -1;
11883 }
11884 
11885 
11886 /*************************************************************************
11887 This is rcomm-based search function
11888 
11889  -- ALGLIB --
11890  Copyright 05.10.2010 by Bochkanov Sergey
11891 *************************************************************************/
11892 ae_bool armijoiteration(armijostate* state, ae_state *_state)
11893 {
11894  double v;
11895  ae_int_t n;
11896  ae_bool result;
11897 
11898 
11899 
11900  /*
11901  * Reverse communication preparations
11902  * I know it looks ugly, but it works the same way
11903  * anywhere from C++ to Python.
11904  *
11905  * This code initializes locals by:
11906  * * random values determined during code
11907  * generation - on first subroutine call
11908  * * values from previous call - on subsequent calls
11909  */
11910  if( state->rstate.stage>=0 )
11911  {
11912  n = state->rstate.ia.ptr.p_int[0];
11913  v = state->rstate.ra.ptr.p_double[0];
11914  }
11915  else
11916  {
11917  n = -983;
11918  v = -989;
11919  }
11920  if( state->rstate.stage==0 )
11921  {
11922  goto lbl_0;
11923  }
11924  if( state->rstate.stage==1 )
11925  {
11926  goto lbl_1;
11927  }
11928  if( state->rstate.stage==2 )
11929  {
11930  goto lbl_2;
11931  }
11932  if( state->rstate.stage==3 )
11933  {
11934  goto lbl_3;
11935  }
11936 
11937  /*
11938  * Routine body
11939  */
11940  if( (ae_fp_less_eq(state->stplen,0)||ae_fp_less(state->stpmax,0))||state->fmax<2 )
11941  {
11942  state->info = 0;
11943  result = ae_false;
11944  return result;
11945  }
11946  if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
11947  {
11948  state->info = 4;
11949  result = ae_false;
11950  return result;
11951  }
11952  n = state->n;
11953  state->nfev = 0;
11954 
11955  /*
11956  * We always need F
11957  */
11958  state->needf = ae_true;
11959 
11960  /*
11961  * Bound StpLen
11962  */
11963  if( ae_fp_greater(state->stplen,state->stpmax)&&ae_fp_neq(state->stpmax,0) )
11964  {
11965  state->stplen = state->stpmax;
11966  }
11967 
11968  /*
11969  * Increase length
11970  */
11971  v = state->stplen*linmin_armijofactor;
11972  if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,0) )
11973  {
11974  v = state->stpmax;
11975  }
11976  ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
11977  ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
11978  state->rstate.stage = 0;
11979  goto lbl_rcomm;
11980 lbl_0:
11981  state->nfev = state->nfev+1;
11982  if( ae_fp_greater_eq(state->f,state->fcur) )
11983  {
11984  goto lbl_4;
11985  }
11986  state->stplen = v;
11987  state->fcur = state->f;
11988 lbl_6:
11989  if( ae_false )
11990  {
11991  goto lbl_7;
11992  }
11993 
11994  /*
11995  * test stopping conditions
11996  */
11997  if( state->nfev>=state->fmax )
11998  {
11999  state->info = 3;
12000  result = ae_false;
12001  return result;
12002  }
12003  if( ae_fp_greater_eq(state->stplen,state->stpmax) )
12004  {
12005  state->info = 5;
12006  result = ae_false;
12007  return result;
12008  }
12009 
12010  /*
12011  * evaluate F
12012  */
12013  v = state->stplen*linmin_armijofactor;
12014  if( ae_fp_greater(v,state->stpmax)&&ae_fp_neq(state->stpmax,0) )
12015  {
12016  v = state->stpmax;
12017  }
12018  ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
12019  ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
12020  state->rstate.stage = 1;
12021  goto lbl_rcomm;
12022 lbl_1:
12023  state->nfev = state->nfev+1;
12024 
12025  /*
12026  * make decision
12027  */
12028  if( ae_fp_less(state->f,state->fcur) )
12029  {
12030  state->stplen = v;
12031  state->fcur = state->f;
12032  }
12033  else
12034  {
12035  state->info = 1;
12036  result = ae_false;
12037  return result;
12038  }
12039  goto lbl_6;
12040 lbl_7:
12041 lbl_4:
12042 
12043  /*
12044  * Decrease length
12045  */
12046  v = state->stplen/linmin_armijofactor;
12047  ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
12048  ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
12049  state->rstate.stage = 2;
12050  goto lbl_rcomm;
12051 lbl_2:
12052  state->nfev = state->nfev+1;
12053  if( ae_fp_greater_eq(state->f,state->fcur) )
12054  {
12055  goto lbl_8;
12056  }
12057  state->stplen = state->stplen/linmin_armijofactor;
12058  state->fcur = state->f;
12059 lbl_10:
12060  if( ae_false )
12061  {
12062  goto lbl_11;
12063  }
12064 
12065  /*
12066  * test stopping conditions
12067  */
12068  if( state->nfev>=state->fmax )
12069  {
12070  state->info = 3;
12071  result = ae_false;
12072  return result;
12073  }
12074  if( ae_fp_less_eq(state->stplen,linmin_stpmin) )
12075  {
12076  state->info = 4;
12077  result = ae_false;
12078  return result;
12079  }
12080 
12081  /*
12082  * evaluate F
12083  */
12084  v = state->stplen/linmin_armijofactor;
12085  ae_v_move(&state->x.ptr.p_double[0], 1, &state->xbase.ptr.p_double[0], 1, ae_v_len(0,n-1));
12086  ae_v_addd(&state->x.ptr.p_double[0], 1, &state->s.ptr.p_double[0], 1, ae_v_len(0,n-1), v);
12087  state->rstate.stage = 3;
12088  goto lbl_rcomm;
12089 lbl_3:
12090  state->nfev = state->nfev+1;
12091 
12092  /*
12093  * make decision
12094  */
12095  if( ae_fp_less(state->f,state->fcur) )
12096  {
12097  state->stplen = state->stplen/linmin_armijofactor;
12098  state->fcur = state->f;
12099  }
12100  else
12101  {
12102  state->info = 1;
12103  result = ae_false;
12104  return result;
12105  }
12106  goto lbl_10;
12107 lbl_11:
12108 lbl_8:
12109 
12110  /*
12111  * Nothing to be done
12112  */
12113  state->info = 1;
12114  result = ae_false;
12115  return result;
12116 
12117  /*
12118  * Saving state
12119  */
12120 lbl_rcomm:
12121  result = ae_true;
12122  state->rstate.ia.ptr.p_int[0] = n;
12123  state->rstate.ra.ptr.p_double[0] = v;
12124  return result;
12125 }
12126 
12127 
12128 /*************************************************************************
12129 Results of Armijo search
12130 
12131 OUTPUT PARAMETERS:
12132  INFO - on output it is set to one of the return codes:
12133  * 0 improper input params
12134  * 1 optimum step is found with at most FMAX evaluations
12135  * 3 FMAX evaluations were used,
12136  X contains optimum found so far
12137  * 4 step is at lower bound STPMIN
12138  * 5 step is at upper bound STPMAX
12139  STP - step length (in case of failure it is still returned)
12140  F - function value (in case of failure it is still returned)
12141 
12142  -- ALGLIB --
12143  Copyright 05.10.2010 by Bochkanov Sergey
12144 *************************************************************************/
12145 void armijoresults(armijostate* state,
12146  ae_int_t* info,
12147  double* stp,
12148  double* f,
12149  ae_state *_state)
12150 {
12151 
12152 
12153  *info = state->info;
12154  *stp = state->stplen;
12155  *f = state->fcur;
12156 }
12157 
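/*
 * Editorial usage sketch (hedged, not part of the ALGLIB source): the three
 * Armijo routines above follow the usual ALGLIB rcomm pattern.  ArmijoIteration()
 * returns ae_true while it still needs F at State.X; user_func is a hypothetical
 * objective evaluator.
 *
 *     armijocreate(n, &x, f0, &s, stp0, stpmax, fmax, &state, _state);
 *     while( armijoiteration(&state, _state) )
 *     {
 *         // state.needf is set: evaluate F at the trial point state.x
 *         state.f = user_func(&state.x);
 *     }
 *     armijoresults(&state, &info, &stp, &f, _state);
 *     // info: 1 = best step found, 3/4/5 = evaluation budget or step bound reached
 */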
12158 
12159 static void linmin_mcstep(double* stx,
12160  double* fx,
12161  double* dx,
12162  double* sty,
12163  double* fy,
12164  double* dy,
12165  double* stp,
12166  double fp,
12167  double dp,
12168  ae_bool* brackt,
12169  double stmin,
12170  double stmax,
12171  ae_int_t* info,
12172  ae_state *_state)
12173 {
12174  ae_bool bound;
12175  double gamma;
12176  double p;
12177  double q;
12178  double r;
12179  double s;
12180  double sgnd;
12181  double stpc;
12182  double stpf;
12183  double stpq;
12184  double theta;
12185 
12186 
12187  *info = 0;
12188 
12189  /*
12190  * CHECK THE INPUT PARAMETERS FOR ERRORS.
12191  */
12192  if( ((*brackt&&(ae_fp_less_eq(*stp,ae_minreal(*stx, *sty, _state))||ae_fp_greater_eq(*stp,ae_maxreal(*stx, *sty, _state))))||ae_fp_greater_eq(*dx*(*stp-(*stx)),0))||ae_fp_less(stmax,stmin) )
12193  {
12194  return;
12195  }
12196 
12197  /*
12198  * DETERMINE IF THE DERIVATIVES HAVE OPPOSITE SIGN.
12199  */
12200  sgnd = dp*(*dx/ae_fabs(*dx, _state));
12201 
12202  /*
12203  * FIRST CASE. A HIGHER FUNCTION VALUE.
12204  * THE MINIMUM IS BRACKETED. IF THE CUBIC STEP IS CLOSER
12205  * TO STX THAN THE QUADRATIC STEP, THE CUBIC STEP IS TAKEN,
12206  * ELSE THE AVERAGE OF THE CUBIC AND QUADRATIC STEPS IS TAKEN.
12207  */
12208  if( ae_fp_greater(fp,*fx) )
12209  {
12210  *info = 1;
12211  bound = ae_true;
12212  theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
12213  s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
12214  gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
12215  if( ae_fp_less(*stp,*stx) )
12216  {
12217  gamma = -gamma;
12218  }
12219  p = gamma-(*dx)+theta;
12220  q = gamma-(*dx)+gamma+dp;
12221  r = p/q;
12222  stpc = *stx+r*(*stp-(*stx));
12223  stpq = *stx+*dx/((*fx-fp)/(*stp-(*stx))+(*dx))/2*(*stp-(*stx));
12224  if( ae_fp_less(ae_fabs(stpc-(*stx), _state),ae_fabs(stpq-(*stx), _state)) )
12225  {
12226  stpf = stpc;
12227  }
12228  else
12229  {
12230  stpf = stpc+(stpq-stpc)/2;
12231  }
12232  *brackt = ae_true;
12233  }
12234  else
12235  {
12236  if( ae_fp_less(sgnd,0) )
12237  {
12238 
12239  /*
12240  * SECOND CASE. A LOWER FUNCTION VALUE AND DERIVATIVES OF
12241  * OPPOSITE SIGN. THE MINIMUM IS BRACKETED. IF THE CUBIC
12242  * STEP IS CLOSER TO STX THAN THE QUADRATIC (SECANT) STEP,
12243  * THE CUBIC STEP IS TAKEN, ELSE THE QUADRATIC STEP IS TAKEN.
12244  */
12245  *info = 2;
12246  bound = ae_false;
12247  theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
12248  s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
12249  gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state);
12250  if( ae_fp_greater(*stp,*stx) )
12251  {
12252  gamma = -gamma;
12253  }
12254  p = gamma-dp+theta;
12255  q = gamma-dp+gamma+(*dx);
12256  r = p/q;
12257  stpc = *stp+r*(*stx-(*stp));
12258  stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
12259  if( ae_fp_greater(ae_fabs(stpc-(*stp), _state),ae_fabs(stpq-(*stp), _state)) )
12260  {
12261  stpf = stpc;
12262  }
12263  else
12264  {
12265  stpf = stpq;
12266  }
12267  *brackt = ae_true;
12268  }
12269  else
12270  {
12271  if( ae_fp_less(ae_fabs(dp, _state),ae_fabs(*dx, _state)) )
12272  {
12273 
12274  /*
12275  * THIRD CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
12276  * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DECREASES.
12277  * THE CUBIC STEP IS ONLY USED IF THE CUBIC TENDS TO INFINITY
12278  * IN THE DIRECTION OF THE STEP OR IF THE MINIMUM OF THE CUBIC
12279  * IS BEYOND STP. OTHERWISE THE CUBIC STEP IS DEFINED TO BE
12280  * EITHER STPMIN OR STPMAX. THE QUADRATIC (SECANT) STEP IS ALSO
12281  * COMPUTED AND IF THE MINIMUM IS BRACKETED THEN THE STEP
12282  * CLOSEST TO STX IS TAKEN, ELSE THE STEP FARTHEST AWAY IS TAKEN.
12283  */
12284  *info = 3;
12285  bound = ae_true;
12286  theta = 3*(*fx-fp)/(*stp-(*stx))+(*dx)+dp;
12287  s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dx, _state), ae_fabs(dp, _state), _state), _state);
12288 
12289  /*
12290  * THE CASE GAMMA = 0 ONLY ARISES IF THE CUBIC DOES NOT TEND
12291  * TO INFINITY IN THE DIRECTION OF THE STEP.
12292  */
12293  gamma = s*ae_sqrt(ae_maxreal(0, ae_sqr(theta/s, _state)-*dx/s*(dp/s), _state), _state);
12294  if( ae_fp_greater(*stp,*stx) )
12295  {
12296  gamma = -gamma;
12297  }
12298  p = gamma-dp+theta;
12299  q = gamma+(*dx-dp)+gamma;
12300  r = p/q;
12301  if( ae_fp_less(r,0)&&ae_fp_neq(gamma,0) )
12302  {
12303  stpc = *stp+r*(*stx-(*stp));
12304  }
12305  else
12306  {
12307  if( ae_fp_greater(*stp,*stx) )
12308  {
12309  stpc = stmax;
12310  }
12311  else
12312  {
12313  stpc = stmin;
12314  }
12315  }
12316  stpq = *stp+dp/(dp-(*dx))*(*stx-(*stp));
12317  if( *brackt )
12318  {
12319  if( ae_fp_less(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
12320  {
12321  stpf = stpc;
12322  }
12323  else
12324  {
12325  stpf = stpq;
12326  }
12327  }
12328  else
12329  {
12330  if( ae_fp_greater(ae_fabs(*stp-stpc, _state),ae_fabs(*stp-stpq, _state)) )
12331  {
12332  stpf = stpc;
12333  }
12334  else
12335  {
12336  stpf = stpq;
12337  }
12338  }
12339  }
12340  else
12341  {
12342 
12343  /*
12344  * FOURTH CASE. A LOWER FUNCTION VALUE, DERIVATIVES OF THE
12345  * SAME SIGN, AND THE MAGNITUDE OF THE DERIVATIVE DOES
12346  * NOT DECREASE. IF THE MINIMUM IS NOT BRACKETED, THE STEP
12347  * IS EITHER STPMIN OR STPMAX, ELSE THE CUBIC STEP IS TAKEN.
12348  */
12349  *info = 4;
12350  bound = ae_false;
12351  if( *brackt )
12352  {
12353  theta = 3*(fp-(*fy))/(*sty-(*stp))+(*dy)+dp;
12354  s = ae_maxreal(ae_fabs(theta, _state), ae_maxreal(ae_fabs(*dy, _state), ae_fabs(dp, _state), _state), _state);
12355  gamma = s*ae_sqrt(ae_sqr(theta/s, _state)-*dy/s*(dp/s), _state);
12356  if( ae_fp_greater(*stp,*sty) )
12357  {
12358  gamma = -gamma;
12359  }
12360  p = gamma-dp+theta;
12361  q = gamma-dp+gamma+(*dy);
12362  r = p/q;
12363  stpc = *stp+r*(*sty-(*stp));
12364  stpf = stpc;
12365  }
12366  else
12367  {
12368  if( ae_fp_greater(*stp,*stx) )
12369  {
12370  stpf = stmax;
12371  }
12372  else
12373  {
12374  stpf = stmin;
12375  }
12376  }
12377  }
12378  }
12379  }
12380 
12381  /*
12382  * UPDATE THE INTERVAL OF UNCERTAINTY. THIS UPDATE DOES NOT
12383  * DEPEND ON THE NEW STEP OR THE CASE ANALYSIS ABOVE.
12384  */
12385  if( ae_fp_greater(fp,*fx) )
12386  {
12387  *sty = *stp;
12388  *fy = fp;
12389  *dy = dp;
12390  }
12391  else
12392  {
12393  if( ae_fp_less(sgnd,0.0) )
12394  {
12395  *sty = *stx;
12396  *fy = *fx;
12397  *dy = *dx;
12398  }
12399  *stx = *stp;
12400  *fx = fp;
12401  *dx = dp;
12402  }
12403 
12404  /*
12405  * COMPUTE THE NEW STEP AND SAFEGUARD IT.
12406  */
12407  stpf = ae_minreal(stmax, stpf, _state);
12408  stpf = ae_maxreal(stmin, stpf, _state);
12409  *stp = stpf;
12410  if( *brackt&&bound )
12411  {
12412  if( ae_fp_greater(*sty,*stx) )
12413  {
12414  *stp = ae_minreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
12415  }
12416  else
12417  {
12418  *stp = ae_maxreal(*stx+0.66*(*sty-(*stx)), *stp, _state);
12419  }
12420  }
12421 }
12422 
12423 
12424 ae_bool _linminstate_init(void* _p, ae_state *_state, ae_bool make_automatic)
12425 {
12426  linminstate *p = (linminstate*)_p;
12427  ae_touch_ptr((void*)p);
12428  return ae_true;
12429 }
12430 
12431 
12432 ae_bool _linminstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
12433 {
12434  linminstate *dst = (linminstate*)_dst;
12435  linminstate *src = (linminstate*)_src;
12436  dst->brackt = src->brackt;
12437  dst->stage1 = src->stage1;
12438  dst->infoc = src->infoc;
12439  dst->dg = src->dg;
12440  dst->dgm = src->dgm;
12441  dst->dginit = src->dginit;
12442  dst->dgtest = src->dgtest;
12443  dst->dgx = src->dgx;
12444  dst->dgxm = src->dgxm;
12445  dst->dgy = src->dgy;
12446  dst->dgym = src->dgym;
12447  dst->finit = src->finit;
12448  dst->ftest1 = src->ftest1;
12449  dst->fm = src->fm;
12450  dst->fx = src->fx;
12451  dst->fxm = src->fxm;
12452  dst->fy = src->fy;
12453  dst->fym = src->fym;
12454  dst->stx = src->stx;
12455  dst->sty = src->sty;
12456  dst->stmin = src->stmin;
12457  dst->stmax = src->stmax;
12458  dst->width = src->width;
12459  dst->width1 = src->width1;
12460  dst->xtrapf = src->xtrapf;
12461  return ae_true;
12462 }
12463 
12464 
12465 void _linminstate_clear(void* _p)
12466 {
12467  linminstate *p = (linminstate*)_p;
12468  ae_touch_ptr((void*)p);
12469 }
12470 
12471 
12472 void _linminstate_destroy(void* _p)
12473 {
12474  linminstate *p = (linminstate*)_p;
12475  ae_touch_ptr((void*)p);
12476 }
12477 
12478 
12479 ae_bool _armijostate_init(void* _p, ae_state *_state, ae_bool make_automatic)
12480 {
12481  armijostate *p = (armijostate*)_p;
12482  ae_touch_ptr((void*)p);
12483  if( !ae_vector_init(&p->x, 0, DT_REAL, _state, make_automatic) )
12484  return ae_false;
12485  if( !ae_vector_init(&p->xbase, 0, DT_REAL, _state, make_automatic) )
12486  return ae_false;
12487  if( !ae_vector_init(&p->s, 0, DT_REAL, _state, make_automatic) )
12488  return ae_false;
12489  if( !_rcommstate_init(&p->rstate, _state, make_automatic) )
12490  return ae_false;
12491  return ae_true;
12492 }
12493 
12494 
12495 ae_bool _armijostate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
12496 {
12497  armijostate *dst = (armijostate*)_dst;
12498  armijostate *src = (armijostate*)_src;
12499  dst->needf = src->needf;
12500  if( !ae_vector_init_copy(&dst->x, &src->x, _state, make_automatic) )
12501  return ae_false;
12502  dst->f = src->f;
12503  dst->n = src->n;
12504  if( !ae_vector_init_copy(&dst->xbase, &src->xbase, _state, make_automatic) )
12505  return ae_false;
12506  if( !ae_vector_init_copy(&dst->s, &src->s, _state, make_automatic) )
12507  return ae_false;
12508  dst->stplen = src->stplen;
12509  dst->fcur = src->fcur;
12510  dst->stpmax = src->stpmax;
12511  dst->fmax = src->fmax;
12512  dst->nfev = src->nfev;
12513  dst->info = src->info;
12514  if( !_rcommstate_init_copy(&dst->rstate, &src->rstate, _state, make_automatic) )
12515  return ae_false;
12516  return ae_true;
12517 }
12518 
12519 
12520 void _armijostate_clear(void* _p)
12521 {
12522  armijostate *p = (armijostate*)_p;
12523  ae_touch_ptr((void*)p);
12524  ae_vector_clear(&p->x);
12525  ae_vector_clear(&p->xbase);
12526  ae_vector_clear(&p->s);
12527  _rcommstate_clear(&p->rstate);
12528 }
12529 
12530 
12531 void _armijostate_destroy(void* _p)
12532 {
12533  armijostate *p = (armijostate*)_p;
12534  ae_touch_ptr((void*)p);
12535  ae_vector_destroy(&p->x);
12536  ae_vector_destroy(&p->xbase);
12537  ae_vector_destroy(&p->s);
12538  _rcommstate_destroy(&p->rstate);
12539 }
12540 
12541 
12542 
12543 
12544 void findprimitiverootandinverse(ae_int_t n,
12545  ae_int_t* proot,
12546  ae_int_t* invproot,
12547  ae_state *_state)
12548 {
12549  ae_int_t candroot;
12550  ae_int_t phin;
12551  ae_int_t q;
12552  ae_int_t f;
12553  ae_bool allnonone;
12554  ae_int_t x;
12555  ae_int_t lastx;
12556  ae_int_t y;
12557  ae_int_t lasty;
12558  ae_int_t a;
12559  ae_int_t b;
12560  ae_int_t t;
12561  ae_int_t n2;
12562 
12563  *proot = 0;
12564  *invproot = 0;
12565 
12566  ae_assert(n>=3, "FindPrimitiveRootAndInverse: N<3", _state);
12567  *proot = 0;
12568  *invproot = 0;
12569 
12570  /*
12571  * check that N is prime
12572  */
12573  ae_assert(ntheory_isprime(n, _state), "FindPrimitiveRoot: N is not prime", _state);
12574 
12575  /*
12576  * Because N is prime, Euler totient function is equal to N-1
12577  */
12578  phin = n-1;
12579 
12580  /*
12581  * Test different values of PRoot - from 2 to N-1.
12582  * One of these values MUST be primitive root.
12583  *
12584  * For testing we use algorithm from Wiki (Primitive root modulo n):
12585  * * compute phi(N)
12586  * * determine the different prime factors of phi(N), say p1, ..., pk
12587  * * for every element m of Zn*, compute m^(phi(N)/pi) mod N for i=1..k
12588  * using a fast algorithm for modular exponentiation.
12589  * * a number m for which these k results are all different from 1 is a
12590  * primitive root.
12591  */
12592  for(candroot=2; candroot<=n-1; candroot++)
12593  {
12594 
12595  /*
12596  * We have current candidate root in CandRoot.
12597  *
12598  * Scan different prime factors of PhiN. Here:
12599  * * F is a current candidate factor
12600  * * Q is a current quotient - the amount left after dividing PhiN
12601  * by all previous factors
12602  *
12603  * For each factor, perform test mentioned above.
12604  */
12605  q = phin;
12606  f = 2;
12607  allnonone = ae_true;
12608  while(q>1)
12609  {
12610  if( q%f==0 )
12611  {
12612  t = ntheory_modexp(candroot, phin/f, n, _state);
12613  if( t==1 )
12614  {
12615  allnonone = ae_false;
12616  break;
12617  }
12618  while(q%f==0)
12619  {
12620  q = q/f;
12621  }
12622  }
12623  f = f+1;
12624  }
12625  if( allnonone )
12626  {
12627  *proot = candroot;
12628  break;
12629  }
12630  }
12631  ae_assert(*proot>=2, "FindPrimitiveRoot: internal error (root not found)", _state);
12632 
12633  /*
12634  * Use extended Euclidean algorithm to find multiplicative inverse of primitive root
12635  */
12636  x = 0;
12637  lastx = 1;
12638  y = 1;
12639  lasty = 0;
12640  a = *proot;
12641  b = n;
12642  while(b!=0)
12643  {
12644  q = a/b;
12645  t = a%b;
12646  a = b;
12647  b = t;
12648  t = lastx-q*x;
12649  lastx = x;
12650  x = t;
12651  t = lasty-q*y;
12652  lasty = y;
12653  y = t;
12654  }
12655  while(lastx<0)
12656  {
12657  lastx = lastx+n;
12658  }
12659  *invproot = lastx;
12660 
12661  /*
12662  * Check that it is safe to perform multiplication modulo N.
12663  * Check results for consistency.
12664  */
12665  n2 = (n-1)*(n-1);
12666  ae_assert(n2/(n-1)==n-1, "FindPrimitiveRoot: internal error", _state);
12667  ae_assert(*proot*(*invproot)/(*proot)==(*invproot), "FindPrimitiveRoot: internal error", _state);
12668  ae_assert(*proot*(*invproot)/(*invproot)==(*proot), "FindPrimitiveRoot: internal error", _state);
12669  ae_assert(*proot*(*invproot)%n==1, "FindPrimitiveRoot: internal error", _state);
12670 }
12671 
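/*
 * Editorial worked example (standalone): for N=7, Phi(N)=6 with prime factors
 * {2,3}.  Candidate 2 is rejected because 2^(6/2) mod 7 = 1; candidate 3 is
 * accepted because 3^(6/2) mod 7 = 6 and 3^(6/3) mod 7 = 2, so PRoot=3.  The
 * extended Euclidean step then yields InvPRoot=5, since 3*5 mod 7 = 1.
 */
#include <assert.h>
static void primitive_root_demo(void)
{
    int n = 7, proot = 3, invproot = 5;
    assert(proot*invproot%n==1);             /* same consistency check as above */
}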
12672 
12673 static ae_bool ntheory_isprime(ae_int_t n, ae_state *_state)
12674 {
12675  ae_int_t p;
12676  ae_bool result;
12677 
12678 
12679  result = ae_false;
12680  p = 2;
12681  while(p*p<=n)
12682  {
12683  if( n%p==0 )
12684  {
12685  return result;
12686  }
12687  p = p+1;
12688  }
12689  result = ae_true;
12690  return result;
12691 }
12692 
12693 
12694 static ae_int_t ntheory_modmul(ae_int_t a,
12695  ae_int_t b,
12696  ae_int_t n,
12697  ae_state *_state)
12698 {
12699  ae_int_t t;
12700  double ra;
12701  double rb;
12702  ae_int_t result;
12703 
12704 
12705  ae_assert(a>=0&&a<n, "ModMul: A<0 or A>=N", _state);
12706  ae_assert(b>=0&&b<n, "ModMul: B<0 or B>=N", _state);
12707 
12708  /*
12709  * Base cases
12710  */
12711  ra = a;
12712  rb = b;
12713  if( b==0||a==0 )
12714  {
12715  result = 0;
12716  return result;
12717  }
12718  if( b==1||a==1 )
12719  {
12720  result = a*b;
12721  return result;
12722  }
12723  if( ae_fp_eq(ra*rb,a*b) )
12724  {
12725  result = a*b%n;
12726  return result;
12727  }
12728 
12729  /*
12730  * Non-base cases
12731  */
12732  if( b%2==0 )
12733  {
12734 
12735  /*
12736  * A*B = (A*(B/2)) * 2
12737  *
12738  * Product T=A*(B/2) is calculated recursively, product T*2 is
12739  * calculated as follows:
12740  * * result:=T-N
12741  * * result:=result+T
12742  * * if result<0 then result:=result+N
12743  *
12744  * In case the integer result overflows, we generate an exception
12745  */
12746  t = ntheory_modmul(a, b/2, n, _state);
12747  result = t-n;
12748  result = result+t;
12749  if( result<0 )
12750  {
12751  result = result+n;
12752  }
12753  }
12754  else
12755  {
12756 
12757  /*
12758  * A*B = (A*(B div 2)) * 2 + A
12759  *
12760  * Product T=A*(B/2) is calculated recursively, product T*2 is
12761  * calculated as follows:
12762  * * result:=T-N
12763  * * result:=result+T
12764  * * if result<0 then result:=result+N
12765  *
12766  * In case the integer result overflows, we generate an exception
12767  */
12768  t = ntheory_modmul(a, b/2, n, _state);
12769  result = t-n;
12770  result = result+t;
12771  if( result<0 )
12772  {
12773  result = result+n;
12774  }
12775  result = result-n;
12776  result = result+a;
12777  if( result<0 )
12778  {
12779  result = result+n;
12780  }
12781  }
12782  return result;
12783 }
12784 
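/*
 * Editorial sketch (standalone): the same overflow-free recursion as
 * ntheory_modmul() above.  A*B mod N is built from A*(B/2) mod N by one
 * modular doubling, plus one modular addition of A when B is odd, so no
 * intermediate value exceeds N in magnitude.  Assumes 0<=A,B<N.
 */
static long long modmul_demo(long long a, long long b, long long n)
{
    long long t, r;
    if( a==0||b==0 )
        return 0;
    t = modmul_demo(a, b/2, n);              /* T = A*(B/2) mod N, 0<=T<N */
    r = (t-n)+t;                             /* 2*T mod N without overflow */
    if( r<0 )
        r = r+n;
    if( b%2!=0 )
    {
        r = (r-n)+a;                         /* add A modulo N, still overflow-free */
        if( r<0 )
            r = r+n;
    }
    return r;
}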
12785 
12786 static ae_int_t ntheory_modexp(ae_int_t a,
12787  ae_int_t b,
12788  ae_int_t n,
12789  ae_state *_state)
12790 {
12791  ae_int_t t;
12792  ae_int_t result;
12793 
12794 
12795  ae_assert(a>=0&&a<n, "ModExp: A<0 or A>=N", _state);
12796  ae_assert(b>=0, "ModExp: B<0", _state);
12797 
12798  /*
12799  * Base cases
12800  */
12801  if( b==0 )
12802  {
12803  result = 1;
12804  return result;
12805  }
12806  if( b==1 )
12807  {
12808  result = a;
12809  return result;
12810  }
12811 
12812  /*
12813  * Non-base cases
12814  */
12815  if( b%2==0 )
12816  {
12817  t = ntheory_modmul(a, a, n, _state);
12818  result = ntheory_modexp(t, b/2, n, _state);
12819  }
12820  else
12821  {
12822  t = ntheory_modmul(a, a, n, _state);
12823  result = ntheory_modexp(t, b/2, n, _state);
12824  result = ntheory_modmul(result, a, n, _state);
12825  }
12826  return result;
12827 }
12828 
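/*
 * Editorial sketch (standalone): iterative square-and-multiply form of the
 * recursion in ntheory_modexp() above.  For brevity it multiplies directly
 * modulo N, so it assumes N*N fits in long long; the routine above avoids
 * even that by delegating every product to ntheory_modmul().
 */
static long long modexp_demo(long long a, long long b, long long n)
{
    long long r = 1%n;
    a = a%n;
    while( b>0 )
    {
        if( b%2!=0 )
            r = r*a%n;                       /* fold in the current bit of B */
        a = a*a%n;                           /* square the base */
        b = b/2;
    }
    return r;
}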
12829 
12830 
12831 
12832 /*************************************************************************
12833 This subroutine generates an FFT plan for K complex FFTs of length N each.
12834 
12835 INPUT PARAMETERS:
12836  N - FFT length (in complex numbers), N>=1
12837  K - number of repetitions, K>=1
12838 
12839 OUTPUT PARAMETERS:
12840  Plan - plan
12841 
12842  -- ALGLIB --
12843  Copyright 05.04.2013 by Bochkanov Sergey
12844 *************************************************************************/
12845 void ftcomplexfftplan(ae_int_t n,
12846  ae_int_t k,
12847  fasttransformplan* plan,
12848  ae_state *_state)
12849 {
12850  ae_frame _frame_block;
12851  srealarray bluesteinbuf;
12852  ae_int_t rowptr;
12853  ae_int_t bluesteinsize;
12854  ae_int_t precrptr;
12855  ae_int_t preciptr;
12856  ae_int_t precrsize;
12857  ae_int_t precisize;
12858 
12859  ae_frame_make(_state, &_frame_block);
12861  _srealarray_init(&bluesteinbuf, _state, ae_true);
12862 
12863 
12864  /*
12865  * Initial check for parameters
12866  */
12867  ae_assert(n>0, "FTComplexFFTPlan: N<=0", _state);
12868  ae_assert(k>0, "FTComplexFFTPlan: K<=0", _state);
12869 
12870  /*
12871  * Determine required sizes of precomputed real and integer
12872  * buffers. This stage of code is highly dependent on internals
12873  * of FTComplexFFTPlanRec() and must be kept synchronized with
12874  * possible changes in internals of plan generation function.
12875  *
12876  * Buffer size is determined as follows:
12877  * * N is factorized
12878  * * we factor out anything which is less or equal to MaxRadix
12879  * * prime factor F>RaderThreshold requires 4*FTBaseFindSmooth(2*F-1)
12880  * real entries to store precomputed Quantities for Bluestein's
12881  * transformation
12882  * * prime factor F<=RaderThreshold does NOT require
12883  * precomputed storage
12884  */
12885  precrsize = 0;
12886  precisize = 0;
12887  ftbase_ftdeterminespacerequirements(n, &precrsize, &precisize, _state);
12888  if( precrsize>0 )
12889  {
12890  ae_vector_set_length(&plan->precr, precrsize, _state);
12891  }
12892  if( precisize>0 )
12893  {
12894  ae_vector_set_length(&plan->preci, precisize, _state);
12895  }
12896 
12897  /*
12898  * Generate plan
12899  */
12900  rowptr = 0;
12901  precrptr = 0;
12902  preciptr = 0;
12903  bluesteinsize = 1;
12904  ae_vector_set_length(&plan->buffer, 2*n*k, _state);
12905  ftbase_ftcomplexfftplanrec(n, k, ae_true, ae_true, &rowptr, &bluesteinsize, &precrptr, &preciptr, plan, _state);
12906  ae_vector_set_length(&bluesteinbuf.val, bluesteinsize, _state);
12907  ae_shared_pool_set_seed(&plan->bluesteinpool, &bluesteinbuf, sizeof(bluesteinbuf), _srealarray_init, _srealarray_init_copy, _srealarray_destroy, _state);
12908 
12909  /*
12910  * Check that actual amount of precomputed space used by transformation
12911  * plan is EXACTLY equal to amount of space allocated by us.
12912  */
12913  ae_assert(precrptr==precrsize, "FTComplexFFTPlan: internal error (PrecRPtr<>PrecRSize)", _state);
12914  ae_assert(preciptr==precisize, "FTComplexFFTPlan: internal error (PrecIPtr<>PrecISize)", _state);
12915  ae_frame_leave(_state);
12916 }
12917 
12918 
12919 /*************************************************************************
12920 This subroutine applies transformation plan to input/output array A.
12921 
12922 INPUT PARAMETERS:
12923  Plan - transformation plan
12924  A - array, must be large enough for plan to work
12925  OffsA - offset of the subarray to process
12926  RepCnt - repetition count (transformation is repeatedly applied
12927  to subsequent subarrays)
12928 
12929 OUTPUT PARAMETERS:
12930  Plan - plan (temporary buffers can be modified, plan itself
12931  is unchanged and can be reused)
12932  A - transformed array
12933 
12934  -- ALGLIB --
12935  Copyright 05.04.2013 by Bochkanov Sergey
12936 *************************************************************************/
12937 void ftapplyplan(fasttransformplan* plan,
12938  /* Real */ ae_vector* a,
12939  ae_int_t offsa,
12940  ae_int_t repcnt,
12941  ae_state *_state)
12942 {
12943  ae_int_t plansize;
12944  ae_int_t i;
12945 
12946 
12947  plansize = plan->entries.ptr.pp_int[0][ftbase_coloperandscnt]*plan->entries.ptr.pp_int[0][ftbase_coloperandsize]*plan->entries.ptr.pp_int[0][ftbase_colmicrovectorsize];
12948  for(i=0; i<=repcnt-1; i++)
12949  {
12950  ftbase_ftapplysubplan(plan, 0, a, offsa+plansize*i, 0, &plan->buffer, 1, _state);
12951  }
12952 }
12953 
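/*
 * Editorial usage sketch (hedged, not part of the ALGLIB source): a minimal
 * single-transform use of the two routines above.  A holds N interleaved
 * complex values (re[0],im[0],re[1],im[1],...); vector setup and error
 * handling are omitted.
 *
 *     fasttransformplan plan;
 *     ae_vector a;                            // length 2*N doubles
 *     ...
 *     ftcomplexfftplan(n, 1, &plan, _state);
 *     ftapplyplan(&plan, &a, 0, 1, _state);   // in-place FFT of A[0..2*N-1]
 */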
12954 
12955 /*************************************************************************
12956 Returns good factorization N=N1*N2.
12957 
12958 Usually N1<=N2 (but not always - small N's may be an exception).
12959 If N1<>1 then N2<>1.
12960 
12961 Factorization is chosen depending on task type and codelets we have.
12962 
12963  -- ALGLIB --
12964  Copyright 01.05.2009 by Bochkanov Sergey
12965 *************************************************************************/
12966 void ftbasefactorize(ae_int_t n,
12967  ae_int_t tasktype,
12968  ae_int_t* n1,
12969  ae_int_t* n2,
12970  ae_state *_state)
12971 {
12972  ae_int_t j;
12973 
12974  *n1 = 0;
12975  *n2 = 0;
12976 
12977  *n1 = 0;
12978  *n2 = 0;
12979 
12980  /*
12981  * try to find good codelet
12982  */
12983  if( *n1*(*n2)!=n )
12984  {
12985  for(j=ftbase_ftbasecodeletrecommended; j>=2; j--)
12986  {
12987  if( n%j==0 )
12988  {
12989  *n1 = j;
12990  *n2 = n/j;
12991  break;
12992  }
12993  }
12994  }
12995 
12996  /*
12997  * try to factorize N
12998  */
12999  if( *n1*(*n2)!=n )
13000  {
13001  for(j=ftbase_ftbasecodeletrecommended+1; j<=n-1; j++)
13002  {
13003  if( n%j==0 )
13004  {
13005  *n1 = j;
13006  *n2 = n/j;
13007  break;
13008  }
13009  }
13010  }
13011 
13012  /*
13013  * looks like N is prime :(
13014  */
13015  if( *n1*(*n2)!=n )
13016  {
13017  *n1 = 1;
13018  *n2 = n;
13019  }
13020 
13021  /*
13022  * normalize
13023  */
13024  if( *n2==1&&*n1!=1 )
13025  {
13026  *n2 = *n1;
13027  *n1 = 1;
13028  }
13029 }
13030 
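/*
 * Editorial sketch (standalone): the selection order used above, with the
 * recommended-codelet limit passed in explicitly because the real constant is
 * internal to this unit.  E.g. with codeletmax=5: N=30 -> (5,6), N=22 -> (2,11),
 * prime N=17 -> (1,17).
 */
static void factorize_demo(int n, int codeletmax, int *n1, int *n2)
{
    int j;
    *n1 = 0;
    *n2 = 0;
    for(j=codeletmax; j>=2&&*n1==0; j--)     /* prefer a codelet-sized N1 */
        if( n%j==0 )
        {
            *n1 = j;
            *n2 = n/j;
        }
    for(j=codeletmax+1; j<=n-1&&*n1==0; j++) /* otherwise the smallest larger divisor */
        if( n%j==0 )
        {
            *n1 = j;
            *n2 = n/j;
        }
    if( *n1==0 )                             /* N is prime (or too small to split) */
    {
        *n1 = 1;
        *n2 = n;
    }
    if( *n2==1&&*n1!=1 )                     /* normalize trivial splits to N1=1 */
    {
        *n2 = *n1;
        *n1 = 1;
    }
}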
13031 
13032 /*************************************************************************
13033 Is number smooth?
13034 
13035  -- ALGLIB --
13036  Copyright 01.05.2009 by Bochkanov Sergey
13037 *************************************************************************/
13038 ae_bool ftbaseissmooth(ae_int_t n, ae_state *_state)
13039 {
13040  ae_int_t i;
13041  ae_bool result;
13042 
13043 
13044  for(i=2; i<=ftbase_ftbasemaxsmoothfactor; i++)
13045  {
13046  while(n%i==0)
13047  {
13048  n = n/i;
13049  }
13050  }
13051  result = n==1;
13052  return result;
13053 }
13054 
13055 
13056 /*************************************************************************
13057 Returns smallest smooth (divisible only by 2, 3, 5) number that is greater
13058 than or equal to max(N,2)
13059 
13060  -- ALGLIB --
13061  Copyright 01.05.2009 by Bochkanov Sergey
13062 *************************************************************************/
13063 ae_int_t ftbasefindsmooth(ae_int_t n, ae_state *_state)
13064 {
13065  ae_int_t best;
13066  ae_int_t result;
13067 
13068 
13069  best = 2;
13070  while(best<n)
13071  {
13072  best = 2*best;
13073  }
13074  ftbase_ftbasefindsmoothrec(n, 1, 2, &best, _state);
13075  result = best;
13076  return result;
13077 }
13078 
13079 
13080 /*************************************************************************
13081 Returns smallest smooth (divisible only by 2, 3, 5) even number that is
13082 greater than or equal to max(N,2)
13083 
13084  -- ALGLIB --
13085  Copyright 01.05.2009 by Bochkanov Sergey
13086 *************************************************************************/
13087 ae_int_t ftbasefindsmootheven(ae_int_t n, ae_state *_state)
13088 {
13089  ae_int_t best;
13090  ae_int_t result;
13091 
13092 
13093  best = 2;
13094  while(best<n)
13095  {
13096  best = 2*best;
13097  }
13098  ftbase_ftbasefindsmoothrec(n, 2, 2, &best, _state);
13099  result = best;
13100  return result;
13101 }
13102 
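/*
 * Editorial sketch (standalone): "smooth" above means divisible by nothing but
 * the small prime factors 2, 3 and 5 for which fast codelets exist.  For
 * example 360 = 2^3*3^2*5 is smooth while 14 = 2*7 is not, so the smallest
 * smooth number >= 121 is 125 and the smallest smooth even number >= 121 is 128.
 * Brute-force equivalents (the routines above use a recursive search instead):
 */
static int issmooth_demo(int n)
{
    int f;
    for(f=2; f<=5; f++)
        while( n%f==0 )
            n = n/f;
    return n==1;
}

static int findsmooth_demo(int n)
{
    int m = n<2 ? 2 : n;
    while( !issmooth_demo(m) )
        m = m+1;
    return m;
}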
13103 
13104 /*************************************************************************
13105 Returns estimate of FLOP count for the FFT.
13106 
13107 It is only an estimate based on the operation count for the PERFECT FFT
13108 and the relative inefficiency of the algorithm actually used.
13109 
13110 N should be a power of 2; estimates are badly wrong for non-power-of-2 N's.
13111 
13112  -- ALGLIB --
13113  Copyright 01.05.2009 by Bochkanov Sergey
13114 *************************************************************************/
13115 double ftbasegetflopestimate(ae_int_t n, ae_state *_state)
13116 {
13117  double result;
13118 
13119 
13120  result = ftbase_ftbaseinefficiencyfactor*(4*n*ae_log(n, _state)/ae_log(2, _state)-6*n+8);
13121  return result;
13122 }
13123 
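/*
 * Editorial note (standalone sketch): the estimate above is
 * InefficiencyFactor*(4*N*log2(N)-6*N+8), where the bracketed term is the
 * classic split-radix operation count for a length-N complex FFT; for N=1024
 * it equals 4*1024*10-6*1024+8 = 34824 before scaling.  The factor itself is
 * internal to this unit, so it is passed in explicitly here.
 */
#include <math.h>
static double flop_estimate_demo(long n, double inefficiencyfactor)
{
    double nn = (double)n;
    return inefficiencyfactor*(4*nn*log(nn)/log(2.0)-6*nn+8);
}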
13124 
13125 /*************************************************************************
13126 This function returns EXACT estimate of the space requirements for N-point
13127 FFT. Internals of this function are highly dependent on details of different
13128 FFTs employed by this unit, so every time algorithm is changed this function
13129 has to be rewritten.
13130 
13131 INPUT PARAMETERS:
13132  N - transform length
13133  PrecRSize - must be set to zero
13134  PrecISize - must be set to zero
13135 
13136 OUTPUT PARAMETERS:
13137  PrecRSize - number of real temporaries required for transformation
13138  PrecISize - number of integer temporaries required for transformation
13139 
13140 
13141  -- ALGLIB --
13142  Copyright 05.04.2013 by Bochkanov Sergey
13143 *************************************************************************/
13144 static void ftbase_ftdeterminespacerequirements(ae_int_t n,
13145  ae_int_t* precrsize,
13146  ae_int_t* precisize,
13147  ae_state *_state)
13148 {
13149  ae_int_t ncur;
13150  ae_int_t f;
13151  ae_int_t i;
13152 
13153 
13154 
13155  /*
13156  * Determine required sizes of precomputed real and integer
13157  * buffers. This stage of code is highly dependent on internals
13158  * of FTComplexFFTPlanRec() and must be kept synchronized with
13159  * possible changes in internals of plan generation function.
13160  *
13161  * Buffer size is determined as follows:
13162  * * N is factorized
13163  * * we factor out anything which is less or equal to MaxRadix
13164  * * prime factor F>RaderThreshold requires 4*FTBaseFindSmooth(2*F-1)
13165  * real entries to store precomputed Quantities for Bluestein's
13166  * transformation
13167  * * prime factor F<=RaderThreshold requires 2*(F-1)+ESTIMATE(F-1)
13168  * precomputed storage
13169  */
13170  ncur = n;
13171  for(i=2; i<=ftbase_maxradix; i++)
13172  {
13173  while(ncur%i==0)
13174  {
13175  ncur = ncur/i;
13176  }
13177  }
13178  f = 2;
13179  while(f<=ncur)
13180  {
13181  while(ncur%f==0)
13182  {
13183  if( f>ftbase_raderthreshold )
13184  {
13185  *precrsize = *precrsize+4*ftbasefindsmooth(2*f-1, _state);
13186  }
13187  else
13188  {
13189  *precrsize = *precrsize+2*(f-1);
13190  ftbase_ftdeterminespacerequirements(f-1, precrsize, precisize, _state);
13191  }
13192  ncur = ncur/f;
13193  }
13194  f = f+1;
13195  }
13196 }
13197 
13198 
13199 /*************************************************************************
13200 Recurrent function called by FTComplexFFTPlan() and other functions. It
13201 recursively builds transformation plan
13202 
13203 INPUT PARAMETERS:
13204  N - FFT length (in complex numbers), N>=1
13205  K - number of repetitions, K>=1
13206  ChildPlan - if True, plan generator inserts OpStart/opEnd in the
13207  plan header/footer.
13208  TopmostPlan - if True, plan generator assumes that it is topmost plan:
13209  * it may use global buffer for transpositions
13210  and there is no other plan which executes in parallel
13211  RowPtr - index which points to past-the-last entry generated so far
13212  BluesteinSize- amount of storage (in real numbers) required for Bluestein buffer
13213  PrecRPtr - pointer to unused part of precomputed real buffer (Plan.PrecR):
13214  * when this function stores some data to precomputed buffer,
13215  it advances pointer.
13216  * it is responsibility of the function to assert that
13217  Plan.PrecR has enough space to store data before actually
13218  writing to buffer.
13219  * it is responsibility of the caller to allocate enough
13220  space before calling this function
13221  PrecIPtr - pointer to unused part of precomputed integer buffer (Plan.PrecI):
13222  * when this function stores some data to precomputed buffer,
13223  it advances pointer.
13224  * it is responsibility of the function to assert that
13225  Plan.PrecI has enough space to store data before actually
13226  writing to buffer.
13227  * it is responsibility of the caller to allocate enough
13228  space before calling this function
13229  Plan - plan (generated so far)
13230 
13231 OUTPUT PARAMETERS:
13232  RowPtr - updated pointer (advanced by number of entries generated
13233  by function)
13234  BluesteinSize- updated amount
13235  (may be increased, but may never be decreased)
13236 
13237 NOTE: in case TopmostPlan is True, ChildPlan must also be True.
13238 
13239  -- ALGLIB --
13240  Copyright 05.04.2013 by Bochkanov Sergey
13241 *************************************************************************/
13242 static void ftbase_ftcomplexfftplanrec(ae_int_t n,
13243  ae_int_t k,
13244  ae_bool childplan,
13245  ae_bool topmostplan,
13246  ae_int_t* rowptr,
13247  ae_int_t* bluesteinsize,
13248  ae_int_t* precrptr,
13249  ae_int_t* preciptr,
13250  fasttransformplan* plan,
13251  ae_state *_state)
13252 {
13253  ae_frame _frame_block;
13254  srealarray localbuf;
13255  ae_int_t m;
13256  ae_int_t n1;
13257  ae_int_t n2;
13258  ae_int_t gq;
13259  ae_int_t giq;
13260  ae_int_t row0;
13261  ae_int_t row1;
13262  ae_int_t row2;
13263  ae_int_t row3;
13264 
13265  ae_frame_make(_state, &_frame_block);
13266  _srealarray_init(&localbuf, _state, ae_true);
13267 
13268  ae_assert(n>0, "FTComplexFFTPlan: N<=0", _state);
13269  ae_assert(k>0, "FTComplexFFTPlan: K<=0", _state);
13270  ae_assert(!topmostplan||childplan, "FTComplexFFTPlan: ChildPlan is inconsistent with TopmostPlan", _state);
13271 
13272  /*
13273  * Try to generate "topmost" plan
13274  */
13275  if( topmostplan&&n>ftbase_recursivethreshold )
13276  {
13277  ftbase_ftfactorize(n, ae_false, &n1, &n2, _state);
13278  if( n1*n2==0 )
13279  {
13280 
13281  /*
13282  * Handle prime-factor FFT with Bluestein's FFT.
13283  * Determine size of Bluestein's buffer.
13284  */
13285  m = ftbasefindsmooth(2*n-1, _state);
13286  *bluesteinsize = ae_maxint(2*m, *bluesteinsize, _state);
13287 
13288  /*
13289  * Generate plan
13290  */
13291  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13292  ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
13293  row0 = *rowptr;
13294  ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
13295  ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_true, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13296  row1 = *rowptr;
13297  plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
13298  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13299 
13300  /*
13301  * Fill precomputed buffer
13302  */
13303  ftbase_ftprecomputebluesteinsfft(n, m, &plan->precr, *precrptr, _state);
13304 
13305  /*
13306  * Update pointer to the precomputed area
13307  */
13308  *precrptr = *precrptr+4*m;
13309  }
13310  else
13311  {
13312 
13313  /*
13314  * Handle composite FFT with recursive Cooley-Tukey which
13315  * uses global buffer instead of local one.
13316  */
13317  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13318  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13319  row0 = *rowptr;
13320  ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13321  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
13322  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
13323  row2 = *rowptr;
13324  ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13325  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13326  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13327  row1 = *rowptr;
13328  ftbase_ftcomplexfftplanrec(n1, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13329  plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
13330  row3 = *rowptr;
13331  ftbase_ftcomplexfftplanrec(n2, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13332  plan->entries.ptr.pp_int[row2][ftbase_colparam0] = row3-row2;
13333  }
13334  ae_frame_leave(_state);
13335  return;
13336  }
13337 
13338  /*
13339  * Prepare "non-topmost" plan:
13340  * * calculate factorization
13341  * * use local (shared) buffer
13342  * * update buffer size - ANY plan will need at least
13343  * 2*N temporaries, additional requirements can be
13344  * applied later
13345  */
13346  ftbase_ftfactorize(n, ae_false, &n1, &n2, _state);
13347 
13348  /*
13349  * Handle FFT's with N1*N2=0: either small-N or prime-factor
13350  */
13351  if( n1*n2==0 )
13352  {
13353  if( n<=ftbase_maxradix )
13354  {
13355 
13356  /*
13357  * Small-N FFT
13358  */
13359  if( childplan )
13360  {
13361  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13362  }
13363  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodeletfft, k, n, 2, 0, _state);
13364  if( childplan )
13365  {
13366  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13367  }
13368  ae_frame_leave(_state);
13369  return;
13370  }
13371  if( n<=ftbase_raderthreshold )
13372  {
13373 
13374  /*
13375  * Handle prime-factor FFT's with Rader's FFT
13376  */
13377  m = n-1;
13378  if( childplan )
13379  {
13380  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13381  }
13382  findprimitiverootandinverse(n, &gq, &giq, _state);
13383  ftbase_ftpushentry4(plan, rowptr, ftbase_opradersfft, k, n, 2, 2, gq, giq, *precrptr, _state);
13384  ftbase_ftprecomputeradersfft(n, gq, giq, &plan->precr, *precrptr, _state);
13385  *precrptr = *precrptr+2*(n-1);
13386  row0 = *rowptr;
13387  ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
13388  ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13389  row1 = *rowptr;
13390  plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
13391  if( childplan )
13392  {
13393  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13394  }
13395  }
13396  else
13397  {
13398 
13399  /*
13400  * Handle prime-factor FFT's with Bluestein's FFT
13401  */
13402  m = ftbasefindsmooth(2*n-1, _state);
13403  *bluesteinsize = ae_maxint(2*m, *bluesteinsize, _state);
13404  if( childplan )
13405  {
13406  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13407  }
13408  ftbase_ftpushentry4(plan, rowptr, ftbase_opbluesteinsfft, k, n, 2, m, 2, *precrptr, 0, _state);
13409  ftbase_ftprecomputebluesteinsfft(n, m, &plan->precr, *precrptr, _state);
13410  *precrptr = *precrptr+4*m;
13411  row0 = *rowptr;
13412  ftbase_ftpushentry(plan, rowptr, ftbase_opjmp, 0, 0, 0, 0, _state);
13413  ftbase_ftcomplexfftplanrec(m, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13414  row1 = *rowptr;
13415  plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
13416  if( childplan )
13417  {
13418  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13419  }
13420  }
13421  ae_frame_leave(_state);
13422  return;
13423  }
13424 
13425  /*
13426  * Handle Cooley-Tukey FFT with small N1
13427  */
13428  if( n1<=ftbase_maxradix )
13429  {
13430 
13431  /*
13432  * Specialized transformation for small N1:
13433  * * N2 short inplace FFT's, each N1-point, with integrated twiddle factors
13434  * * N1 long FFT's
13435  * * final transposition
13436  */
13437  if( childplan )
13438  {
13439  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13440  }
13441  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexcodelettwfft, k, n1, 2*n2, 0, _state);
13442  ftbase_ftcomplexfftplanrec(n2, k*n1, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13443  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13444  if( childplan )
13445  {
13446  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13447  }
13448  ae_frame_leave(_state);
13449  return;
13450  }
13451 
13452  /*
13453  * Handle general Cooley-Tukey FFT, either "flat" or "recursive"
13454  */
13455  if( n<=ftbase_recursivethreshold )
13456  {
13457 
13458  /*
13459  * General code for large N1/N2, "flat" version without explicit recurrence
13460  * (nested subplans are inserted directly into the body of the plan)
13461  */
13462  if( childplan )
13463  {
13464  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13465  }
13466  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13467  ftbase_ftcomplexfftplanrec(n1, k*n2, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13468  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
13469  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
13470  ftbase_ftcomplexfftplanrec(n2, k*n1, ae_false, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13471  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13472  if( childplan )
13473  {
13474  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13475  }
13476  }
13477  else
13478  {
13479 
13480  /*
13481  * General code for large N1/N2, "recursive" version - nested subplans
13482  * are separated from the plan body.
13483  *
13484  * Generate parent plan.
13485  */
13486  if( childplan )
13487  {
13488  ftbase_ftpushentry2(plan, rowptr, ftbase_opstart, k, n, 2, -1, ftbase_ftoptimisticestimate(n, _state), _state);
13489  }
13490  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13491  row0 = *rowptr;
13492  ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n2, n1, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13493  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplexfftfactors, k, n, 2, n1, _state);
13494  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n2, _state);
13495  row2 = *rowptr;
13496  ftbase_ftpushentry2(plan, rowptr, ftbase_opparallelcall, k*n1, n2, 2, 0, ftbase_ftoptimisticestimate(n, _state), _state);
13497  ftbase_ftpushentry(plan, rowptr, ftbase_opcomplextranspose, k, n, 2, n1, _state);
13498  if( childplan )
13499  {
13500  ftbase_ftpushentry(plan, rowptr, ftbase_opend, k, n, 2, 0, _state);
13501  }
13502 
13503  /*
13504  * Generate child subplans, insert references to them into the parent plan
13505  */
13506  row1 = *rowptr;
13507  ftbase_ftcomplexfftplanrec(n1, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13508  plan->entries.ptr.pp_int[row0][ftbase_colparam0] = row1-row0;
13509  row3 = *rowptr;
13510  ftbase_ftcomplexfftplanrec(n2, 1, ae_true, ae_false, rowptr, bluesteinsize, precrptr, preciptr, plan, _state);
13511  plan->entries.ptr.pp_int[row2][ftbase_colparam0] = row3-row2;
13512  }
13513  ae_frame_leave(_state);
13514 }
13515 
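/*************************************************************************
Editor's note: a minimal sketch (not part of ALGLIB) of the "remember row,
push placeholder, generate more entries, patch relative offset" pattern
used above for OpJmp and OpParallelCall entries. A plain int array stands
in for the ColParam0 column of Plan.Entries, and demo_generate_child() is
a hypothetical child-plan generator.
*************************************************************************/
static void demo_push_with_patch(int* param0, int* rowptr,
    void (*demo_generate_child)(int* param0, int* rowptr))
{
    int row0;
    int row1;

    /* push placeholder entry; its Param0 offset is not known yet */
    row0 = *rowptr;
    param0[row0] = 0;
    *rowptr = *rowptr+1;

    /* generate the child subplan immediately after the placeholder */
    demo_generate_child(param0, rowptr);

    /* patch the placeholder with an offset RELATIVE to its own row, as in
       Plan.Entries[Row0][ColParam0]=Row1-Row0 above; for OpJmp the offset
       lands just past the child, while the OpParallelCall variant captures
       Row1 before generating the child so that Param0 points at the
       child's OpStart row */
    row1 = *rowptr;
    param0[row0] = row1-row0;
}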
13516 
13517 /*************************************************************************
13518 This function pushes one more entry to the plan. It resizes the Entries matrix
13519 if needed.
13520 
13521 INPUT PARAMETERS:
13522  Plan - plan (generated so far)
13523  RowPtr - index which points to past-the-last entry generated so far
13524  EType - entry type
13525  EOpCnt - operands count
13526  EOpSize - operand size
13527  EMcvSize - microvector size
13528  EParam0 - parameter 0
13529 
13530 OUTPUT PARAMETERS:
13531  Plan - updated plan
13532  RowPtr - updated pointer
13533 
13534 NOTE: Param1 is set to -1.
13535 
13536  -- ALGLIB --
13537  Copyright 05.04.2013 by Bochkanov Sergey
13538 *************************************************************************/
13539 static void ftbase_ftpushentry(fasttransformplan* plan,
13540  ae_int_t* rowptr,
13541  ae_int_t etype,
13542  ae_int_t eopcnt,
13543  ae_int_t eopsize,
13544  ae_int_t emcvsize,
13545  ae_int_t eparam0,
13546  ae_state *_state)
13547 {
13548 
13549 
13550  ftbase_ftpushentry2(plan, rowptr, etype, eopcnt, eopsize, emcvsize, eparam0, -1, _state);
13551 }
13552 
13553 
13554 /*************************************************************************
13555 Same as FTPushEntry(), but sets Param0 AND Param1.
13556 This function pushes one more entry to the plan. It resizes the Entries matrix
13557 if needed.
13558 
13559 INPUT PARAMETERS:
13560  Plan - plan (generated so far)
13561  RowPtr - index which points to past-the-last entry generated so far
13562  EType - entry type
13563  EOpCnt - operands count
13564  EOpSize - operand size
13565  EMcvSize - microvector size
13566  EParam0 - parameter 0
13567  EParam1 - parameter 1
13568 
13569 OUTPUT PARAMETERS:
13570  Plan - updated plan
13571  RowPtr - updated pointer
13572 
13573  -- ALGLIB --
13574  Copyright 05.04.2013 by Bochkanov Sergey
13575 *************************************************************************/
13576 static void ftbase_ftpushentry2(fasttransformplan* plan,
13577  ae_int_t* rowptr,
13578  ae_int_t etype,
13579  ae_int_t eopcnt,
13580  ae_int_t eopsize,
13581  ae_int_t emcvsize,
13582  ae_int_t eparam0,
13583  ae_int_t eparam1,
13584  ae_state *_state)
13585 {
13586 
13587 
13588  if( *rowptr>=plan->entries.rows )
13589  {
13590  imatrixresize(&plan->entries, ae_maxint(2*plan->entries.rows, 1, _state), ftbase_colscnt, _state);
13591  }
13592  plan->entries.ptr.pp_int[*rowptr][ftbase_coltype] = etype;
13593  plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandscnt] = eopcnt;
13594  plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandsize] = eopsize;
13595  plan->entries.ptr.pp_int[*rowptr][ftbase_colmicrovectorsize] = emcvsize;
13596  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam0] = eparam0;
13597  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam1] = eparam1;
13598  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam2] = 0;
13599  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam3] = 0;
13600  *rowptr = *rowptr+1;
13601 }
13602 
13603 
13604 /*************************************************************************
13605 Same as FTPushEntry(), but sets Param0, Param1, Param2 and Param3.
13606 This function pushes one more entry to the plan. It resizes the Entries matrix
13607 if needed.
13608 
13609 INPUT PARAMETERS:
13610  Plan - plan (generated so far)
13611  RowPtr - index which points to past-the-last entry generated so far
13612  EType - entry type
13613  EOpCnt - operands count
13614  EOpSize - operand size
13615  EMcvSize - microvector size
13616  EParam0 - parameter 0
13617  EParam1 - parameter 1
13618  EParam2 - parameter 2
13619  EParam3 - parameter 3
13620 
13621 OUTPUT PARAMETERS:
13622  Plan - updated plan
13623  RowPtr - updated pointer
13624 
13625  -- ALGLIB --
13626  Copyright 05.04.2013 by Bochkanov Sergey
13627 *************************************************************************/
13628 static void ftbase_ftpushentry4(fasttransformplan* plan,
13629  ae_int_t* rowptr,
13630  ae_int_t etype,
13631  ae_int_t eopcnt,
13632  ae_int_t eopsize,
13633  ae_int_t emcvsize,
13634  ae_int_t eparam0,
13635  ae_int_t eparam1,
13636  ae_int_t eparam2,
13637  ae_int_t eparam3,
13638  ae_state *_state)
13639 {
13640 
13641 
13642  if( *rowptr>=plan->entries.rows )
13643  {
13644  imatrixresize(&plan->entries, ae_maxint(2*plan->entries.rows, 1, _state), ftbase_colscnt, _state);
13645  }
13646  plan->entries.ptr.pp_int[*rowptr][ftbase_coltype] = etype;
13647  plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandscnt] = eopcnt;
13648  plan->entries.ptr.pp_int[*rowptr][ftbase_coloperandsize] = eopsize;
13649  plan->entries.ptr.pp_int[*rowptr][ftbase_colmicrovectorsize] = emcvsize;
13650  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam0] = eparam0;
13651  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam1] = eparam1;
13652  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam2] = eparam2;
13653  plan->entries.ptr.pp_int[*rowptr][ftbase_colparam3] = eparam3;
13654  *rowptr = *rowptr+1;
13655 }
13656 
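/*************************************************************************
Editor's note: as the three Push functions above show, each plan entry is
one row of the integer matrix Plan.Entries holding ColType,
ColOperandsCnt, ColOperandSize, ColMicrovectorSize and four parameters
ColParam0..ColParam3. FTPushEntry() sets Param1 to -1 (see its NOTE),
FTPushEntry2() sets Param2 and Param3 to 0, FTPushEntry4() sets all four
explicitly, and all of them grow the matrix geometrically (doubling its
row count, with a minimum of one row) whenever RowPtr reaches the current
number of rows.
*************************************************************************/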
13657 
13658 /*************************************************************************
13659 This subroutine applies subplan to input/output array A.
13660 
13661 INPUT PARAMETERS:
13662  Plan - transformation plan
13663  SubPlan - subplan index
13664  A - array, must be large enough for plan to work
13665  ABase - base offset in array A, this value points to start of
13666  subarray whose length is equal to length of the plan
13667  AOffset - offset with respect to ABase, 0<=AOffset<PlanLength.
13668  This is the offset, within the larger PlanLength subarray,
13669  of the chunk to process.
13670  Buf - temporary buffer whose length is equal to plan length
13671  (without taking into account RepCnt) or larger.
13672  OffsBuf - offset in the buffer array
13673  RepCnt - repetition count (transformation is repeatedly applied
13674  to subsequent subarrays)
13675 
13676 OUTPUT PARAMETERS:
13677  Plan - plan (temporary buffers can be modified, plan itself
13678  is unchanged and can be reused)
13679  A - transformed array
13680 
13681  -- ALGLIB --
13682  Copyright 05.04.2013 by Bochkanov Sergey
13683 *************************************************************************/
13684 static void ftbase_ftapplysubplan(fasttransformplan* plan,
13685  ae_int_t subplan,
13686  /* Real */ ae_vector* a,
13687  ae_int_t abase,
13688  ae_int_t aoffset,
13689  /* Real */ ae_vector* buf,
13690  ae_int_t repcnt,
13691  ae_state *_state)
13692 {
13693  ae_frame _frame_block;
13694  ae_int_t rowidx;
13695  ae_int_t i;
13696  ae_int_t n1;
13697  ae_int_t n2;
13698  ae_int_t operation;
13699  ae_int_t operandscnt;
13700  ae_int_t operandsize;
13701  ae_int_t microvectorsize;
13702  ae_int_t param0;
13703  ae_int_t param1;
13704  ae_int_t parentsize;
13705  ae_int_t childsize;
13706  ae_int_t chunksize;
13707  ae_int_t lastchunksize;
13708  srealarray *bufa;
13709  ae_smart_ptr _bufa;
13710  srealarray *bufb;
13711  ae_smart_ptr _bufb;
13712  srealarray *bufc;
13713  ae_smart_ptr _bufc;
13714  srealarray *bufd;
13715  ae_smart_ptr _bufd;
13716 
13717  ae_frame_make(_state, &_frame_block);
13718  ae_smart_ptr_init(&_bufa, (void**)&bufa, _state, ae_true);
13719  ae_smart_ptr_init(&_bufb, (void**)&bufb, _state, ae_true);
13720  ae_smart_ptr_init(&_bufc, (void**)&bufc, _state, ae_true);
13721  ae_smart_ptr_init(&_bufd, (void**)&bufd, _state, ae_true);
13722 
13723  ae_assert(plan->entries.ptr.pp_int[subplan][ftbase_coltype]==ftbase_opstart, "FTApplySubPlan: incorrect subplan header", _state);
13724  rowidx = subplan+1;
13725  while(plan->entries.ptr.pp_int[rowidx][ftbase_coltype]!=ftbase_opend)
13726  {
13727  operation = plan->entries.ptr.pp_int[rowidx][ftbase_coltype];
13728  operandscnt = repcnt*plan->entries.ptr.pp_int[rowidx][ftbase_coloperandscnt];
13729  operandsize = plan->entries.ptr.pp_int[rowidx][ftbase_coloperandsize];
13730  microvectorsize = plan->entries.ptr.pp_int[rowidx][ftbase_colmicrovectorsize];
13731  param0 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
13732  param1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam1];
13733  touchint(&param1, _state);
13734 
13735  /*
13736  * Process "jump" operation
13737  */
13738  if( operation==ftbase_opjmp )
13739  {
13740  rowidx = rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
13741  continue;
13742  }
13743 
13744  /*
13745  * Process "parallel call" operation:
13746  * * we perform initial check for consistency between parent and child plans
13747  * * we call FTSplitAndApplyParallelPlan(), which splits parallel plan into
13748  * several parallel tasks
13749  */
13750  if( operation==ftbase_opparallelcall )
13751  {
13752  parentsize = operandsize*microvectorsize;
13753  childsize = plan->entries.ptr.pp_int[rowidx+param0][ftbase_coloperandscnt]*plan->entries.ptr.pp_int[rowidx+param0][ftbase_coloperandsize]*plan->entries.ptr.pp_int[rowidx+param0][ftbase_colmicrovectorsize];
13754  ae_assert(plan->entries.ptr.pp_int[rowidx+param0][ftbase_coltype]==ftbase_opstart, "FTApplySubPlan: incorrect child subplan header", _state);
13755  ae_assert(parentsize==childsize, "FTApplySubPlan: incorrect child subplan header", _state);
13756  chunksize = ae_maxint(ftbase_recursivethreshold/childsize, 1, _state);
13757  lastchunksize = operandscnt%chunksize;
13758  if( lastchunksize==0 )
13759  {
13760  lastchunksize = chunksize;
13761  }
13762  i = 0;
13763  while(i<operandscnt)
13764  {
13765  chunksize = ae_minint(chunksize, operandscnt-i, _state);
13766  ftbase_ftapplysubplan(plan, rowidx+param0, a, abase, aoffset+i*childsize, buf, chunksize, _state);
13767  i = i+chunksize;
13768  }
13769  rowidx = rowidx+1;
13770  continue;
13771  }
13772 
13773  /*
13774  * Process "reference complex FFT" operation
13775  */
13776  if( operation==ftbase_opcomplexreffft )
13777  {
13778  ftbase_ftapplycomplexreffft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, buf, _state);
13779  rowidx = rowidx+1;
13780  continue;
13781  }
13782 
13783  /*
13784  * Process "codelet FFT" operation
13785  */
13786  if( operation==ftbase_opcomplexcodeletfft )
13787  {
13788  ftbase_ftapplycomplexcodeletfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
13789  rowidx = rowidx+1;
13790  continue;
13791  }
13792 
13793  /*
13794  * Process "integrated codelet FFT" operation
13795  */
13796  if( operation==ftbase_opcomplexcodelettwfft )
13797  {
13798  ftbase_ftapplycomplexcodelettwfft(a, abase+aoffset, operandscnt, operandsize, microvectorsize, _state);
13799  rowidx = rowidx+1;
13800  continue;
13801  }
13802 
13803  /*
13804  * Process Bluestein's FFT operation
13805  */
13806  if( operation==ftbase_opbluesteinsfft )
13807  {
13808  ae_assert(microvectorsize==2, "FTApplySubPlan: microvectorsize!=2 for Bluesteins FFT", _state);
13809  ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufa, _state);
13810  ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufb, _state);
13811  ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufc, _state);
13812  ae_shared_pool_retrieve(&plan->bluesteinpool, &_bufd, _state);
13813  ftbase_ftbluesteinsfft(plan, a, abase, aoffset, operandscnt, operandsize, plan->entries.ptr.pp_int[rowidx][ftbase_colparam0], plan->entries.ptr.pp_int[rowidx][ftbase_colparam2], rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam1], &bufa->val, &bufb->val, &bufc->val, &bufd->val, _state);
13814  ae_shared_pool_recycle(&plan->bluesteinpool, &_bufa, _state);
13815  ae_shared_pool_recycle(&plan->bluesteinpool, &_bufb, _state);
13816  ae_shared_pool_recycle(&plan->bluesteinpool, &_bufc, _state);
13817  ae_shared_pool_recycle(&plan->bluesteinpool, &_bufd, _state);
13818  rowidx = rowidx+1;
13819  continue;
13820  }
13821 
13822  /*
13823  * Process Rader's FFT
13824  */
13825  if( operation==ftbase_opradersfft )
13826  {
13827  ftbase_ftradersfft(plan, a, abase, aoffset, operandscnt, operandsize, rowidx+plan->entries.ptr.pp_int[rowidx][ftbase_colparam0], plan->entries.ptr.pp_int[rowidx][ftbase_colparam1], plan->entries.ptr.pp_int[rowidx][ftbase_colparam2], plan->entries.ptr.pp_int[rowidx][ftbase_colparam3], buf, _state);
13828  rowidx = rowidx+1;
13829  continue;
13830  }
13831 
13832  /*
13833  * Process "complex twiddle factors" operation
13834  */
13835  if( operation==ftbase_opcomplexfftfactors )
13836  {
13837  ae_assert(microvectorsize==2, "FTApplySubPlan: MicrovectorSize<>2", _state);
13838  n1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
13839  n2 = operandsize/n1;
13840  for(i=0; i<=operandscnt-1; i++)
13841  {
13842  ftbase_ffttwcalc(a, abase+aoffset+i*operandsize*2, n1, n2, _state);
13843  }
13844  rowidx = rowidx+1;
13845  continue;
13846  }
13847 
13848  /*
13849  * Process "complex transposition" operation
13850  */
13851  if( operation==ftbase_opcomplextranspose )
13852  {
13853  ae_assert(microvectorsize==2, "FTApplySubPlan: MicrovectorSize<>2", _state);
13854  n1 = plan->entries.ptr.pp_int[rowidx][ftbase_colparam0];
13855  n2 = operandsize/n1;
13856  for(i=0; i<=operandscnt-1; i++)
13857  {
13858  ftbase_internalcomplexlintranspose(a, n1, n2, abase+aoffset+i*operandsize*2, buf, _state);
13859  }
13860  rowidx = rowidx+1;
13861  continue;
13862  }
13863 
13864  /*
13865  * Error
13866  */
13867  ae_assert(ae_false, "FTApplySubPlan: unexpected plan type", _state);
13868  }
13869  ae_frame_leave(_state);
13870 }
13871 
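/*************************************************************************
Editor's note: the sketch below shows how this dispatcher is driven by the
rest of this file (compare FTPrecomputeBluesteinsFFT below, which applies
subplan 0 of a freshly built plan to a buffer). demo_applycomplexfft() is
a hypothetical helper, not part of ALGLIB; A must hold at least 2*N reals
(N interleaved complex values starting at element 0).
*************************************************************************/
static void demo_applycomplexfft(/* Real */ ae_vector* a,
     ae_int_t n,
     ae_state *_state)
{
    ae_frame _frame_block;
    fasttransformplan plan;

    ae_frame_make(_state, &_frame_block);
    _fasttransformplan_init(&plan, _state, ae_true);

    /*
     * Build a one-repetition N-point complex FFT plan; the plan carries
     * its own temporary buffer Plan.Buffer which is reused below.
     */
    ftcomplexfftplan(n, 1, &plan, _state);

    /*
     * Subplan 0 is the topmost OpStart/OpEnd block; ABase=AOffset=0 and
     * RepCnt=1 process the single operand stored at the start of A.
     */
    ftbase_ftapplysubplan(&plan, 0, a, 0, 0, &plan.buffer, 1, _state);

    /*
     * Plan (and its buffers) is registered with the frame and destroyed
     * here, exactly as in FTPrecomputeBluesteinsFFT.
     */
    ae_frame_leave(_state);
}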
13872 
13873 /*************************************************************************
13874 This subroutine applies complex reference FFT to input/output array A.
13875 
13876 VERY SLOW OPERATION, do not use it in real life plans :)
13877 
13878 INPUT PARAMETERS:
13879  A - array, must be large enough for plan to work
13880  Offs - offset of the subarray to process
13881  OperandsCnt - operands count (see description of FastTransformPlan)
13882  OperandSize - operand size (see description of FastTransformPlan)
13883  MicrovectorSize-microvector size (see description of FastTransformPlan)
13884  Buf - temporary array, must be at least OperandsCnt*OperandSize*MicrovectorSize
13885 
13886 OUTPUT PARAMETERS:
13887  A - transformed array
13888 
13889  -- ALGLIB --
13890  Copyright 05.04.2013 by Bochkanov Sergey
13891 *************************************************************************/
13892 static void ftbase_ftapplycomplexreffft(/* Real */ ae_vector* a,
13893  ae_int_t offs,
13894  ae_int_t operandscnt,
13895  ae_int_t operandsize,
13896  ae_int_t microvectorsize,
13897  /* Real */ ae_vector* buf,
13898  ae_state *_state)
13899 {
13900  ae_int_t opidx;
13901  ae_int_t i;
13902  ae_int_t k;
13903  double hre;
13904  double him;
13905  double c;
13906  double s;
13907  double re;
13908  double im;
13909  ae_int_t n;
13910 
13911 
13912  ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1", _state);
13913  ae_assert(operandsize>=1, "FTApplyComplexRefFFT: OperandSize<1", _state);
13914  ae_assert(microvectorsize==2, "FTApplyComplexRefFFT: MicrovectorSize<>2", _state);
13915  n = operandsize;
13916  for(opidx=0; opidx<=operandscnt-1; opidx++)
13917  {
13918  for(i=0; i<=n-1; i++)
13919  {
13920  hre = 0;
13921  him = 0;
13922  for(k=0; k<=n-1; k++)
13923  {
13924  re = a->ptr.p_double[offs+opidx*operandsize*2+2*k+0];
13925  im = a->ptr.p_double[offs+opidx*operandsize*2+2*k+1];
13926  c = ae_cos(-2*ae_pi*k*i/n, _state);
13927  s = ae_sin(-2*ae_pi*k*i/n, _state);
13928  hre = hre+c*re-s*im;
13929  him = him+c*im+s*re;
13930  }
13931  buf->ptr.p_double[2*i+0] = hre;
13932  buf->ptr.p_double[2*i+1] = him;
13933  }
13934  for(i=0; i<=operandsize*2-1; i++)
13935  {
13936  a->ptr.p_double[offs+opidx*operandsize*2+i] = buf->ptr.p_double[i];
13937  }
13938  }
13939 }
13940 
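/*************************************************************************
Editor's note: the reference transform above evaluates the DFT directly,

    H[i] = SUM(k=0..N-1) (Re[k]+I*Im[k])*exp(-2*pi*I*k*i/N)

with real and imaginary parts accumulated separately (hence the cos/sin
pair per term), which is why it costs O(N^2) operations per operand and is
meant only as a slow reference, never for real-life plans.
*************************************************************************/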
13941 
13942 /*************************************************************************
13943 This subroutine applies complex codelet FFT to input/output array A.
13944 
13945 INPUT PARAMETERS:
13946  A - array, must be large enough for plan to work
13947  Offs - offset of the subarray to process
13948  OperandsCnt - operands count (see description of FastTransformPlan)
13949  OperandSize - operand size (see description of FastTransformPlan)
13950  MicrovectorSize-microvector size, must be 2
13951 
13952 OUTPUT PARAMETERS:
13953  A - transformed array
13954 
13955  -- ALGLIB --
13956  Copyright 05.04.2013 by Bochkanov Sergey
13957 *************************************************************************/
13958 static void ftbase_ftapplycomplexcodeletfft(/* Real */ ae_vector* a,
13959  ae_int_t offs,
13960  ae_int_t operandscnt,
13961  ae_int_t operandsize,
13962  ae_int_t microvectorsize,
13963  ae_state *_state)
13964 {
13965  ae_int_t opidx;
13966  ae_int_t n;
13967  ae_int_t aoffset;
13968  double a0x;
13969  double a0y;
13970  double a1x;
13971  double a1y;
13972  double a2x;
13973  double a2y;
13974  double a3x;
13975  double a3y;
13976  double a4x;
13977  double a4y;
13978  double a5x;
13979  double a5y;
13980  double v0;
13981  double v1;
13982  double v2;
13983  double v3;
13984  double t1x;
13985  double t1y;
13986  double t2x;
13987  double t2y;
13988  double t3x;
13989  double t3y;
13990  double t4x;
13991  double t4y;
13992  double t5x;
13993  double t5y;
13994  double m1x;
13995  double m1y;
13996  double m2x;
13997  double m2y;
13998  double m3x;
13999  double m3y;
14000  double m4x;
14001  double m4y;
14002  double m5x;
14003  double m5y;
14004  double s1x;
14005  double s1y;
14006  double s2x;
14007  double s2y;
14008  double s3x;
14009  double s3y;
14010  double s4x;
14011  double s4y;
14012  double s5x;
14013  double s5y;
14014  double c1;
14015  double c2;
14016  double c3;
14017  double c4;
14018  double c5;
14019  double v;
14020 
14021 
14022  ae_assert(operandscnt>=1, "FTApplyComplexCodeletFFT: OperandsCnt<1", _state);
14023  ae_assert(operandsize>=1, "FTApplyComplexCodeletFFT: OperandSize<1", _state);
14024  ae_assert(microvectorsize==2, "FTApplyComplexCodeletFFT: MicrovectorSize<>2", _state);
14025  n = operandsize;
14026 
14027  /*
14028  * Hard-coded transforms for different N's
14029  */
14030  ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletFFT: N>MaxRadix", _state);
14031  if( n==2 )
14032  {
14033  for(opidx=0; opidx<=operandscnt-1; opidx++)
14034  {
14035  aoffset = offs+opidx*operandsize*2;
14036  a0x = a->ptr.p_double[aoffset+0];
14037  a0y = a->ptr.p_double[aoffset+1];
14038  a1x = a->ptr.p_double[aoffset+2];
14039  a1y = a->ptr.p_double[aoffset+3];
14040  v0 = a0x+a1x;
14041  v1 = a0y+a1y;
14042  v2 = a0x-a1x;
14043  v3 = a0y-a1y;
14044  a->ptr.p_double[aoffset+0] = v0;
14045  a->ptr.p_double[aoffset+1] = v1;
14046  a->ptr.p_double[aoffset+2] = v2;
14047  a->ptr.p_double[aoffset+3] = v3;
14048  }
14049  return;
14050  }
14051  if( n==3 )
14052  {
14053  c1 = ae_cos(2*ae_pi/3, _state)-1;
14054  c2 = ae_sin(2*ae_pi/3, _state);
14055  for(opidx=0; opidx<=operandscnt-1; opidx++)
14056  {
14057  aoffset = offs+opidx*operandsize*2;
14058  a0x = a->ptr.p_double[aoffset+0];
14059  a0y = a->ptr.p_double[aoffset+1];
14060  a1x = a->ptr.p_double[aoffset+2];
14061  a1y = a->ptr.p_double[aoffset+3];
14062  a2x = a->ptr.p_double[aoffset+4];
14063  a2y = a->ptr.p_double[aoffset+5];
14064  t1x = a1x+a2x;
14065  t1y = a1y+a2y;
14066  a0x = a0x+t1x;
14067  a0y = a0y+t1y;
14068  m1x = c1*t1x;
14069  m1y = c1*t1y;
14070  m2x = c2*(a1y-a2y);
14071  m2y = c2*(a2x-a1x);
14072  s1x = a0x+m1x;
14073  s1y = a0y+m1y;
14074  a1x = s1x+m2x;
14075  a1y = s1y+m2y;
14076  a2x = s1x-m2x;
14077  a2y = s1y-m2y;
14078  a->ptr.p_double[aoffset+0] = a0x;
14079  a->ptr.p_double[aoffset+1] = a0y;
14080  a->ptr.p_double[aoffset+2] = a1x;
14081  a->ptr.p_double[aoffset+3] = a1y;
14082  a->ptr.p_double[aoffset+4] = a2x;
14083  a->ptr.p_double[aoffset+5] = a2y;
14084  }
14085  return;
14086  }
14087  if( n==4 )
14088  {
14089  for(opidx=0; opidx<=operandscnt-1; opidx++)
14090  {
14091  aoffset = offs+opidx*operandsize*2;
14092  a0x = a->ptr.p_double[aoffset+0];
14093  a0y = a->ptr.p_double[aoffset+1];
14094  a1x = a->ptr.p_double[aoffset+2];
14095  a1y = a->ptr.p_double[aoffset+3];
14096  a2x = a->ptr.p_double[aoffset+4];
14097  a2y = a->ptr.p_double[aoffset+5];
14098  a3x = a->ptr.p_double[aoffset+6];
14099  a3y = a->ptr.p_double[aoffset+7];
14100  t1x = a0x+a2x;
14101  t1y = a0y+a2y;
14102  t2x = a1x+a3x;
14103  t2y = a1y+a3y;
14104  m2x = a0x-a2x;
14105  m2y = a0y-a2y;
14106  m3x = a1y-a3y;
14107  m3y = a3x-a1x;
14108  a->ptr.p_double[aoffset+0] = t1x+t2x;
14109  a->ptr.p_double[aoffset+1] = t1y+t2y;
14110  a->ptr.p_double[aoffset+4] = t1x-t2x;
14111  a->ptr.p_double[aoffset+5] = t1y-t2y;
14112  a->ptr.p_double[aoffset+2] = m2x+m3x;
14113  a->ptr.p_double[aoffset+3] = m2y+m3y;
14114  a->ptr.p_double[aoffset+6] = m2x-m3x;
14115  a->ptr.p_double[aoffset+7] = m2y-m3y;
14116  }
14117  return;
14118  }
14119  if( n==5 )
14120  {
14121  v = 2*ae_pi/5;
14122  c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
14123  c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
14124  c3 = -ae_sin(v, _state);
14125  c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state));
14126  c5 = ae_sin(v, _state)-ae_sin(2*v, _state);
14127  for(opidx=0; opidx<=operandscnt-1; opidx++)
14128  {
14129  aoffset = offs+opidx*operandsize*2;
14130  t1x = a->ptr.p_double[aoffset+2]+a->ptr.p_double[aoffset+8];
14131  t1y = a->ptr.p_double[aoffset+3]+a->ptr.p_double[aoffset+9];
14132  t2x = a->ptr.p_double[aoffset+4]+a->ptr.p_double[aoffset+6];
14133  t2y = a->ptr.p_double[aoffset+5]+a->ptr.p_double[aoffset+7];
14134  t3x = a->ptr.p_double[aoffset+2]-a->ptr.p_double[aoffset+8];
14135  t3y = a->ptr.p_double[aoffset+3]-a->ptr.p_double[aoffset+9];
14136  t4x = a->ptr.p_double[aoffset+6]-a->ptr.p_double[aoffset+4];
14137  t4y = a->ptr.p_double[aoffset+7]-a->ptr.p_double[aoffset+5];
14138  t5x = t1x+t2x;
14139  t5y = t1y+t2y;
14140  a->ptr.p_double[aoffset+0] = a->ptr.p_double[aoffset+0]+t5x;
14141  a->ptr.p_double[aoffset+1] = a->ptr.p_double[aoffset+1]+t5y;
14142  m1x = c1*t5x;
14143  m1y = c1*t5y;
14144  m2x = c2*(t1x-t2x);
14145  m2y = c2*(t1y-t2y);
14146  m3x = -c3*(t3y+t4y);
14147  m3y = c3*(t3x+t4x);
14148  m4x = -c4*t4y;
14149  m4y = c4*t4x;
14150  m5x = -c5*t3y;
14151  m5y = c5*t3x;
14152  s3x = m3x-m4x;
14153  s3y = m3y-m4y;
14154  s5x = m3x+m5x;
14155  s5y = m3y+m5y;
14156  s1x = a->ptr.p_double[aoffset+0]+m1x;
14157  s1y = a->ptr.p_double[aoffset+1]+m1y;
14158  s2x = s1x+m2x;
14159  s2y = s1y+m2y;
14160  s4x = s1x-m2x;
14161  s4y = s1y-m2y;
14162  a->ptr.p_double[aoffset+2] = s2x+s3x;
14163  a->ptr.p_double[aoffset+3] = s2y+s3y;
14164  a->ptr.p_double[aoffset+4] = s4x+s5x;
14165  a->ptr.p_double[aoffset+5] = s4y+s5y;
14166  a->ptr.p_double[aoffset+6] = s4x-s5x;
14167  a->ptr.p_double[aoffset+7] = s4y-s5y;
14168  a->ptr.p_double[aoffset+8] = s2x-s3x;
14169  a->ptr.p_double[aoffset+9] = s2y-s3y;
14170  }
14171  return;
14172  }
14173  if( n==6 )
14174  {
14175  c1 = ae_cos(2*ae_pi/3, _state)-1;
14176  c2 = ae_sin(2*ae_pi/3, _state);
14177  c3 = ae_cos(-ae_pi/3, _state);
14178  c4 = ae_sin(-ae_pi/3, _state);
14179  for(opidx=0; opidx<=operandscnt-1; opidx++)
14180  {
14181  aoffset = offs+opidx*operandsize*2;
14182  a0x = a->ptr.p_double[aoffset+0];
14183  a0y = a->ptr.p_double[aoffset+1];
14184  a1x = a->ptr.p_double[aoffset+2];
14185  a1y = a->ptr.p_double[aoffset+3];
14186  a2x = a->ptr.p_double[aoffset+4];
14187  a2y = a->ptr.p_double[aoffset+5];
14188  a3x = a->ptr.p_double[aoffset+6];
14189  a3y = a->ptr.p_double[aoffset+7];
14190  a4x = a->ptr.p_double[aoffset+8];
14191  a4y = a->ptr.p_double[aoffset+9];
14192  a5x = a->ptr.p_double[aoffset+10];
14193  a5y = a->ptr.p_double[aoffset+11];
14194  v0 = a0x;
14195  v1 = a0y;
14196  a0x = a0x+a3x;
14197  a0y = a0y+a3y;
14198  a3x = v0-a3x;
14199  a3y = v1-a3y;
14200  v0 = a1x;
14201  v1 = a1y;
14202  a1x = a1x+a4x;
14203  a1y = a1y+a4y;
14204  a4x = v0-a4x;
14205  a4y = v1-a4y;
14206  v0 = a2x;
14207  v1 = a2y;
14208  a2x = a2x+a5x;
14209  a2y = a2y+a5y;
14210  a5x = v0-a5x;
14211  a5y = v1-a5y;
14212  t4x = a4x*c3-a4y*c4;
14213  t4y = a4x*c4+a4y*c3;
14214  a4x = t4x;
14215  a4y = t4y;
14216  t5x = -a5x*c3-a5y*c4;
14217  t5y = a5x*c4-a5y*c3;
14218  a5x = t5x;
14219  a5y = t5y;
14220  t1x = a1x+a2x;
14221  t1y = a1y+a2y;
14222  a0x = a0x+t1x;
14223  a0y = a0y+t1y;
14224  m1x = c1*t1x;
14225  m1y = c1*t1y;
14226  m2x = c2*(a1y-a2y);
14227  m2y = c2*(a2x-a1x);
14228  s1x = a0x+m1x;
14229  s1y = a0y+m1y;
14230  a1x = s1x+m2x;
14231  a1y = s1y+m2y;
14232  a2x = s1x-m2x;
14233  a2y = s1y-m2y;
14234  t1x = a4x+a5x;
14235  t1y = a4y+a5y;
14236  a3x = a3x+t1x;
14237  a3y = a3y+t1y;
14238  m1x = c1*t1x;
14239  m1y = c1*t1y;
14240  m2x = c2*(a4y-a5y);
14241  m2y = c2*(a5x-a4x);
14242  s1x = a3x+m1x;
14243  s1y = a3y+m1y;
14244  a4x = s1x+m2x;
14245  a4y = s1y+m2y;
14246  a5x = s1x-m2x;
14247  a5y = s1y-m2y;
14248  a->ptr.p_double[aoffset+0] = a0x;
14249  a->ptr.p_double[aoffset+1] = a0y;
14250  a->ptr.p_double[aoffset+2] = a3x;
14251  a->ptr.p_double[aoffset+3] = a3y;
14252  a->ptr.p_double[aoffset+4] = a1x;
14253  a->ptr.p_double[aoffset+5] = a1y;
14254  a->ptr.p_double[aoffset+6] = a4x;
14255  a->ptr.p_double[aoffset+7] = a4y;
14256  a->ptr.p_double[aoffset+8] = a2x;
14257  a->ptr.p_double[aoffset+9] = a2y;
14258  a->ptr.p_double[aoffset+10] = a5x;
14259  a->ptr.p_double[aoffset+11] = a5y;
14260  }
14261  return;
14262  }
14263 }
14264 
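/*************************************************************************
Editor's note on the hard-coded kernels above: N=2 and N=4 are plain
radix-2/radix-4 butterflies; N=3 uses C1=cos(2*pi/3)-1 so that the already
computed sum A0+T1 can be reused, i.e. X1,2 = (A0+T1) + C1*T1 -/+
I*C2*(A1-A2); N=5 uses the classic five-point module constants C1..C5
built from cos/sin of 2*pi/5 and 4*pi/5; N=6 is evaluated as a 2x3 split:
a radix-2 stage, rotations of the odd branch by exp(-I*pi/3) and
exp(-I*2*pi/3), then two 3-point kernels with the outputs interleaved.
*************************************************************************/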
14265 
14266 /*************************************************************************
14267 This subroutine applies complex "integrated" codelet FFT to input/output
14268 array A. "Integrated" codelet differs from the "normal" one in the following ways:
14269 * it can work with MicrovectorSize>1
14270 * hence, it can be used in Cooley-Tukey FFT without transpositions
14271 * it performs inlined multiplication by twiddle factors of Cooley-Tukey
14272  FFT with N2=MicrovectorSize/2.
14273 
14274 INPUT PARAMETERS:
14275  A - array, must be large enough for plan to work
14276  Offs - offset of the subarray to process
14277  OperandsCnt - operands count (see description of FastTransformPlan)
14278  OperandSize - operand size (see description of FastTransformPlan)
14279  MicrovectorSize-microvector size, must be even (equal to 2*N2)
14280 
14281 OUTPUT PARAMETERS:
14282  A - transformed array
14283 
14284  -- ALGLIB --
14285  Copyright 05.04.2013 by Bochkanov Sergey
14286 *************************************************************************/
14287 static void ftbase_ftapplycomplexcodelettwfft(/* Real */ ae_vector* a,
14288  ae_int_t offs,
14289  ae_int_t operandscnt,
14290  ae_int_t operandsize,
14291  ae_int_t microvectorsize,
14292  ae_state *_state)
14293 {
14294  ae_int_t opidx;
14295  ae_int_t mvidx;
14296  ae_int_t n;
14297  ae_int_t m;
14298  ae_int_t aoffset0;
14299  ae_int_t aoffset2;
14300  ae_int_t aoffset4;
14301  ae_int_t aoffset6;
14302  ae_int_t aoffset8;
14303  ae_int_t aoffset10;
14304  double a0x;
14305  double a0y;
14306  double a1x;
14307  double a1y;
14308  double a2x;
14309  double a2y;
14310  double a3x;
14311  double a3y;
14312  double a4x;
14313  double a4y;
14314  double a5x;
14315  double a5y;
14316  double v0;
14317  double v1;
14318  double v2;
14319  double v3;
14320  double q0x;
14321  double q0y;
14322  double t1x;
14323  double t1y;
14324  double t2x;
14325  double t2y;
14326  double t3x;
14327  double t3y;
14328  double t4x;
14329  double t4y;
14330  double t5x;
14331  double t5y;
14332  double m1x;
14333  double m1y;
14334  double m2x;
14335  double m2y;
14336  double m3x;
14337  double m3y;
14338  double m4x;
14339  double m4y;
14340  double m5x;
14341  double m5y;
14342  double s1x;
14343  double s1y;
14344  double s2x;
14345  double s2y;
14346  double s3x;
14347  double s3y;
14348  double s4x;
14349  double s4y;
14350  double s5x;
14351  double s5y;
14352  double c1;
14353  double c2;
14354  double c3;
14355  double c4;
14356  double c5;
14357  double v;
14358  double tw0;
14359  double tw1;
14360  double twx;
14361  double twxm1;
14362  double twy;
14363  double tw2x;
14364  double tw2y;
14365  double tw3x;
14366  double tw3y;
14367  double tw4x;
14368  double tw4y;
14369  double tw5x;
14370  double tw5y;
14371 
14372 
14373  ae_assert(operandscnt>=1, "FTApplyComplexCodeletTwFFT: OperandsCnt<1", _state);
14374  ae_assert(operandsize>=1, "FTApplyComplexCodeletTwFFT: OperandSize<1", _state);
14375  ae_assert(microvectorsize>=1, "FTApplyComplexCodeletTwFFT: MicrovectorSize<1", _state);
14376  ae_assert(microvectorsize%2==0, "FTApplyComplexCodeletTwFFT: MicrovectorSize is not even", _state);
14377  n = operandsize;
14378  m = microvectorsize/2;
14379 
14380  /*
14381  * Hard-coded transforms for different N's
14382  */
14383  ae_assert(n<=ftbase_maxradix, "FTApplyComplexCodeletTwFFT: N>MaxRadix", _state);
14384  if( n==2 )
14385  {
14386  v = -2*ae_pi/(n*m);
14387  tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
14388  tw1 = ae_sin(v, _state);
14389  for(opidx=0; opidx<=operandscnt-1; opidx++)
14390  {
14391  aoffset0 = offs+opidx*operandsize*microvectorsize;
14392  aoffset2 = aoffset0+microvectorsize;
14393  twxm1 = 0.0;
14394  twy = 0.0;
14395  for(mvidx=0; mvidx<=m-1; mvidx++)
14396  {
14397  a0x = a->ptr.p_double[aoffset0];
14398  a0y = a->ptr.p_double[aoffset0+1];
14399  a1x = a->ptr.p_double[aoffset2];
14400  a1y = a->ptr.p_double[aoffset2+1];
14401  v0 = a0x+a1x;
14402  v1 = a0y+a1y;
14403  v2 = a0x-a1x;
14404  v3 = a0y-a1y;
14405  a->ptr.p_double[aoffset0] = v0;
14406  a->ptr.p_double[aoffset0+1] = v1;
14407  a->ptr.p_double[aoffset2] = v2*(1+twxm1)-v3*twy;
14408  a->ptr.p_double[aoffset2+1] = v3*(1+twxm1)+v2*twy;
14409  aoffset0 = aoffset0+2;
14410  aoffset2 = aoffset2+2;
14411  if( (mvidx+1)%ftbase_updatetw==0 )
14412  {
14413  v = -2*ae_pi*(mvidx+1)/(n*m);
14414  twxm1 = ae_sin(0.5*v, _state);
14415  twxm1 = -2*twxm1*twxm1;
14416  twy = ae_sin(v, _state);
14417  }
14418  else
14419  {
14420  v = twxm1+tw0+twxm1*tw0-twy*tw1;
14421  twy = twy+tw1+twxm1*tw1+twy*tw0;
14422  twxm1 = v;
14423  }
14424  }
14425  }
14426  return;
14427  }
14428  if( n==3 )
14429  {
14430  v = -2*ae_pi/(n*m);
14431  tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
14432  tw1 = ae_sin(v, _state);
14433  c1 = ae_cos(2*ae_pi/3, _state)-1;
14434  c2 = ae_sin(2*ae_pi/3, _state);
14435  for(opidx=0; opidx<=operandscnt-1; opidx++)
14436  {
14437  aoffset0 = offs+opidx*operandsize*microvectorsize;
14438  aoffset2 = aoffset0+microvectorsize;
14439  aoffset4 = aoffset2+microvectorsize;
14440  twx = 1.0;
14441  twxm1 = 0.0;
14442  twy = 0.0;
14443  for(mvidx=0; mvidx<=m-1; mvidx++)
14444  {
14445  a0x = a->ptr.p_double[aoffset0];
14446  a0y = a->ptr.p_double[aoffset0+1];
14447  a1x = a->ptr.p_double[aoffset2];
14448  a1y = a->ptr.p_double[aoffset2+1];
14449  a2x = a->ptr.p_double[aoffset4];
14450  a2y = a->ptr.p_double[aoffset4+1];
14451  t1x = a1x+a2x;
14452  t1y = a1y+a2y;
14453  a0x = a0x+t1x;
14454  a0y = a0y+t1y;
14455  m1x = c1*t1x;
14456  m1y = c1*t1y;
14457  m2x = c2*(a1y-a2y);
14458  m2y = c2*(a2x-a1x);
14459  s1x = a0x+m1x;
14460  s1y = a0y+m1y;
14461  a1x = s1x+m2x;
14462  a1y = s1y+m2y;
14463  a2x = s1x-m2x;
14464  a2y = s1y-m2y;
14465  tw2x = twx*twx-twy*twy;
14466  tw2y = 2*twx*twy;
14467  a->ptr.p_double[aoffset0] = a0x;
14468  a->ptr.p_double[aoffset0+1] = a0y;
14469  a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
14470  a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy;
14471  a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
14472  a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y;
14473  aoffset0 = aoffset0+2;
14474  aoffset2 = aoffset2+2;
14475  aoffset4 = aoffset4+2;
14476  if( (mvidx+1)%ftbase_updatetw==0 )
14477  {
14478  v = -2*ae_pi*(mvidx+1)/(n*m);
14479  twxm1 = ae_sin(0.5*v, _state);
14480  twxm1 = -2*twxm1*twxm1;
14481  twy = ae_sin(v, _state);
14482  twx = twxm1+1;
14483  }
14484  else
14485  {
14486  v = twxm1+tw0+twxm1*tw0-twy*tw1;
14487  twy = twy+tw1+twxm1*tw1+twy*tw0;
14488  twxm1 = v;
14489  twx = v+1;
14490  }
14491  }
14492  }
14493  return;
14494  }
14495  if( n==4 )
14496  {
14497  v = -2*ae_pi/(n*m);
14498  tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
14499  tw1 = ae_sin(v, _state);
14500  for(opidx=0; opidx<=operandscnt-1; opidx++)
14501  {
14502  aoffset0 = offs+opidx*operandsize*microvectorsize;
14503  aoffset2 = aoffset0+microvectorsize;
14504  aoffset4 = aoffset2+microvectorsize;
14505  aoffset6 = aoffset4+microvectorsize;
14506  twx = 1.0;
14507  twxm1 = 0.0;
14508  twy = 0.0;
14509  for(mvidx=0; mvidx<=m-1; mvidx++)
14510  {
14511  a0x = a->ptr.p_double[aoffset0];
14512  a0y = a->ptr.p_double[aoffset0+1];
14513  a1x = a->ptr.p_double[aoffset2];
14514  a1y = a->ptr.p_double[aoffset2+1];
14515  a2x = a->ptr.p_double[aoffset4];
14516  a2y = a->ptr.p_double[aoffset4+1];
14517  a3x = a->ptr.p_double[aoffset6];
14518  a3y = a->ptr.p_double[aoffset6+1];
14519  t1x = a0x+a2x;
14520  t1y = a0y+a2y;
14521  t2x = a1x+a3x;
14522  t2y = a1y+a3y;
14523  m2x = a0x-a2x;
14524  m2y = a0y-a2y;
14525  m3x = a1y-a3y;
14526  m3y = a3x-a1x;
14527  tw2x = twx*twx-twy*twy;
14528  tw2y = 2*twx*twy;
14529  tw3x = twx*tw2x-twy*tw2y;
14530  tw3y = twx*tw2y+twy*tw2x;
14531  a1x = m2x+m3x;
14532  a1y = m2y+m3y;
14533  a2x = t1x-t2x;
14534  a2y = t1y-t2y;
14535  a3x = m2x-m3x;
14536  a3y = m2y-m3y;
14537  a->ptr.p_double[aoffset0] = t1x+t2x;
14538  a->ptr.p_double[aoffset0+1] = t1y+t2y;
14539  a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
14540  a->ptr.p_double[aoffset2+1] = a1y*twx+a1x*twy;
14541  a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
14542  a->ptr.p_double[aoffset4+1] = a2y*tw2x+a2x*tw2y;
14543  a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y;
14544  a->ptr.p_double[aoffset6+1] = a3y*tw3x+a3x*tw3y;
14545  aoffset0 = aoffset0+2;
14546  aoffset2 = aoffset2+2;
14547  aoffset4 = aoffset4+2;
14548  aoffset6 = aoffset6+2;
14549  if( (mvidx+1)%ftbase_updatetw==0 )
14550  {
14551  v = -2*ae_pi*(mvidx+1)/(n*m);
14552  twxm1 = ae_sin(0.5*v, _state);
14553  twxm1 = -2*twxm1*twxm1;
14554  twy = ae_sin(v, _state);
14555  twx = twxm1+1;
14556  }
14557  else
14558  {
14559  v = twxm1+tw0+twxm1*tw0-twy*tw1;
14560  twy = twy+tw1+twxm1*tw1+twy*tw0;
14561  twxm1 = v;
14562  twx = v+1;
14563  }
14564  }
14565  }
14566  return;
14567  }
14568  if( n==5 )
14569  {
14570  v = -2*ae_pi/(n*m);
14571  tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
14572  tw1 = ae_sin(v, _state);
14573  v = 2*ae_pi/5;
14574  c1 = (ae_cos(v, _state)+ae_cos(2*v, _state))/2-1;
14575  c2 = (ae_cos(v, _state)-ae_cos(2*v, _state))/2;
14576  c3 = -ae_sin(v, _state);
14577  c4 = -(ae_sin(v, _state)+ae_sin(2*v, _state));
14578  c5 = ae_sin(v, _state)-ae_sin(2*v, _state);
14579  for(opidx=0; opidx<=operandscnt-1; opidx++)
14580  {
14581  aoffset0 = offs+opidx*operandsize*microvectorsize;
14582  aoffset2 = aoffset0+microvectorsize;
14583  aoffset4 = aoffset2+microvectorsize;
14584  aoffset6 = aoffset4+microvectorsize;
14585  aoffset8 = aoffset6+microvectorsize;
14586  twx = 1.0;
14587  twxm1 = 0.0;
14588  twy = 0.0;
14589  for(mvidx=0; mvidx<=m-1; mvidx++)
14590  {
14591  a0x = a->ptr.p_double[aoffset0];
14592  a0y = a->ptr.p_double[aoffset0+1];
14593  a1x = a->ptr.p_double[aoffset2];
14594  a1y = a->ptr.p_double[aoffset2+1];
14595  a2x = a->ptr.p_double[aoffset4];
14596  a2y = a->ptr.p_double[aoffset4+1];
14597  a3x = a->ptr.p_double[aoffset6];
14598  a3y = a->ptr.p_double[aoffset6+1];
14599  a4x = a->ptr.p_double[aoffset8];
14600  a4y = a->ptr.p_double[aoffset8+1];
14601  t1x = a1x+a4x;
14602  t1y = a1y+a4y;
14603  t2x = a2x+a3x;
14604  t2y = a2y+a3y;
14605  t3x = a1x-a4x;
14606  t3y = a1y-a4y;
14607  t4x = a3x-a2x;
14608  t4y = a3y-a2y;
14609  t5x = t1x+t2x;
14610  t5y = t1y+t2y;
14611  q0x = a0x+t5x;
14612  q0y = a0y+t5y;
14613  m1x = c1*t5x;
14614  m1y = c1*t5y;
14615  m2x = c2*(t1x-t2x);
14616  m2y = c2*(t1y-t2y);
14617  m3x = -c3*(t3y+t4y);
14618  m3y = c3*(t3x+t4x);
14619  m4x = -c4*t4y;
14620  m4y = c4*t4x;
14621  m5x = -c5*t3y;
14622  m5y = c5*t3x;
14623  s3x = m3x-m4x;
14624  s3y = m3y-m4y;
14625  s5x = m3x+m5x;
14626  s5y = m3y+m5y;
14627  s1x = q0x+m1x;
14628  s1y = q0y+m1y;
14629  s2x = s1x+m2x;
14630  s2y = s1y+m2y;
14631  s4x = s1x-m2x;
14632  s4y = s1y-m2y;
14633  tw2x = twx*twx-twy*twy;
14634  tw2y = 2*twx*twy;
14635  tw3x = twx*tw2x-twy*tw2y;
14636  tw3y = twx*tw2y+twy*tw2x;
14637  tw4x = tw2x*tw2x-tw2y*tw2y;
14638  tw4y = tw2x*tw2y+tw2y*tw2x;
14639  a1x = s2x+s3x;
14640  a1y = s2y+s3y;
14641  a2x = s4x+s5x;
14642  a2y = s4y+s5y;
14643  a3x = s4x-s5x;
14644  a3y = s4y-s5y;
14645  a4x = s2x-s3x;
14646  a4y = s2y-s3y;
14647  a->ptr.p_double[aoffset0] = q0x;
14648  a->ptr.p_double[aoffset0+1] = q0y;
14649  a->ptr.p_double[aoffset2] = a1x*twx-a1y*twy;
14650  a->ptr.p_double[aoffset2+1] = a1x*twy+a1y*twx;
14651  a->ptr.p_double[aoffset4] = a2x*tw2x-a2y*tw2y;
14652  a->ptr.p_double[aoffset4+1] = a2x*tw2y+a2y*tw2x;
14653  a->ptr.p_double[aoffset6] = a3x*tw3x-a3y*tw3y;
14654  a->ptr.p_double[aoffset6+1] = a3x*tw3y+a3y*tw3x;
14655  a->ptr.p_double[aoffset8] = a4x*tw4x-a4y*tw4y;
14656  a->ptr.p_double[aoffset8+1] = a4x*tw4y+a4y*tw4x;
14657  aoffset0 = aoffset0+2;
14658  aoffset2 = aoffset2+2;
14659  aoffset4 = aoffset4+2;
14660  aoffset6 = aoffset6+2;
14661  aoffset8 = aoffset8+2;
14662  if( (mvidx+1)%ftbase_updatetw==0 )
14663  {
14664  v = -2*ae_pi*(mvidx+1)/(n*m);
14665  twxm1 = ae_sin(0.5*v, _state);
14666  twxm1 = -2*twxm1*twxm1;
14667  twy = ae_sin(v, _state);
14668  twx = twxm1+1;
14669  }
14670  else
14671  {
14672  v = twxm1+tw0+twxm1*tw0-twy*tw1;
14673  twy = twy+tw1+twxm1*tw1+twy*tw0;
14674  twxm1 = v;
14675  twx = v+1;
14676  }
14677  }
14678  }
14679  return;
14680  }
14681  if( n==6 )
14682  {
14683  c1 = ae_cos(2*ae_pi/3, _state)-1;
14684  c2 = ae_sin(2*ae_pi/3, _state);
14685  c3 = ae_cos(-ae_pi/3, _state);
14686  c4 = ae_sin(-ae_pi/3, _state);
14687  v = -2*ae_pi/(n*m);
14688  tw0 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
14689  tw1 = ae_sin(v, _state);
14690  for(opidx=0; opidx<=operandscnt-1; opidx++)
14691  {
14692  aoffset0 = offs+opidx*operandsize*microvectorsize;
14693  aoffset2 = aoffset0+microvectorsize;
14694  aoffset4 = aoffset2+microvectorsize;
14695  aoffset6 = aoffset4+microvectorsize;
14696  aoffset8 = aoffset6+microvectorsize;
14697  aoffset10 = aoffset8+microvectorsize;
14698  twx = 1.0;
14699  twxm1 = 0.0;
14700  twy = 0.0;
14701  for(mvidx=0; mvidx<=m-1; mvidx++)
14702  {
14703  a0x = a->ptr.p_double[aoffset0+0];
14704  a0y = a->ptr.p_double[aoffset0+1];
14705  a1x = a->ptr.p_double[aoffset2+0];
14706  a1y = a->ptr.p_double[aoffset2+1];
14707  a2x = a->ptr.p_double[aoffset4+0];
14708  a2y = a->ptr.p_double[aoffset4+1];
14709  a3x = a->ptr.p_double[aoffset6+0];
14710  a3y = a->ptr.p_double[aoffset6+1];
14711  a4x = a->ptr.p_double[aoffset8+0];
14712  a4y = a->ptr.p_double[aoffset8+1];
14713  a5x = a->ptr.p_double[aoffset10+0];
14714  a5y = a->ptr.p_double[aoffset10+1];
14715  v0 = a0x;
14716  v1 = a0y;
14717  a0x = a0x+a3x;
14718  a0y = a0y+a3y;
14719  a3x = v0-a3x;
14720  a3y = v1-a3y;
14721  v0 = a1x;
14722  v1 = a1y;
14723  a1x = a1x+a4x;
14724  a1y = a1y+a4y;
14725  a4x = v0-a4x;
14726  a4y = v1-a4y;
14727  v0 = a2x;
14728  v1 = a2y;
14729  a2x = a2x+a5x;
14730  a2y = a2y+a5y;
14731  a5x = v0-a5x;
14732  a5y = v1-a5y;
14733  t4x = a4x*c3-a4y*c4;
14734  t4y = a4x*c4+a4y*c3;
14735  a4x = t4x;
14736  a4y = t4y;
14737  t5x = -a5x*c3-a5y*c4;
14738  t5y = a5x*c4-a5y*c3;
14739  a5x = t5x;
14740  a5y = t5y;
14741  t1x = a1x+a2x;
14742  t1y = a1y+a2y;
14743  a0x = a0x+t1x;
14744  a0y = a0y+t1y;
14745  m1x = c1*t1x;
14746  m1y = c1*t1y;
14747  m2x = c2*(a1y-a2y);
14748  m2y = c2*(a2x-a1x);
14749  s1x = a0x+m1x;
14750  s1y = a0y+m1y;
14751  a1x = s1x+m2x;
14752  a1y = s1y+m2y;
14753  a2x = s1x-m2x;
14754  a2y = s1y-m2y;
14755  t1x = a4x+a5x;
14756  t1y = a4y+a5y;
14757  a3x = a3x+t1x;
14758  a3y = a3y+t1y;
14759  m1x = c1*t1x;
14760  m1y = c1*t1y;
14761  m2x = c2*(a4y-a5y);
14762  m2y = c2*(a5x-a4x);
14763  s1x = a3x+m1x;
14764  s1y = a3y+m1y;
14765  a4x = s1x+m2x;
14766  a4y = s1y+m2y;
14767  a5x = s1x-m2x;
14768  a5y = s1y-m2y;
14769  tw2x = twx*twx-twy*twy;
14770  tw2y = 2*twx*twy;
14771  tw3x = twx*tw2x-twy*tw2y;
14772  tw3y = twx*tw2y+twy*tw2x;
14773  tw4x = tw2x*tw2x-tw2y*tw2y;
14774  tw4y = 2*tw2x*tw2y;
14775  tw5x = tw3x*tw2x-tw3y*tw2y;
14776  tw5y = tw3x*tw2y+tw3y*tw2x;
14777  a->ptr.p_double[aoffset0+0] = a0x;
14778  a->ptr.p_double[aoffset0+1] = a0y;
14779  a->ptr.p_double[aoffset2+0] = a3x*twx-a3y*twy;
14780  a->ptr.p_double[aoffset2+1] = a3y*twx+a3x*twy;
14781  a->ptr.p_double[aoffset4+0] = a1x*tw2x-a1y*tw2y;
14782  a->ptr.p_double[aoffset4+1] = a1y*tw2x+a1x*tw2y;
14783  a->ptr.p_double[aoffset6+0] = a4x*tw3x-a4y*tw3y;
14784  a->ptr.p_double[aoffset6+1] = a4y*tw3x+a4x*tw3y;
14785  a->ptr.p_double[aoffset8+0] = a2x*tw4x-a2y*tw4y;
14786  a->ptr.p_double[aoffset8+1] = a2y*tw4x+a2x*tw4y;
14787  a->ptr.p_double[aoffset10+0] = a5x*tw5x-a5y*tw5y;
14788  a->ptr.p_double[aoffset10+1] = a5y*tw5x+a5x*tw5y;
14789  aoffset0 = aoffset0+2;
14790  aoffset2 = aoffset2+2;
14791  aoffset4 = aoffset4+2;
14792  aoffset6 = aoffset6+2;
14793  aoffset8 = aoffset8+2;
14794  aoffset10 = aoffset10+2;
14795  if( (mvidx+1)%ftbase_updatetw==0 )
14796  {
14797  v = -2*ae_pi*(mvidx+1)/(n*m);
14798  twxm1 = ae_sin(0.5*v, _state);
14799  twxm1 = -2*twxm1*twxm1;
14800  twy = ae_sin(v, _state);
14801  twx = twxm1+1;
14802  }
14803  else
14804  {
14805  v = twxm1+tw0+twxm1*tw0-twy*tw1;
14806  twy = twy+tw1+twxm1*tw1+twy*tw0;
14807  twxm1 = v;
14808  twx = v+1;
14809  }
14810  }
14811  }
14812  return;
14813  }
14814 }
14815 
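/*************************************************************************
Editor's note: the twiddle factors above are advanced with a "modified"
trigonometric recurrence. With TwXM1=cos(t)-1, TwY=sin(t),
Tw0=cos(v)-1=-2*sin^2(v/2) and Tw1=sin(v), the angle-addition formulas can
be written as

    cos(t+v)-1 = TwXM1 + Tw0 + TwXM1*Tw0 - TwY*Tw1
    sin(t+v)   = TwY   + Tw1 + TwXM1*Tw1 + TwY*Tw0

which is exactly the update performed in the "else" branches above.
Tracking cos(t)-1 instead of cos(t) avoids cancellation for small angles,
and every UpdateTw iterations the pair is recomputed from scratch with
ae_sin() so that rounding errors do not accumulate.
*************************************************************************/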
14816 
14817 /*************************************************************************
14818 This subroutine precomputes data for complex Bluestein's FFT and writes
14819 them to array PrecR[] at the specified offset. It is the responsibility of the
14820 caller to make sure that PrecR[] is large enough.
14821 
14822 INPUT PARAMETERS:
14823  N - original size of the transform
14824  M - size of the "padded" Bluestein's transform
14825  PrecR - preallocated array
14826  Offs - offset
14827 
14828 OUTPUT PARAMETERS:
14829  PrecR - data at Offs:Offs+4*M-1 are modified:
14830  * PrecR[Offs:Offs+2*M-1] stores Z[k]=exp(i*pi*k^2/N)
14831  * PrecR[Offs+2*M:Offs+4*M-1] stores the FFT of Z
14832  Other parts of PrecR are unchanged.
14833 
14834 NOTE: this function performs an internal M-point FFT. It allocates a temporary
14835  plan which is destroyed before this function returns.
14836 
14837  -- ALGLIB --
14838  Copyright 08.05.2013 by Bochkanov Sergey
14839 *************************************************************************/
14840 static void ftbase_ftprecomputebluesteinsfft(ae_int_t n,
14841  ae_int_t m,
14842  /* Real */ ae_vector* precr,
14843  ae_int_t offs,
14844  ae_state *_state)
14845 {
14846  ae_frame _frame_block;
14847  ae_int_t i;
14848  double bx;
14849  double by;
14850  fasttransformplan plan;
14851 
14852  ae_frame_make(_state, &_frame_block);
14853  _fasttransformplan_init(&plan, _state, ae_true);
14854 
14855 
14856  /*
14857  * Fill first half of PrecR with b[k] = exp(i*pi*k^2/N)
14858  */
14859  for(i=0; i<=2*m-1; i++)
14860  {
14861  precr->ptr.p_double[offs+i] = 0;
14862  }
14863  for(i=0; i<=n-1; i++)
14864  {
14865  bx = ae_cos(ae_pi/n*i*i, _state);
14866  by = ae_sin(ae_pi/n*i*i, _state);
14867  precr->ptr.p_double[offs+2*i+0] = bx;
14868  precr->ptr.p_double[offs+2*i+1] = by;
14869  precr->ptr.p_double[offs+2*((m-i)%m)+0] = bx;
14870  precr->ptr.p_double[offs+2*((m-i)%m)+1] = by;
14871  }
14872 
14873  /*
14874  * Compute FFT of b[] and store it to the second half of PrecR
14875  */
14876  ftcomplexfftplan(m, 1, &plan, _state);
14877  for(i=0; i<=2*m-1; i++)
14878  {
14879  precr->ptr.p_double[offs+2*m+i] = precr->ptr.p_double[offs+i];
14880  }
14881  ftbase_ftapplysubplan(&plan, 0, precr, offs+2*m, 0, &plan.buffer, 1, _state);
14882  ae_frame_leave(_state);
14883 }
14884 
14885 
14886 /*************************************************************************
14887 This subroutine applies complex Bluestein's FFT to input/output array A.
14888 
14889 INPUT PARAMETERS:
14890  Plan - transformation plan
14891  A - array, must be large enough for plan to work
14892  ABase - base offset in array A, this value points to start of
14893  subarray whose length is equal to length of the plan
14894  AOffset - offset with respect to ABase, 0<=AOffset<PlanLength.
14895  This is the offset, within the larger PlanLength subarray,
14896  of the chunk to process.
14897  OperandsCnt - number of repeated operands (length N each)
14898  N - original data length (measured in complex numbers)
14899  M - padded data length (measured in complex numbers)
14900  PrecOffs - offset of the precomputed data for the plan
14901  SubPlan - position of the length-M FFT subplan which is used by
14902  transformation
14903  BufA - temporary buffer, at least 2*M elements
14904  BufB - temporary buffer, at least 2*M elements
14905  BufC - temporary buffer, at least 2*M elements
14906  BufD - temporary buffer, at least 2*M elements
14907 
14908 OUTPUT PARAMETERS:
14909  A - transformed array
14910 
14911  -- ALGLIB --
14912  Copyright 05.04.2013 by Bochkanov Sergey
14913 *************************************************************************/
14914 static void ftbase_ftbluesteinsfft(fasttransformplan* plan,
14915  /* Real */ ae_vector* a,
14916  ae_int_t abase,
14917  ae_int_t aoffset,
14918  ae_int_t operandscnt,
14919  ae_int_t n,
14920  ae_int_t m,
14921  ae_int_t precoffs,
14922  ae_int_t subplan,
14923  /* Real */ ae_vector* bufa,
14924  /* Real */ ae_vector* bufb,
14925  /* Real */ ae_vector* bufc,
14926  /* Real */ ae_vector* bufd,
14927  ae_state *_state)
14928 {
14929  ae_int_t op;
14930  ae_int_t i;
14931  double x;
14932  double y;
14933  double bx;
14934  double by;
14935  double ax;
14936  double ay;
14937  double rx;
14938  double ry;
14939  ae_int_t p0;
14940  ae_int_t p1;
14941  ae_int_t p2;
14942 
14943 
14944  for(op=0; op<=operandscnt-1; op++)
14945  {
14946 
14947  /*
14948  * Multiply A by conj(Z), store result to the buffer.
14949  * Pad the buffer with zeros up to length 2*M.
14950  *
14951  * NOTE: Z[k]=exp(i*pi*k^2/N)
14952  */
14953  p0 = abase+aoffset+op*2*n;
14954  p1 = precoffs;
14955  for(i=0; i<=n-1; i++)
14956  {
14957  x = a->ptr.p_double[p0+0];
14958  y = a->ptr.p_double[p0+1];
14959  bx = plan->precr.ptr.p_double[p1+0];
14960  by = -plan->precr.ptr.p_double[p1+1];
14961  bufa->ptr.p_double[2*i+0] = x*bx-y*by;
14962  bufa->ptr.p_double[2*i+1] = x*by+y*bx;
14963  p0 = p0+2;
14964  p1 = p1+2;
14965  }
14966  for(i=2*n; i<=2*m-1; i++)
14967  {
14968  bufa->ptr.p_double[i] = 0;
14969  }
14970 
14971  /*
14972  * Perform convolution of A and Z (using precomputed
14973  * FFT of Z stored in Plan structure).
14974  */
14975  ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state);
14976  p0 = 0;
14977  p1 = precoffs+2*m;
14978  for(i=0; i<=m-1; i++)
14979  {
14980  ax = bufa->ptr.p_double[p0+0];
14981  ay = bufa->ptr.p_double[p0+1];
14982  bx = plan->precr.ptr.p_double[p1+0];
14983  by = plan->precr.ptr.p_double[p1+1];
14984  bufa->ptr.p_double[p0+0] = ax*bx-ay*by;
14985  bufa->ptr.p_double[p0+1] = -(ax*by+ay*bx);
14986  p0 = p0+2;
14987  p1 = p1+2;
14988  }
14989  ftbase_ftapplysubplan(plan, subplan, bufa, 0, 0, bufc, 1, _state);
14990 
14991  /*
14992  * Post processing:
14993  * A:=conj(Z)*conj(A)/M
14994  * Here conj(A)/M corresponds to last stage of inverse DFT,
14995  * and conj(Z) comes from Bluestein's FFT algorithm.
14996  */
14997  p0 = precoffs;
14998  p1 = 0;
14999  p2 = abase+aoffset+op*2*n;
15000  for(i=0; i<=n-1; i++)
15001  {
15002  bx = plan->precr.ptr.p_double[p0+0];
15003  by = plan->precr.ptr.p_double[p0+1];
15004  rx = bufa->ptr.p_double[p1+0]/m;
15005  ry = -bufa->ptr.p_double[p1+1]/m;
15006  a->ptr.p_double[p2+0] = rx*bx-ry*(-by);
15007  a->ptr.p_double[p2+1] = rx*(-by)+ry*bx;
15008  p0 = p0+2;
15009  p1 = p1+2;
15010  p2 = p2+2;
15011  }
15012  }
15013 }
15014 
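/*************************************************************************
Illustrative sketch (not part of ALGLIB) of the identity behind the
routine above. With b[m]=exp(i*pi*m^2/N) the DFT can be written as

    X[k] = conj(b[k]) * SUM(j=0..N-1) ( a[j]*conj(b[j]) ) * b[k-j]

i.e. a convolution, which the routine evaluates through the padded
length-M FFT cached in PrecR. The sketch evaluates the same identity
naively in O(N^2); names are local to the sketch.
*************************************************************************/
#include <complex>
#include <vector>
#include <cmath>

static std::vector< std::complex<double> > dft_via_bluestein_naive(
    const std::vector< std::complex<double> >& a)
{
    const double pi = 3.14159265358979323846;
    int n = (int)a.size();
    std::vector< std::complex<double> > b(n), x(n);
    for(int m=0; m<n; m++)
        b[m] = std::polar(1.0, pi*(double)m*(double)m/n);   /* chirp b[m] */
    for(int k=0; k<n; k++)
    {
        std::complex<double> s(0.0, 0.0);
        for(int j=0; j<n; j++)
        {
            int d = k>=j ? k-j : j-k;          /* b depends only on (k-j)^2 */
            s += a[j]*std::conj(b[j])*b[d];
        }
        x[k] = std::conj(b[k])*s;              /* = SUM a[j]*exp(-2*pi*i*j*k/N) */
    }
    return x;
}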
15015 
15016 /*************************************************************************
15017 This subroutine precomputes data for complex Rader's FFT and writes them
15018 to array PrecR[] at specified offset. It is the responsibility of the caller
15019 to make sure that PrecR[] is large enough.
15020 
15021 INPUT PARAMETERS:
15022  N - original size of the transform (before reduction to N-1)
15023  RQ - primitive root modulo N
15024  RIQ - inverse of primitive root modulo N
15025  PrecR - preallocated array
15026  Offs - offset
15027 
15028 OUTPUT PARAMETERS:
15029  PrecR - data at Offs:Offs+2*(N-1)-1 store FFT of Rader's factors,
15030  other parts of PrecR are unchanged.
15031 
15032 NOTE: this function performs internal (N-1)-point FFT. It allocates temporary
15033  plan which is destroyed after leaving this function.
15034 
15035  -- ALGLIB --
15036  Copyright 08.05.2013 by Bochkanov Sergey
15037 *************************************************************************/
15038 static void ftbase_ftprecomputeradersfft(ae_int_t n,
15039  ae_int_t rq,
15040  ae_int_t riq,
15041  /* Real */ ae_vector* precr,
15042  ae_int_t offs,
15043  ae_state *_state)
15044 {
15045  ae_frame _frame_block;
15046  ae_int_t q;
15047  fasttransformplan plan;
15048  ae_int_t kiq;
15049  double v;
15050 
15051  ae_frame_make(_state, &_frame_block);
15052  _fasttransformplan_init(&plan, _state, ae_true);
15053 
15054 
15055  /*
15056  * Fill PrecR with Rader factors, perform FFT
15057  */
15058  kiq = 1;
15059  for(q=0; q<=n-2; q++)
15060  {
15061  v = -2*ae_pi*kiq/n;
15062  precr->ptr.p_double[offs+2*q+0] = ae_cos(v, _state);
15063  precr->ptr.p_double[offs+2*q+1] = ae_sin(v, _state);
15064  kiq = kiq*riq%n;
15065  }
15066  ftcomplexfftplan(n-1, 1, &plan, _state);
15067  ftbase_ftapplysubplan(&plan, 0, precr, offs, 0, &plan.buffer, 1, _state);
15068  ae_frame_leave(_state);
15069 }
15070 
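/*************************************************************************
Illustrative sketch (not part of ALGLIB): generates the same Rader factor
sequence the routine above writes to PrecR before FFT'ing it, namely
w[q] = exp(-2*pi*i*(RIQ^q mod N)/N) for q=0..N-2, where RIQ is the
inverse of the primitive root. Names are local to the sketch.
*************************************************************************/
#include <complex>
#include <vector>
#include <cmath>

static std::vector< std::complex<double> > rader_factors(int n, int riq)
{
    const double pi = 3.14159265358979323846;
    std::vector< std::complex<double> > w(n-1);
    long long kiq = 1;                               /* runs over riq^q mod n */
    for(int q=0; q<=n-2; q++)
    {
        w[q] = std::polar(1.0, -2*pi*(double)kiq/n);
        kiq = kiq*riq%n;
    }
    return w;                                        /* caller FFTs and caches this */
}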
15071 
15072 /*************************************************************************
15073 This subroutine applies complex Rader's FFT to input/output array A.
15074 
15075 INPUT PARAMETERS:
15076  A - array, must be large enough for plan to work
15077  ABase - base offset in array A, this value points to start of
15078  subarray whose length is equal to length of the plan
15079  AOffset - offset with respect to ABase, 0<=AOffset<PlanLength.
15080  This is an offset within large PlanLength-subarray of
15081  the chunk to process.
15082  OperandsCnt - number of repeated operands (length N each)
15083  N - original data length (measured in complex numbers)
15084  SubPlan - position of the (N-1)-point FFT subplan which is used
15085  by transformation
15086  RQ - primitive root modulo N
15087  RIQ - inverse of primitive root modulo N
15088  PrecOffs - offset of the precomputed data for the plan
15089  Buf - temporary array
15090 
15091 OUTPUT PARAMETERS:
15092  A - transformed array
15093 
15094  -- ALGLIB --
15095  Copyright 05.04.2013 by Bochkanov Sergey
15096 *************************************************************************/
15097 static void ftbase_ftradersfft(fasttransformplan* plan,
15098  /* Real */ ae_vector* a,
15099  ae_int_t abase,
15100  ae_int_t aoffset,
15101  ae_int_t operandscnt,
15102  ae_int_t n,
15103  ae_int_t subplan,
15104  ae_int_t rq,
15105  ae_int_t riq,
15106  ae_int_t precoffs,
15107  /* Real */ ae_vector* buf,
15108  ae_state *_state)
15109 {
15110  ae_int_t opidx;
15111  ae_int_t i;
15112  ae_int_t q;
15113  ae_int_t kq;
15114  ae_int_t kiq;
15115  double x0;
15116  double y0;
15117  ae_int_t p0;
15118  ae_int_t p1;
15119  double ax;
15120  double ay;
15121  double bx;
15122  double by;
15123  double rx;
15124  double ry;
15125 
15126 
15127  ae_assert(operandscnt>=1, "FTApplyComplexRefFFT: OperandsCnt<1", _state);
15128 
15129  /*
15130  * Process operands
15131  */
15132  for(opidx=0; opidx<=operandscnt-1; opidx++)
15133  {
15134 
15135  /*
15136  * fill QA
15137  */
15138  kq = 1;
15139  p0 = abase+aoffset+opidx*n*2;
15140  p1 = aoffset+opidx*n*2;
15141  rx = a->ptr.p_double[p0+0];
15142  ry = a->ptr.p_double[p0+1];
15143  x0 = rx;
15144  y0 = ry;
15145  for(q=0; q<=n-2; q++)
15146  {
15147  ax = a->ptr.p_double[p0+2*kq+0];
15148  ay = a->ptr.p_double[p0+2*kq+1];
15149  buf->ptr.p_double[p1+0] = ax;
15150  buf->ptr.p_double[p1+1] = ay;
15151  rx = rx+ax;
15152  ry = ry+ay;
15153  kq = kq*rq%n;
15154  p1 = p1+2;
15155  }
15156  p0 = abase+aoffset+opidx*n*2;
15157  p1 = aoffset+opidx*n*2;
15158  for(q=0; q<=n-2; q++)
15159  {
15160  a->ptr.p_double[p0] = buf->ptr.p_double[p1];
15161  a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1];
15162  p0 = p0+2;
15163  p1 = p1+2;
15164  }
15165 
15166  /*
15167  * Convolution
15168  */
15169  ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state);
15170  p0 = abase+aoffset+opidx*n*2;
15171  p1 = precoffs;
15172  for(i=0; i<=n-2; i++)
15173  {
15174  ax = a->ptr.p_double[p0+0];
15175  ay = a->ptr.p_double[p0+1];
15176  bx = plan->precr.ptr.p_double[p1+0];
15177  by = plan->precr.ptr.p_double[p1+1];
15178  a->ptr.p_double[p0+0] = ax*bx-ay*by;
15179  a->ptr.p_double[p0+1] = -(ax*by+ay*bx);
15180  p0 = p0+2;
15181  p1 = p1+2;
15182  }
15183  ftbase_ftapplysubplan(plan, subplan, a, abase, aoffset+opidx*n*2, buf, 1, _state);
15184  p0 = abase+aoffset+opidx*n*2;
15185  for(i=0; i<=n-2; i++)
15186  {
15187  a->ptr.p_double[p0+0] = a->ptr.p_double[p0+0]/(n-1);
15188  a->ptr.p_double[p0+1] = -a->ptr.p_double[p0+1]/(n-1);
15189  p0 = p0+2;
15190  }
15191 
15192  /*
15193  * Result
15194  */
15195  buf->ptr.p_double[aoffset+opidx*n*2+0] = rx;
15196  buf->ptr.p_double[aoffset+opidx*n*2+1] = ry;
15197  kiq = 1;
15198  p0 = aoffset+opidx*n*2;
15199  p1 = abase+aoffset+opidx*n*2;
15200  for(q=0; q<=n-2; q++)
15201  {
15202  buf->ptr.p_double[p0+2*kiq+0] = x0+a->ptr.p_double[p1+0];
15203  buf->ptr.p_double[p0+2*kiq+1] = y0+a->ptr.p_double[p1+1];
15204  kiq = kiq*riq%n;
15205  p1 = p1+2;
15206  }
15207  p0 = abase+aoffset+opidx*n*2;
15208  p1 = aoffset+opidx*n*2;
15209  for(q=0; q<=n-1; q++)
15210  {
15211  a->ptr.p_double[p0] = buf->ptr.p_double[p1];
15212  a->ptr.p_double[p0+1] = buf->ptr.p_double[p1+1];
15213  p0 = p0+2;
15214  p1 = p1+2;
15215  }
15216  }
15217 }
15218 
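/*************************************************************************
Illustrative sketch (not part of ALGLIB): for prime N, Rader's reindexing
turns all non-DC outputs into one length-(N-1) cyclic correlation,

    X[RIQ^q mod N] = a[0] + SUM(p=0..N-2) a[RQ^p mod N]*exp(-2*pi*i*RQ^(p-q)/N),

while X[0] is a plain sum. The routine above evaluates that correlation
with the precomputed (N-1)-point FFT; the sketch below evaluates it
directly in O(N^2). Names are local to the sketch.
*************************************************************************/
#include <complex>
#include <vector>
#include <cmath>

static std::vector< std::complex<double> > dft_prime_via_rader_naive(
    const std::vector< std::complex<double> >& a, int rq, int riq)
{
    const double pi = 3.14159265358979323846;
    int n = (int)a.size();                           /* assumed prime */
    std::vector< std::complex<double> > x(n);
    x[0] = std::complex<double>(0.0, 0.0);
    for(int j=0; j<n; j++)
        x[0] += a[j];                                /* DC term */
    long long kiq = 1;                               /* kiq = riq^q mod n: output index */
    for(int q=0; q<=n-2; q++)
    {
        std::complex<double> s = a[0];
        long long kq = 1;                            /* kq = rq^p mod n: input index */
        for(int p=0; p<=n-2; p++)
        {
            long long e = kq*kiq%n;                  /* j*k mod n = rq^(p-q) mod n */
            s += a[(size_t)kq]*std::polar(1.0, -2*pi*(double)e/n);
            kq = kq*rq%n;
        }
        x[(size_t)kiq] = s;
        kiq = kiq*riq%n;
    }
    return x;
}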
15219 
15220 /*************************************************************************
15221 Factorizes task size N into product of two smaller sizes N1 and N2
15222 
15223 INPUT PARAMETERS:
15224  N - task size, N>0
15225  IsRoot - whether task is root task (first one in a sequence)
15226 
15227 OUTPUT PARAMETERS:
15228  N1, N2 - such numbers that:
15229  * for prime N: N1=N2=0
15230  * for composite N<=MaxRadix: N1=N2=0
15231  * for composite N>MaxRadix: 1<=N1<=N2, N1*N2=N
15232 
15233  -- ALGLIB --
15234  Copyright 08.04.2013 by Bochkanov Sergey
15235 *************************************************************************/
15236 static void ftbase_ftfactorize(ae_int_t n,
15237  ae_bool isroot,
15238  ae_int_t* n1,
15239  ae_int_t* n2,
15240  ae_state *_state)
15241 {
15242  ae_int_t j;
15243  ae_int_t k;
15244 
15245  *n1 = 0;
15246  *n2 = 0;
15247 
15248  ae_assert(n>0, "FTFactorize: N<=0", _state);
15249  *n1 = 0;
15250  *n2 = 0;
15251 
15252  /*
15253  * Small N
15254  */
15255  if( n<=ftbase_maxradix )
15256  {
15257  return;
15258  }
15259 
15260  /*
15261  * Large N, recursive split
15262  */
15263  if( n>ftbase_recursivethreshold )
15264  {
15265  k = ae_iceil(ae_sqrt(n, _state), _state)+1;
15266  ae_assert(k*k>=n, "FTFactorize: internal error during recursive factorization", _state);
15267  for(j=k; j>=2; j--)
15268  {
15269  if( n%j==0 )
15270  {
15271  *n1 = ae_minint(n/j, j, _state);
15272  *n2 = ae_maxint(n/j, j, _state);
15273  return;
15274  }
15275  }
15276  }
15277 
15278  /*
15279  * N>MaxRadix, try to find good codelet
15280  */
15281  for(j=ftbase_maxradix; j>=2; j--)
15282  {
15283  if( n%j==0 )
15284  {
15285  *n1 = j;
15286  *n2 = n/j;
15287  break;
15288  }
15289  }
15290 
15291  /*
15292  * In case no good codelet was found,
15293  * try to factorize N into product of ANY primes.
15294  */
15295  if( *n1*(*n2)!=n )
15296  {
15297  for(j=2; j<=n-1; j++)
15298  {
15299  if( n%j==0 )
15300  {
15301  *n1 = j;
15302  *n2 = n/j;
15303  break;
15304  }
15305  if( j*j>n )
15306  {
15307  break;
15308  }
15309  }
15310  }
15311 
15312  /*
15313  * normalize
15314  */
15315  if( *n1>(*n2) )
15316  {
15317  j = *n1;
15318  *n1 = *n2;
15319  *n2 = j;
15320  }
15321 }
15322 
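/*************************************************************************
Illustrative sketch (not part of ALGLIB; the MaxRadix/RecursiveThreshold
constants of the real routine are not reproduced here): for composite N
the search above looks for the most balanced split, i.e. the divisor of N
closest to sqrt(N), scanning downwards from ceil(sqrt(N)). For prime N it
leaves N1=N2=0. Names are local to the sketch.
*************************************************************************/
#include <math.h>

static void balanced_split(int n, int* n1, int* n2)
{
    *n1 = 0;
    *n2 = 0;
    int k = (int)ceil(sqrt((double)n));
    for(int j=k; j>=2; j--)
        if( n%j==0 )
        {
            *n1 = n/j<j ? n/j : j;     /* smaller factor first */
            *n2 = n/j<j ? j : n/j;
            return;                    /* e.g. n=1000000 -> n1=n2=1000 */
        }
    /* no divisor found: n is prime, keep n1=n2=0 as above */
}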
15323 
15324 /*************************************************************************
15325 Returns optimistic estimate of the FFT cost, in UNITs (1 UNIT = 100 KFLOPs)
15326 
15327 INPUT PARAMETERS:
15328  N - task size, N>0
15329 
15330 RESULT:
15331  cost in UNITs, rounded down to nearest integer
15332 
15333 NOTE: If FFT cost is less than 1 UNIT, it will return 0 as result.
15334 
15335  -- ALGLIB --
15336  Copyright 08.04.2013 by Bochkanov Sergey
15337 *************************************************************************/
15338 static ae_int_t ftbase_ftoptimisticestimate(ae_int_t n, ae_state *_state)
15339 {
15340  ae_int_t result;
15341 
15342 
15343  ae_assert(n>0, "FTOptimisticEstimate: N<=0", _state);
15344  result = ae_ifloor(1.0E-5*5*n*ae_log(n, _state)/ae_log(2, _state), _state);
15345  return result;
15346 }
15347 
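/*************************************************************************
Numeric check of the estimate above (sketch, not part of ALGLIB): the
model is 5*N*log2(N) flops scaled to 100-KFLOP units and rounded down.
*************************************************************************/
#include <math.h>

static long optimistic_fft_units(long n)
{
    return (long)floor(1.0E-5*5*n*log((double)n)/log(2.0));
}
/* optimistic_fft_units(1024)    == 0    (0.512 UNITs rounds down to 0)  */
/* optimistic_fft_units(1000000) == 996  (~99.6 MFLOPs for a 10^6 FFT)   */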
15348 
15349 /*************************************************************************
15350 Twiddle factors calculation
15351 
15352  -- ALGLIB --
15353  Copyright 01.05.2009 by Bochkanov Sergey
15354 *************************************************************************/
15355 static void ftbase_ffttwcalc(/* Real */ ae_vector* a,
15356  ae_int_t aoffset,
15357  ae_int_t n1,
15358  ae_int_t n2,
15359  ae_state *_state)
15360 {
15361  ae_int_t i;
15362  ae_int_t j2;
15363  ae_int_t n;
15364  ae_int_t halfn1;
15365  ae_int_t offs;
15366  double x;
15367  double y;
15368  double twxm1;
15369  double twy;
15370  double twbasexm1;
15371  double twbasey;
15372  double twrowxm1;
15373  double twrowy;
15374  double tmpx;
15375  double tmpy;
15376  double v;
15377  ae_int_t updatetw2;
15378 
15379 
15380 
15381  /*
15382  * Multiplication by twiddle factors for complex Cooley-Tukey FFT
15383  * with N factorized as N1*N2.
15384  *
15385  * Naive solution to this problem is given below:
15386  *
15387  * > for K:=1 to N2-1 do
15388  * > for J:=1 to N1-1 do
15389  * > begin
15390  * > Idx:=K*N1+J;
15391  * > X:=A[AOffset+2*Idx+0];
15392  * > Y:=A[AOffset+2*Idx+1];
15393  * > TwX:=Cos(-2*Pi()*K*J/(N1*N2));
15394  * > TwY:=Sin(-2*Pi()*K*J/(N1*N2));
15395  * > A[AOffset+2*Idx+0]:=X*TwX-Y*TwY;
15396  * > A[AOffset+2*Idx+1]:=X*TwY+Y*TwX;
15397  * > end;
15398  *
15399  * However, more efficient solutions exist.
15400  *
15401  * Each pass of the inner cycle corresponds to multiplication of one
15402  * entry of A by W[k,j]=exp(-I*2*pi*k*j/N). This factor can be rewritten
15403  * as exp(-I*2*pi*k/N)^j. So we can replace costly exponentiation by
15404  * repeated multiplication: W[k,j+1]=W[k,j]*exp(-I*2*pi*k/N), with
15405  * second factor being computed once in the beginning of the iteration.
15406  *
15407  * Also, exp(-I*2*pi*k/N) can be represented as exp(-I*2*pi/N)^k, i.e.
15408  * we have W[K+1,1]=W[K,1]*W[1,1].
15409  *
15410  * In our loop we use following variables:
15411  * * [TwBaseXM1,TwBaseY] = [cos(2*pi/N)-1, sin(2*pi/N)]
15412  * * [TwRowXM1, TwRowY] = [cos(2*pi*I/N)-1, sin(2*pi*I/N)]
15413  * * [TwXM1, TwY] = [cos(2*pi*I*J/N)-1, sin(2*pi*I*J/N)]
15414  *
15415  * Meaning of the variables:
15416  * * [TwXM1,TwY] is current twiddle factor W[I,J]
15417  * * [TwRowXM1, TwRowY] is W[I,1]
15418  * * [TwBaseXM1,TwBaseY] is W[1,1]
15419  *
15420  * During inner loop we multiply current twiddle factor by W[I,1],
15421  * during outer loop we update W[I,1].
15422  *
15423  */
15424  ae_assert(ftbase_updatetw>=2, "FFTTwCalc: internal error - UpdateTw<2", _state);
15425  updatetw2 = ftbase_updatetw/2;
15426  halfn1 = n1/2;
15427  n = n1*n2;
15428  v = -2*ae_pi/n;
15429  twbasexm1 = -2*ae_sqr(ae_sin(0.5*v, _state), _state);
15430  twbasey = ae_sin(v, _state);
15431  twrowxm1 = 0;
15432  twrowy = 0;
15433  offs = aoffset;
15434  for(i=0; i<=n2-1; i++)
15435  {
15436 
15437  /*
15438  * Initialize twiddle factor for current row
15439  */
15440  twxm1 = 0;
15441  twy = 0;
15442 
15443  /*
15444  * N1-point block is separated into 2-point chunks and residual 1-point chunk
15445  * (in case N1 is odd). Unrolled loop is several times faster.
15446  */
15447  for(j2=0; j2<=halfn1-1; j2++)
15448  {
15449 
15450  /*
15451  * Processing:
15452  * * process first element in a chunk.
15453  * * update twiddle factor (unconditional update)
15454  * * process second element
15455  * * conditional update of the twiddle factor
15456  */
15457  x = a->ptr.p_double[offs+0];
15458  y = a->ptr.p_double[offs+1];
15459  tmpx = x*(1+twxm1)-y*twy;
15460  tmpy = x*twy+y*(1+twxm1);
15461  a->ptr.p_double[offs+0] = tmpx;
15462  a->ptr.p_double[offs+1] = tmpy;
15463  tmpx = (1+twxm1)*twrowxm1-twy*twrowy;
15464  twy = twy+(1+twxm1)*twrowy+twy*twrowxm1;
15465  twxm1 = twxm1+tmpx;
15466  x = a->ptr.p_double[offs+2];
15467  y = a->ptr.p_double[offs+3];
15468  tmpx = x*(1+twxm1)-y*twy;
15469  tmpy = x*twy+y*(1+twxm1);
15470  a->ptr.p_double[offs+2] = tmpx;
15471  a->ptr.p_double[offs+3] = tmpy;
15472  offs = offs+4;
15473  if( (j2+1)%updatetw2==0&&j2<halfn1-1 )
15474  {
15475 
15476  /*
15477  * Recalculate twiddle factor
15478  */
15479  v = -2*ae_pi*i*2*(j2+1)/n;
15480  twxm1 = ae_sin(0.5*v, _state);
15481  twxm1 = -2*twxm1*twxm1;
15482  twy = ae_sin(v, _state);
15483  }
15484  else
15485  {
15486 
15487  /*
15488  * Update twiddle factor
15489  */
15490  tmpx = (1+twxm1)*twrowxm1-twy*twrowy;
15491  twy = twy+(1+twxm1)*twrowy+twy*twrowxm1;
15492  twxm1 = twxm1+tmpx;
15493  }
15494  }
15495  if( n1%2==1 )
15496  {
15497 
15498  /*
15499  * Handle residual chunk
15500  */
15501  x = a->ptr.p_double[offs+0];
15502  y = a->ptr.p_double[offs+1];
15503  tmpx = x*(1+twxm1)-y*twy;
15504  tmpy = x*twy+y*(1+twxm1);
15505  a->ptr.p_double[offs+0] = tmpx;
15506  a->ptr.p_double[offs+1] = tmpy;
15507  offs = offs+2;
15508  }
15509 
15510  /*
15511  * update TwRow: TwRow(new) = TwRow(old)*TwBase
15512  */
15513  if( i<n2-1 )
15514  {
15515  if( (i+1)%ftbase_updatetw==0 )
15516  {
15517  v = -2*ae_pi*(i+1)/n;
15518  twrowxm1 = ae_sin(0.5*v, _state);
15519  twrowxm1 = -2*twrowxm1*twrowxm1;
15520  twrowy = ae_sin(v, _state);
15521  }
15522  else
15523  {
15524  tmpx = twbasexm1+twrowxm1*twbasexm1-twrowy*twbasey;
15525  tmpy = twbasey+twrowxm1*twbasey+twrowy*twbasexm1;
15526  twrowxm1 = twrowxm1+tmpx;
15527  twrowy = twrowy+tmpy;
15528  }
15529  }
15530  }
15531 }
15532 
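/*************************************************************************
Illustrative sketch (not part of ALGLIB) of the "minus one" update used in
the loop above: a unit twiddle w=(1+xm1)+i*y is multiplied by another unit
factor r=(1+rxm1)+i*ry while keeping the cos()-1 components explicit, so
accuracy is preserved when the rotation angle is tiny and cos() is very
close to 1. Names are local to the sketch.
*************************************************************************/
static void twiddle_mul_m1(double* xm1, double* y, double rxm1, double ry)
{
    double dx = (1+(*xm1))*rxm1-(*y)*ry;    /* Re(w*r) - Re(w) */
    double dy = (1+(*xm1))*ry+(*y)*rxm1;    /* Im(w*r) - Im(w) */
    *xm1 = *xm1+dx;                         /* same update as TwXM1/TwY above */
    *y = *y+dy;
}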
15533 
15534 /*************************************************************************
15535 Linear transpose: transpose complex matrix stored in 1-dimensional array
15536 
15537  -- ALGLIB --
15538  Copyright 01.05.2009 by Bochkanov Sergey
15539 *************************************************************************/
15540 static void ftbase_internalcomplexlintranspose(/* Real */ ae_vector* a,
15541  ae_int_t m,
15542  ae_int_t n,
15543  ae_int_t astart,
15544  /* Real */ ae_vector* buf,
15545  ae_state *_state)
15546 {
15547 
15548 
15549  ftbase_ffticltrec(a, astart, n, buf, 0, m, m, n, _state);
15550  ae_v_move(&a->ptr.p_double[astart], 1, &buf->ptr.p_double[0], 1, ae_v_len(astart,astart+2*m*n-1));
15551 }
15552 
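/*************************************************************************
Reference version (sketch, not part of ALGLIB) of the same operation: the
array holds an M*N complex matrix as interleaved re/im pairs starting at
AStart, and the result is its N*M transpose written back over the same
span. The routine above does exactly this, but through the cache-friendly
recursive blocking of FFTICLTRec. Names are local to the sketch.
*************************************************************************/
#include <vector>

static void complex_lin_transpose_ref(std::vector<double>& a, int m, int n, int astart)
{
    std::vector<double> buf(2*(size_t)m*(size_t)n);
    for(int i=0; i<m; i++)
        for(int j=0; j<n; j++)
        {
            buf[2*((size_t)j*m+i)+0] = a[astart+2*((size_t)i*n+j)+0];   /* re */
            buf[2*((size_t)j*m+i)+1] = a[astart+2*((size_t)i*n+j)+1];   /* im */
        }
    for(size_t k=0; k<buf.size(); k++)
        a[astart+k] = buf[k];
}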
15553 
15554 /*************************************************************************
15555 Recurrent subroutine for InternalComplexLinTranspose
15556 
15557 Write A^T to B, where:
15558 * A is an m*n complex matrix stored in array A as pairs of real/imaginary values,
15559  beginning from AStart position, with AStride stride
15560 * B is an n*m complex matrix stored in array B as pairs of real/imaginary values,
15561  beginning from BStart position, with BStride stride
15562 stride is measured in complex numbers, i.e. in real/imaginary pairs.
15563 
15564  -- ALGLIB --
15565  Copyright 01.05.2009 by Bochkanov Sergey
15566 *************************************************************************/
15567 static void ftbase_ffticltrec(/* Real */ ae_vector* a,
15568  ae_int_t astart,
15569  ae_int_t astride,
15570  /* Real */ ae_vector* b,
15571  ae_int_t bstart,
15572  ae_int_t bstride,
15573  ae_int_t m,
15574  ae_int_t n,
15575  ae_state *_state)
15576 {
15577  ae_int_t i;
15578  ae_int_t j;
15579  ae_int_t idx1;
15580  ae_int_t idx2;
15581  ae_int_t m2;
15582  ae_int_t m1;
15583  ae_int_t n1;
15584 
15585 
15586  if( m==0||n==0 )
15587  {
15588  return;
15589  }
15590  if( ae_maxint(m, n, _state)<=8 )
15591  {
15592  m2 = 2*bstride;
15593  for(i=0; i<=m-1; i++)
15594  {
15595  idx1 = bstart+2*i;
15596  idx2 = astart+2*i*astride;
15597  for(j=0; j<=n-1; j++)
15598  {
15599  b->ptr.p_double[idx1+0] = a->ptr.p_double[idx2+0];
15600  b->ptr.p_double[idx1+1] = a->ptr.p_double[idx2+1];
15601  idx1 = idx1+m2;
15602  idx2 = idx2+2;
15603  }
15604  }
15605  return;
15606  }
15607  if( n>m )
15608  {
15609 
15610  /*
15611  * New partition:
15612  *
15613  * "A^T -> B" becomes "(A1 A2)^T -> ( B1 )
15614  *                                  ( B2 )
15615  */
15616  n1 = n/2;
15617  if( n-n1>=8&&n1%8!=0 )
15618  {
15619  n1 = n1+(8-n1%8);
15620  }
15621  ae_assert(n-n1>0, "Assertion failed", _state);
15622  ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
15623  ftbase_ffticltrec(a, astart+2*n1, astride, b, bstart+2*n1*bstride, bstride, m, n-n1, _state);
15624  }
15625  else
15626  {
15627 
15628  /*
15629  * New partition:
15630  *
15631  * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 )
15632  *                     ( A2 )
15633  */
15634  m1 = m/2;
15635  if( m-m1>=8&&m1%8!=0 )
15636  {
15637  m1 = m1+(8-m1%8);
15638  }
15639  ae_assert(m-m1>0, "Assertion failed", _state);
15640  ftbase_ffticltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
15641  ftbase_ffticltrec(a, astart+2*m1*astride, astride, b, bstart+2*m1, bstride, m-m1, n, _state);
15642  }
15643 }
15644 
15645 
15646 /*************************************************************************
15647 Recurrent subroutine for InternalRealLinTranspose
15648 
15649 
15650  -- ALGLIB --
15651  Copyright 01.05.2009 by Bochkanov Sergey
15652 *************************************************************************/
15653 static void ftbase_fftirltrec(/* Real */ ae_vector* a,
15654  ae_int_t astart,
15655  ae_int_t astride,
15656  /* Real */ ae_vector* b,
15657  ae_int_t bstart,
15658  ae_int_t bstride,
15659  ae_int_t m,
15660  ae_int_t n,
15661  ae_state *_state)
15662 {
15663  ae_int_t i;
15664  ae_int_t j;
15665  ae_int_t idx1;
15666  ae_int_t idx2;
15667  ae_int_t m1;
15668  ae_int_t n1;
15669 
15670 
15671  if( m==0||n==0 )
15672  {
15673  return;
15674  }
15675  if( ae_maxint(m, n, _state)<=8 )
15676  {
15677  for(i=0; i<=m-1; i++)
15678  {
15679  idx1 = bstart+i;
15680  idx2 = astart+i*astride;
15681  for(j=0; j<=n-1; j++)
15682  {
15683  b->ptr.p_double[idx1] = a->ptr.p_double[idx2];
15684  idx1 = idx1+bstride;
15685  idx2 = idx2+1;
15686  }
15687  }
15688  return;
15689  }
15690  if( n>m )
15691  {
15692 
15693  /*
15694  * New partition:
15695  *
15696  * "A^T -> B" becomes "(A1 A2)^T -> ( B1 )
15697  *                                  ( B2 )
15698  */
15699  n1 = n/2;
15700  if( n-n1>=8&&n1%8!=0 )
15701  {
15702  n1 = n1+(8-n1%8);
15703  }
15704  ae_assert(n-n1>0, "Assertion failed", _state);
15705  ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m, n1, _state);
15706  ftbase_fftirltrec(a, astart+n1, astride, b, bstart+n1*bstride, bstride, m, n-n1, _state);
15707  }
15708  else
15709  {
15710 
15711  /*
15712  * New partition:
15713  *
15714  * "A^T -> B" becomes "( A1 )^T -> ( B1 B2 )
15715  *                     ( A2 )
15716  */
15717  m1 = m/2;
15718  if( m-m1>=8&&m1%8!=0 )
15719  {
15720  m1 = m1+(8-m1%8);
15721  }
15722  ae_assert(m-m1>0, "Assertion failed", _state);
15723  ftbase_fftirltrec(a, astart, astride, b, bstart, bstride, m1, n, _state);
15724  ftbase_fftirltrec(a, astart+m1*astride, astride, b, bstart+m1, bstride, m-m1, n, _state);
15725  }
15726 }
15727 
15728 
15729 /*************************************************************************
15730 Recurrent subroutine for FTBaseFindSmooth
15731 
15732  -- ALGLIB --
15733  Copyright 01.05.2009 by Bochkanov Sergey
15734 *************************************************************************/
15735 static void ftbase_ftbasefindsmoothrec(ae_int_t n,
15736  ae_int_t seed,
15737  ae_int_t leastfactor,
15738  ae_int_t* best,
15739  ae_state *_state)
15740 {
15741 
15742 
15743  ae_assert(ftbase_ftbasemaxsmoothfactor<=5, "FTBaseFindSmoothRec: internal error!", _state);
15744  if( seed>=n )
15745  {
15746  *best = ae_minint(*best, seed, _state);
15747  return;
15748  }
15749  if( leastfactor<=2 )
15750  {
15751  ftbase_ftbasefindsmoothrec(n, seed*2, 2, best, _state);
15752  }
15753  if( leastfactor<=3 )
15754  {
15755  ftbase_ftbasefindsmoothrec(n, seed*3, 3, best, _state);
15756  }
15757  if( leastfactor<=5 )
15758  {
15759  ftbase_ftbasefindsmoothrec(n, seed*5, 5, best, _state);
15760  }
15761 }
15762 
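/*************************************************************************
Brute-force equivalent (sketch, not part of ALGLIB) of the recursion
above, assuming the same factor set {2,3,5}: returns the smallest
5-smooth number that is >= N. The recursion reaches the same value by
enumerating products 2^a*3^b*5^c instead of testing candidates one by
one. Names are local to the sketch.
*************************************************************************/
static int strip_factor(int x, int f)
{
    while( x%f==0 )
        x = x/f;
    return x;
}
static int find_smooth_235(int n)
{
    int x = n;
    while( strip_factor(strip_factor(strip_factor(x,2),3),5)!=1 )
        x = x+1;
    return x;      /* e.g. find_smooth_235(1000)==1000, find_smooth_235(1013)==1024 */
}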
15763 
15764 ae_bool _fasttransformplan_init(void* _p, ae_state *_state, ae_bool make_automatic)
15765 {
15766  fasttransformplan *p = (fasttransformplan*)_p;
15767  ae_touch_ptr((void*)p);
15768  if( !ae_matrix_init(&p->entries, 0, 0, DT_INT, _state, make_automatic) )
15769  return ae_false;
15770  if( !ae_vector_init(&p->buffer, 0, DT_REAL, _state, make_automatic) )
15771  return ae_false;
15772  if( !ae_vector_init(&p->precr, 0, DT_REAL, _state, make_automatic) )
15773  return ae_false;
15774  if( !ae_vector_init(&p->preci, 0, DT_REAL, _state, make_automatic) )
15775  return ae_false;
15776  if( !ae_shared_pool_init(&p->bluesteinpool, _state, make_automatic) )
15777  return ae_false;
15778  return ae_true;
15779 }
15780 
15781 
15782 ae_bool _fasttransformplan_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic)
15783 {
15784  fasttransformplan *dst = (fasttransformplan*)_dst;
15785  fasttransformplan *src = (fasttransformplan*)_src;
15786  if( !ae_matrix_init_copy(&dst->entries, &src->entries, _state, make_automatic) )
15787  return ae_false;
15788  if( !ae_vector_init_copy(&dst->buffer, &src->buffer, _state, make_automatic) )
15789  return ae_false;
15790  if( !ae_vector_init_copy(&dst->precr, &src->precr, _state, make_automatic) )
15791  return ae_false;
15792  if( !ae_vector_init_copy(&dst->preci, &src->preci, _state, make_automatic) )
15793  return ae_false;
15794  if( !ae_shared_pool_init_copy(&dst->bluesteinpool, &src->bluesteinpool, _state, make_automatic) )
15795  return ae_false;
15796  return ae_true;
15797 }
15798 
15799 
15800 void _fasttransformplan_clear(void* _p)
15801 {
15802  fasttransformplan *p = (fasttransformplan*)_p;
15803  ae_touch_ptr((void*)p);
15804  ae_matrix_clear(&p->entries);
15805  ae_vector_clear(&p->buffer);
15806  ae_vector_clear(&p->precr);
15807  ae_vector_clear(&p->preci);
15808  ae_shared_pool_clear(&p->bluesteinpool);
15809 }
15810 
15811 
15812 void _fasttransformplan_destroy(void* _p)
15813 {
15814  fasttransformplan *p = (fasttransformplan*)_p;
15815  ae_touch_ptr((void*)p);
15816  ae_matrix_destroy(&p->entries);
15817  ae_vector_destroy(&p->buffer);
15818  ae_vector_destroy(&p->precr);
15819  ae_vector_destroy(&p->preci);
15820  ae_shared_pool_destroy(&p->bluesteinpool);
15821 }
15822 
15823 
15824 
15825 
15826 double nulog1p(double x, ae_state *_state)
15827 {
15828  double z;
15829  double lp;
15830  double lq;
15831  double result;
15832 
15833 
15834  z = 1.0+x;
15835  if( ae_fp_less(z,0.70710678118654752440)||ae_fp_greater(z,1.41421356237309504880) )
15836  {
15837  result = ae_log(z, _state);
15838  return result;
15839  }
15840  z = x*x;
15841  lp = 4.5270000862445199635215E-5;
15842  lp = lp*x+4.9854102823193375972212E-1;
15843  lp = lp*x+6.5787325942061044846969E0;
15844  lp = lp*x+2.9911919328553073277375E1;
15845  lp = lp*x+6.0949667980987787057556E1;
15846  lp = lp*x+5.7112963590585538103336E1;
15847  lp = lp*x+2.0039553499201281259648E1;
15848  lq = 1.0000000000000000000000E0;
15849  lq = lq*x+1.5062909083469192043167E1;
15850  lq = lq*x+8.3047565967967209469434E1;
15851  lq = lq*x+2.2176239823732856465394E2;
15852  lq = lq*x+3.0909872225312059774938E2;
15853  lq = lq*x+2.1642788614495947685003E2;
15854  lq = lq*x+6.0118660497603843919306E1;
15855  z = -0.5*z+x*(z*lp/lq);
15856  result = x+z;
15857  return result;
15858 }
15859 
15860 
15861 double nuexpm1(double x, ae_state *_state)
15862 {
15863  double r;
15864  double xx;
15865  double ep;
15866  double eq;
15867  double result;
15868 
15869 
15870  if( ae_fp_less(x,-0.5)||ae_fp_greater(x,0.5) )
15871  {
15872  result = ae_exp(x, _state)-1.0;
15873  return result;
15874  }
15875  xx = x*x;
15876  ep = 1.2617719307481059087798E-4;
15877  ep = ep*xx+3.0299440770744196129956E-2;
15878  ep = ep*xx+9.9999999999999999991025E-1;
15879  eq = 3.0019850513866445504159E-6;
15880  eq = eq*xx+2.5244834034968410419224E-3;
15881  eq = eq*xx+2.2726554820815502876593E-1;
15882  eq = eq*xx+2.0000000000000000000897E0;
15883  r = x*ep;
15884  r = r/(eq-r);
15885  result = r+r;
15886  return result;
15887 }
15888 
15889 
15890 double nucosm1(double x, ae_state *_state)
15891 {
15892  double xx;
15893  double c;
15894  double result;
15895 
15896 
15897  if( ae_fp_less(x,-0.25*ae_pi)||ae_fp_greater(x,0.25*ae_pi) )
15898  {
15899  result = ae_cos(x, _state)-1;
15900  return result;
15901  }
15902  xx = x*x;
15903  c = 4.7377507964246204691685E-14;
15904  c = c*xx-1.1470284843425359765671E-11;
15905  c = c*xx+2.0876754287081521758361E-9;
15906  c = c*xx-2.7557319214999787979814E-7;
15907  c = c*xx+2.4801587301570552304991E-5;
15908  c = c*xx-1.3888888888888872993737E-3;
15909  c = c*xx+4.1666666666666666609054E-2;
15910  result = -0.5*xx+xx*xx*c;
15911  return result;
15912 }
15913 
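/*************************************************************************
Accuracy note (sketch, not part of ALGLIB): the three helpers above exist
because the naive formulas log(1+x), exp(x)-1 and cos(x)-1 cancel (or
round to zero) for tiny x. Their leading-order behaviour is shown below;
these are Taylor expansions for illustration, not the rational
approximations used above.
*************************************************************************/
static void near_zero_expansions(double x, double* l, double* e, double* c)
{
    *l = x-0.5*x*x;     /* log(1+x)  ~ NULog1p(x)  for |x|<<1 */
    *e = x+0.5*x*x;     /* exp(x)-1  ~ NUExpm1(x)  for |x|<<1 */
    *c = -0.5*x*x;      /* cos(x)-1  ~ NUCosm1(x)  for |x|<<1 */
}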
15914 
15915 
15916 
15917 
15918 }
15919 