Xmipp  v3.23.11-Nereus
svm.cpp
Go to the documentation of this file.
1 /* This file has been taken from LibSVM 3.25 */
2 /* Authors: Chih-Chung Chang and Chih-Jen Lin, LIBSVM :*/
3 /* Software available at http://www.csie.ntu.edu.tw/~cjlin/libsvm */
4 
5 #include <math.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <ctype.h>
9 #include <float.h>
10 #include <string.h>
11 #include <stdarg.h>
12 #include <limits.h>
13 #include <locale.h>
14 #include "svm.h"
15 using namespace libsvm;
16 
// Qfloat: reduced-precision type for cached kernel/Q-matrix entries
// (float halves the memory footprint of the kernel cache versus double).
typedef float Qfloat;
// schar: compact signed type used to store the +1/-1 class labels.
typedef signed char schar;
// Guarded: some platforms define min/max as macros in system headers.
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
// Exchange the values of x and y (local stand-in for std::swap).
template <class T> static inline void swap(T& x, T& y)
{
    T tmp = x;
    x = y;
    y = tmp;
}
// Allocate a fresh T[n] in dst and bit-copy the first n elements of src into
// it. NOTE(review): copies sizeof(T)*n bytes from an S* source, so S and T
// must have identical size/layout — true for the POD/pointer types used here.
template <class S, class T> static inline void clone(T*& dst, S* src, int n)
{
    dst = new T[n];
    memcpy((void *)dst,(void *)src,sizeof(T)*n);
}
// Raise base to a non-negative integer power using binary
// (square-and-multiply) exponentiation; returns 1.0 when times <= 0.
static inline double powi(double base, int times)
{
    double result = 1.0;
    double square = base;
    int exponent = times;
    while(exponent > 0)
    {
        if(exponent & 1)
            result *= square;
        square *= square;
        exponent >>= 1;
    }
    return result;
}
43 #define INF HUGE_VAL
44 #define TAU 1e-12
45 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
46 
// Default output sink: write s to stdout and flush immediately so that the
// progress marks ('.' and '*') printed during optimization appear in real time.
static void print_string_stdout(const char *s)
{
    fputs(s,stdout);
    fflush(stdout);
}
// Redirectable output hook used by info(); defaults to stdout.
static void (*svm_print_string) (const char *) = &print_string_stdout;
53 #if 1
54 static void info(const char *fmt,...)
55 {
56  char buf[BUFSIZ];
57  va_list ap;
58  va_start(ap,fmt);
59  vsprintf(buf,fmt,ap);
60  va_end(ap);
61  (*svm_print_string)(buf);
62 }
63 #else
64 static void info(const char *fmt,...) {}
65 #endif
66 
67 //
68 // Kernel Cache
69 //
70 // l is the number of total data items
71 // size is the cache size limit in bytes
72 //
// LRU cache of kernel-matrix columns. Each of the l data items owns a head_t
// entry; entries that currently hold data are linked into a circular LRU list
// whose sentinel is lru_head. 'size' tracks the remaining budget in Qfloat
// slots (converted from bytes in the constructor).
class Cache
{
public:
    Cache(int l,long int size);
    ~Cache();

    // request data [0,len)
    // return some position p where [p,len) need to be filled
    // (p >= len if nothing needs to be filled)
    int get_data(const int index, Qfloat **data, int len);
    void swap_index(int i, int j);
private:
    int l;          // total number of data items (columns)
    long int size;  // remaining capacity, counted in Qfloat entries
    struct head_t
    {
        head_t *prev, *next; // a circular list
        Qfloat *data;        // cached column values (NULL when evicted)
        int len;             // data[0,len) is cached in this entry
    };

    head_t *head;    // per-column entries, indexed by data item
    head_t lru_head; // sentinel node of the circular LRU list
    void lru_delete(head_t *h);
    void lru_insert(head_t *h);
};
99 
// Build an empty cache for l columns with a budget of size_ bytes.
Cache::Cache(int l_,long int size_):l(l_),size(size_)
{
    head = (head_t *)calloc(l,sizeof(head_t)); // initialized to 0
    // convert the byte budget into Qfloat slots, charge the head table
    // against it, and guarantee room for at least two full columns
    size /= sizeof(Qfloat);
    size -= l * sizeof(head_t) / sizeof(Qfloat);
    size = max(size, 2 * (long int) l); // cache must be large enough for two columns
    lru_head.next = lru_head.prev = &lru_head; // empty circular LRU list
}
108 
// (Cache destructor body — its signature line is missing from this listing;
// presumably Cache::~Cache(). TODO confirm against the original svm.cpp.)
{
    // free every cached column still linked in the LRU ring, then the table
    for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
        free(h->data);
    free(head);
}
115 
116 void Cache::lru_delete(head_t *h)
117 {
118  // delete from current location
119  h->prev->next = h->next;
120  h->next->prev = h->prev;
121 }
122 
123 void Cache::lru_insert(head_t *h)
124 {
125  // insert to last position
126  h->next = &lru_head;
127  h->prev = lru_head.prev;
128  h->prev->next = h;
129  h->next->prev = h;
130 }
131 
// Hand back (in *data) the cache slot for column 'index', guaranteeing
// capacity for len entries and marking the column most-recently-used.
// Returns the number of entries already filled: the caller must compute
// rows [return value, len); a return >= len means the column is fully cached.
int Cache::get_data(const int index, Qfloat **data, int len)
{
    head_t *h = &head[index];
    if(h->len) lru_delete(h); // temporarily unlink; re-inserted as MRU below
    int more = len - h->len;

    if(more > 0)
    {
        // free old space: evict least-recently-used columns until the
        // budget covers the 'more' additional Qfloat slots we need
        while(size < more)
        {
            head_t *old = lru_head.next;
            lru_delete(old);
            free(old->data);
            size += old->len;
            old->data = 0;
            old->len = 0;
        }

        // allocate new space (grow this column's buffer to len entries)
        h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
        size -= more;
        // h->len becomes len; local 'len' now holds the old fill count,
        // which is what gets returned as the "filled up to" position
        swap(h->len,len);
    }

    lru_insert(h);
    *data = h->data;
    return len;
}
161 
// Relabel data items i and j inside the cache: swap their column entries and,
// in every other cached column, the two row values. Columns long enough to
// contain row i but not row j cannot be fixed up and are evicted instead.
void Cache::swap_index(int i, int j)
{
    if(i==j) return;

    // swap the two columns themselves (unlink while mutating the list)
    if(head[i].len) lru_delete(&head[i]);
    if(head[j].len) lru_delete(&head[j]);
    swap(head[i].data,head[j].data);
    swap(head[i].len,head[j].len);
    if(head[i].len) lru_insert(&head[i]);
    if(head[j].len) lru_insert(&head[j]);

    if(i>j) swap(i,j); // ensure i < j for the extent tests below
    for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
    {
        if(h->len > i)
        {
            if(h->len > j)
                swap(h->data[i],h->data[j]);
            else
            {
                // give up: this column covers row i but not row j, so the
                // swap cannot be applied — evict it and reclaim its budget
                lru_delete(h);
                free(h->data);
                size += h->len;
                h->data = 0;
                h->len = 0;
            }
        }
    }
}
192 
193 //
194 // Kernel evaluation
195 //
196 // the static method k_function is for doing single kernel evaluation
197 // the constructor of Kernel prepares to calculate the l*l kernel matrix
198 // the member function get_Q is for getting one column from the Q Matrix
199 //
// Abstract interface to the (implicit) l-by-l matrix Q of the dual problem.
class QMatrix {
public:
    virtual Qfloat *get_Q(int column, int len) const = 0; // first len entries of one column
    virtual double *get_QD() const = 0;                   // diagonal entries Q[i][i]
    virtual void swap_index(int i, int j) const = 0;      // relabel data items i and j
    virtual ~QMatrix() {}
};
207 
// Base class that evaluates kernel values K(x[i],x[j]) for the training set;
// concrete Q matrices (SVC_Q, ONE_CLASS_Q, SVR_Q) derive from it.
class Kernel: public QMatrix {
public:
    Kernel(int l, svm_node * const * x, const svm_parameter& param);
    virtual ~Kernel();

    // single kernel evaluation between two sparse vectors (predict time)
    static double k_function(const svm_node *x, const svm_node *y,
                             const svm_parameter& param);
    virtual Qfloat *get_Q(int column, int len) const = 0;
    virtual double *get_QD() const = 0;
    virtual void swap_index(int i, int j) const // no so const...
    {
        swap(x[i],x[j]);
        if(x_square) swap(x_square[i],x_square[j]);
    }
protected:

    // member-function pointer selected once in the constructor from
    // param.kernel_type; evaluates K(x[i],x[j])
    double (Kernel::*kernel_function)(int i, int j) const;

private:
    const svm_node **x;  // cloned array of pointers to the training vectors
    double *x_square;    // precomputed dot(x[i],x[i]) for RBF, else NULL

    // svm_parameter
    const int kernel_type;
    const int degree;
    const double gamma;
    const double coef0;

    static double dot(const svm_node *px, const svm_node *py);
    double kernel_linear(int i, int j) const
    {
        return dot(x[i],x[j]);
    }
    double kernel_poly(int i, int j) const
    {
        return powi(gamma*dot(x[i],x[j])+coef0,degree);
    }
    double kernel_rbf(int i, int j) const
    {
        // uses cached squared norms: ||xi-xj||^2 = xi.xi + xj.xj - 2*xi.xj
        return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
    }
    double kernel_sigmoid(int i, int j) const
    {
        return tanh(gamma*dot(x[i],x[j])+coef0);
    }
    double kernel_precomputed(int i, int j) const
    {
        // x[j][0].value holds the sample's serial number into row x[i]
        return x[i][(int)(x[j][0].value)].value;
    }
};
258 
// Bind kernel_function according to param.kernel_type, keep a cloned copy of
// the sample pointer array, and (for RBF only) precompute the squared norms.
Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
:kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
    switch(kernel_type)
    {
        // NOTE(review): no default case — an unknown kernel_type would leave
        // kernel_function uninitialized; presumably parameters are validated
        // upstream (svm_check_parameter) — confirm.
        case LINEAR:
            kernel_function = &Kernel::kernel_linear;
            break;
        case POLY:
            kernel_function = &Kernel::kernel_poly;
            break;
        case RBF:
            kernel_function = &Kernel::kernel_rbf;
            break;
        case SIGMOID:
            kernel_function = &Kernel::kernel_sigmoid;
            break;
        case PRECOMPUTED:
            kernel_function = &Kernel::kernel_precomputed;
            break;
    }

    clone(x,x_,l); // copy only the pointer array; vectors stay owned by caller

    if(kernel_type == RBF)
    {
        x_square = new double[l];
        for(int i=0;i<l;i++)
            x_square[i] = dot(x[i],x[i]);
    }
    else
        x_square = 0;
}
293 
// (Kernel destructor body — its signature line is missing from this listing;
// presumably Kernel::~Kernel(). TODO confirm against the original svm.cpp.)
{
    delete[] x; // frees the cloned pointer array only, not the vectors
    delete[] x_square;
}
299 
300 double Kernel::dot(const svm_node *px, const svm_node *py)
301 {
302  double sum = 0;
303  while(px->index != -1 && py->index != -1)
304  {
305  if(px->index == py->index)
306  {
307  sum += px->value * py->value;
308  ++px;
309  ++py;
310  }
311  else
312  {
313  if(px->index > py->index)
314  ++py;
315  else
316  ++px;
317  }
318  }
319  return sum;
320 }
321 
// Evaluate K(x, y) for a single pair of sparse vectors, used at prediction
// time when no precomputed column cache exists. Sparse vectors are index-
// sorted and terminated by index == -1.
double Kernel::k_function(const svm_node *x, const svm_node *y,
                          const svm_parameter& param)
{
    switch(param.kernel_type)
    {
        case LINEAR:
            return dot(x,y);
        case POLY:
            return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
        case RBF:
        {
            // accumulate ||x - y||^2 with a sparse merge of the two vectors
            double sum = 0;
            while(x->index != -1 && y->index !=-1)
            {
                if(x->index == y->index)
                {
                    double d = x->value - y->value;
                    sum += d*d;
                    ++x;
                    ++y;
                }
                else
                {
                    // index present in only one vector: the other coordinate is 0
                    if(x->index > y->index)
                    {
                        sum += y->value * y->value;
                        ++y;
                    }
                    else
                    {
                        sum += x->value * x->value;
                        ++x;
                    }
                }
            }

            // tail of whichever vector is longer
            while(x->index != -1)
            {
                sum += x->value * x->value;
                ++x;
            }

            while(y->index != -1)
            {
                sum += y->value * y->value;
                ++y;
            }

            return exp(-param.gamma*sum);
        }
        case SIGMOID:
            return tanh(param.gamma*dot(x,y)+param.coef0);
        case PRECOMPUTED: //x: test (validation), y: SV
            return x[(int)(y->value)].value;
        default:
            return 0; // Unreachable
    }
}
380 
381 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
382 // Solves:
383 //
384 // min 0.5(\alpha^T Q \alpha) + p^T \alpha
385 //
386 // y^T \alpha = \delta
387 // y_i = +1 or -1
388 // 0 <= alpha_i <= Cp for y_i = 1
389 // 0 <= alpha_i <= Cn for y_i = -1
390 //
391 // Given:
392 //
393 // Q, p, y, Cp, Cn, and an initial feasible point \alpha
394 // l is the size of vectors and matrices
395 // eps is the stopping tolerance
396 //
397 // solution will be put in \alpha, objective value will be put in obj
398 //
// Generic SMO solver (Fan et al., JMLR 2005) — see the problem statement in
// the comment block above. Solver_NU derives from it for the nu formulations.
class Solver {
public:
    Solver() {};
    virtual ~Solver() {};

    struct SolutionInfo {
        double obj; // objective value at the solution
        double rho; // bias term of the decision function
        // NOTE(review): this listing omits two fields here — Solve() assigns
        // si->upper_bound_p and si->upper_bound_n; confirm against svm.cpp.
        double r;   // for Solver_NU
    };

    void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
               double *alpha_, double Cp, double Cn, double eps,
               SolutionInfo* si, int shrinking);
protected:
    // NOTE(review): this listing omits member declarations used below
    // (apparently int active_size; schar *y;) — confirm against svm.cpp.
    double *G;          // gradient of objective function
    enum { LOWER_BOUND, UPPER_BOUND, FREE };
    char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
    double *alpha;
    const QMatrix *Q;
    const double *QD;   // diagonal of Q
    double eps;         // stopping tolerance
    double Cp,Cn;       // box constraints for positive / negative samples
    double *p;          // linear term of the objective
    // NOTE(review): an int *active_set declaration also appears to be
    // omitted from this listing here.
    double *G_bar;      // gradient, if we treat free variables as 0
    int l;
    bool unshrink;      // XXX

    double get_C(int i)
    {
        return (y[i] > 0)? Cp : Cn;
    }
    // (signature line missing from this listing — presumably
    // void update_alpha_status(int i), which Solve() calls)
    {
        if(alpha[i] >= get_C(i))
            alpha_status[i] = UPPER_BOUND;
        else if(alpha[i] <= 0)
            alpha_status[i] = LOWER_BOUND;
        else alpha_status[i] = FREE;
    }
    bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
    bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
    bool is_free(int i) { return alpha_status[i] == FREE; }
    void swap_index(int i, int j);
    void reconstruct_gradient();
    virtual int select_working_set(int &i, int &j);
    virtual double calculate_rho();
    virtual void do_shrinking();
private:
    bool be_shrunk(int i, double Gmax1, double Gmax2);
};
455 
// Relabel working variables i and j across every parallel array the solver
// keeps, delegating the Q-matrix/cache side to the QMatrix implementation.
void Solver::swap_index(int i, int j)
{
    Q->swap_index(i,j);
    swap(y[i],y[j]);
    swap(G[i],G[j]);
    swap(alpha_status[i],alpha_status[j]);
    swap(alpha[i],alpha[j]);
    swap(p[i],p[j]);
    swap(active_set[i],active_set[j]);
    swap(G_bar[i],G_bar[j]);
}
467 
// (Signature line missing from this listing — presumably
// void Solver::reconstruct_gradient(). TODO confirm against svm.cpp.)
{
    // reconstruct inactive elements of G from G_bar and free variables

    if(active_size == l) return;

    int i,j;
    int nr_free = 0;

    // G_bar already accounts for upper-bounded variables, so start from it
    for(j=active_size;j<l;j++)
        G[j] = G_bar[j] + p[j];

    for(j=0;j<active_size;j++)
        if(is_free(j))
            nr_free++;

    if(2*nr_free < active_size)
        info("\nWARNING: using -h 0 may be faster\n");

    // choose the cheaper of two equivalent update orders based on how many
    // kernel entries each would touch
    if (nr_free*l > 2*active_size*(l-active_size))
    {
        for(i=active_size;i<l;i++)
        {
            const Qfloat *Q_i = Q->get_Q(i,active_size);
            for(j=0;j<active_size;j++)
                if(is_free(j))
                    G[i] += alpha[j] * Q_i[j];
        }
    }
    else
    {
        for(i=0;i<active_size;i++)
            if(is_free(i))
            {
                const Qfloat *Q_i = Q->get_Q(i,l);
                double alpha_i = alpha[i];
                for(j=active_size;j<l;j++)
                    G[j] += alpha_i * Q_i[j];
            }
    }
}
509 
// Run SMO on the l-variable dual problem described above: Q and p_ define the
// objective, y_ the +/-1 labels, Cp/Cn the box constraints, eps the stopping
// tolerance. On exit alpha_ holds the solution and si the objective value,
// rho and the box bounds. shrinking != 0 enables the active-set heuristic.
void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
                   double *alpha_, double Cp, double Cn, double eps,
                   SolutionInfo* si, int shrinking)
{
    this->l = l;
    this->Q = &Q;
    QD=Q.get_QD();
    clone(p, p_,l);       // private working copies, freed at the end
    clone(y, y_,l);
    clone(alpha,alpha_,l);
    this->Cp = Cp;
    this->Cn = Cn;
    this->eps = eps;
    unshrink = false;

    // initialize alpha_status
    {
        alpha_status = new char[l];
        for(int i=0;i<l;i++)
            update_alpha_status(i);
    }

    // initialize active set (for shrinking)
    {
        active_set = new int[l];
        for(int i=0;i<l;i++)
            active_set[i] = i;
        active_size = l;
    }

    // initialize gradient: G = p + Q*alpha, and G_bar from bounded variables
    {
        G = new double[l];
        G_bar = new double[l];
        int i;
        for(i=0;i<l;i++)
        {
            G[i] = p[i];
            G_bar[i] = 0;
        }
        for(i=0;i<l;i++)
            if(!is_lower_bound(i))
            {
                const Qfloat *Q_i = Q.get_Q(i,l);
                double alpha_i = alpha[i];
                int j;
                for(j=0;j<l;j++)
                    G[j] += alpha_i*Q_i[j];
                if(is_upper_bound(i))
                    for(j=0;j<l;j++)
                        G_bar[j] += get_C(i) * Q_i[j];
            }
    }

    // optimization step

    int iter = 0;
    // iteration cap scales with l but is clamped to avoid int overflow
    int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
    int counter = min(l,1000)+1;

    while(iter < max_iter)
    {
        // show progress and do shrinking

        if(--counter == 0)
        {
            counter = min(l,1000);
            if(shrinking) do_shrinking();
            info(".");
        }

        int i,j;
        if(select_working_set(i,j)!=0)
        {
            // reconstruct the whole gradient
            reconstruct_gradient();
            // reset active set size and check
            active_size = l;
            info("*");
            // retry on the full set: only stop when truly optimal
            if(select_working_set(i,j)!=0)
                break;
            else
                counter = 1; // do shrinking next iteration
        }

        ++iter;

        // update alpha[i] and alpha[j], handle bounds carefully

        const Qfloat *Q_i = Q.get_Q(i,active_size);
        const Qfloat *Q_j = Q.get_Q(j,active_size);

        double C_i = get_C(i);
        double C_j = get_C(j);

        double old_alpha_i = alpha[i];
        double old_alpha_j = alpha[j];

        if(y[i]!=y[j])
        {
            // opposite labels: alpha_i - alpha_j is invariant along the step
            double quad_coef = QD[i]+QD[j]+2*Q_i[j];
            if (quad_coef <= 0)
                quad_coef = TAU; // guard against non-PSD numerical noise
            double delta = (-G[i]-G[j])/quad_coef;
            double diff = alpha[i] - alpha[j];
            alpha[i] += delta;
            alpha[j] += delta;

            // clip back into the feasible box while preserving diff
            if(diff > 0)
            {
                if(alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = diff;
                }
            }
            else
            {
                if(alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = -diff;
                }
            }
            if(diff > C_i - C_j)
            {
                if(alpha[i] > C_i)
                {
                    alpha[i] = C_i;
                    alpha[j] = C_i - diff;
                }
            }
            else
            {
                if(alpha[j] > C_j)
                {
                    alpha[j] = C_j;
                    alpha[i] = C_j + diff;
                }
            }
        }
        else
        {
            // same labels: alpha_i + alpha_j is invariant along the step
            double quad_coef = QD[i]+QD[j]-2*Q_i[j];
            if (quad_coef <= 0)
                quad_coef = TAU;
            double delta = (G[i]-G[j])/quad_coef;
            double sum = alpha[i] + alpha[j];
            alpha[i] -= delta;
            alpha[j] += delta;

            // clip back into the feasible box while preserving sum
            if(sum > C_i)
            {
                if(alpha[i] > C_i)
                {
                    alpha[i] = C_i;
                    alpha[j] = sum - C_i;
                }
            }
            else
            {
                if(alpha[j] < 0)
                {
                    alpha[j] = 0;
                    alpha[i] = sum;
                }
            }
            if(sum > C_j)
            {
                if(alpha[j] > C_j)
                {
                    alpha[j] = C_j;
                    alpha[i] = sum - C_j;
                }
            }
            else
            {
                if(alpha[i] < 0)
                {
                    alpha[i] = 0;
                    alpha[j] = sum;
                }
            }
        }

        // update G with the rank-two change from the two moved variables

        double delta_alpha_i = alpha[i] - old_alpha_i;
        double delta_alpha_j = alpha[j] - old_alpha_j;

        for(int k=0;k<active_size;k++)
        {
            G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
        }

        // update alpha_status and G_bar (only when a variable crosses the
        // upper bound does G_bar need a full-length column update)

        {
            bool ui = is_upper_bound(i);
            bool uj = is_upper_bound(j);
            update_alpha_status(i);
            update_alpha_status(j);
            int k;
            if(ui != is_upper_bound(i))
            {
                Q_i = Q.get_Q(i,l);
                if(ui)
                    for(k=0;k<l;k++)
                        G_bar[k] -= C_i * Q_i[k];
                else
                    for(k=0;k<l;k++)
                        G_bar[k] += C_i * Q_i[k];
            }

            if(uj != is_upper_bound(j))
            {
                Q_j = Q.get_Q(j,l);
                if(uj)
                    for(k=0;k<l;k++)
                        G_bar[k] -= C_j * Q_j[k];
                else
                    for(k=0;k<l;k++)
                        G_bar[k] += C_j * Q_j[k];
            }
        }
    }

    if(iter >= max_iter)
    {
        if(active_size < l)
        {
            // reconstruct the whole gradient to calculate objective value
            reconstruct_gradient();
            active_size = l;
            info("*");
        }
        fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
    }

    // calculate rho

    si->rho = calculate_rho();

    // calculate objective value: 0.5 a^T Q a + p^T a = 0.5 sum a_i (G_i + p_i)
    {
        double v = 0;
        int i;
        for(i=0;i<l;i++)
            v += alpha[i] * (G[i] + p[i]);

        si->obj = v/2;
    }

    // put back the solution, undoing the active-set permutation
    {
        for(int i=0;i<l;i++)
            alpha_[active_set[i]] = alpha[i];
    }

    // juggle everything back
    /*{
        for(int i=0;i<l;i++)
            while(active_set[i] != i)
                swap_index(i,active_set[i]);
        // or Q.swap_index(i,active_set[i]);
    }*/

    si->upper_bound_p = Cp;
    si->upper_bound_n = Cn;

    info("\noptimization finished, #iter = %d\n",iter);

    delete[] p;
    delete[] y;
    delete[] alpha;
    delete[] alpha_status;
    delete[] active_set;
    delete[] G;
    delete[] G_bar;
}
790 
791 // return 1 if already optimal, return 0 otherwise
// return 1 if already optimal, return 0 otherwise
// WSS2 working-set selection (Fan et al. 2005): i maximizes the violation,
// j maximizes the second-order decrease of the objective.
int Solver::select_working_set(int &out_i, int &out_j)
{
    // return i,j such that
    // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
    // j: minimizes the decrease of obj value
    //    (if quadratic coefficeint <= 0, replace it with tau)
    //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

    double Gmax = -INF;
    double Gmax2 = -INF;
    int Gmax_idx = -1;
    int Gmin_idx = -1;
    double obj_diff_min = INF;

    // first pass: pick i with the maximal violation among I_up
    for(int t=0;t<active_size;t++)
        if(y[t]==+1)
        {
            if(!is_upper_bound(t))
                if(-G[t] >= Gmax)
                {
                    Gmax = -G[t];
                    Gmax_idx = t;
                }
        }
        else
        {
            if(!is_lower_bound(t))
                if(G[t] >= Gmax)
                {
                    Gmax = G[t];
                    Gmax_idx = t;
                }
        }

    int i = Gmax_idx;
    const Qfloat *Q_i = NULL;
    if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
        Q_i = Q->get_Q(i,active_size);

    // second pass: pick j in I_low maximizing the guaranteed objective
    // decrease for the pair (i,j); also track Gmax2 for the stop criterion
    for(int j=0;j<active_size;j++)
    {
        if(y[j]==+1)
        {
            if (!is_lower_bound(j))
            {
                double grad_diff=Gmax+G[j];
                if (G[j] >= Gmax2)
                    Gmax2 = G[j];
                if (grad_diff > 0)
                {
                    double obj_diff;
                    double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
        else
        {
            if (!is_upper_bound(j))
            {
                double grad_diff= Gmax-G[j];
                if (-G[j] >= Gmax2)
                    Gmax2 = -G[j];
                if (grad_diff > 0)
                {
                    double obj_diff;
                    double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
    }

    // stop when the maximal violating pair satisfies the KKT gap eps
    if(Gmax+Gmax2 < eps || Gmin_idx == -1)
        return 1;

    out_i = Gmax_idx;
    out_j = Gmin_idx;
    return 0;
}
890 
891 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
892 {
893  if(is_upper_bound(i))
894  {
895  if(y[i]==+1)
896  return(-G[i] > Gmax1);
897  else
898  return(-G[i] > Gmax2);
899  }
900  else if(is_lower_bound(i))
901  {
902  if(y[i]==+1)
903  return(G[i] > Gmax2);
904  else
905  return(G[i] > Gmax1);
906  }
907  else
908  return(false);
909 }
910 
// (Signature line missing from this listing — presumably
// void Solver::do_shrinking(). TODO confirm against svm.cpp.)
{
    int i;
    double Gmax1 = -INF; // max { -y_i * grad(f)_i | i in I_up(\alpha) }
    double Gmax2 = -INF; // max { y_i * grad(f)_i | i in I_low(\alpha) }

    // find maximal violating pair first
    for(i=0;i<active_size;i++)
    {
        if(y[i]==+1)
        {
            if(!is_upper_bound(i))
            {
                if(-G[i] >= Gmax1)
                    Gmax1 = -G[i];
            }
            if(!is_lower_bound(i))
            {
                if(G[i] >= Gmax2)
                    Gmax2 = G[i];
            }
        }
        else
        {
            if(!is_upper_bound(i))
            {
                if(-G[i] >= Gmax2)
                    Gmax2 = -G[i];
            }
            if(!is_lower_bound(i))
            {
                if(G[i] >= Gmax1)
                    Gmax1 = G[i];
            }
        }
    }

    // near convergence: unshrink once and recheck against the full problem
    if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
    {
        unshrink = true;
        reconstruct_gradient();
        active_size = l;
        info("*");
    }

    // compact the active set: move each shrinkable variable past the end,
    // swapping in the last non-shrinkable one
    for(i=0;i<active_size;i++)
        if (be_shrunk(i, Gmax1, Gmax2))
        {
            active_size--;
            while (active_size > i)
            {
                if (!be_shrunk(active_size, Gmax1, Gmax2))
                {
                    swap_index(i,active_size);
                    break;
                }
                active_size--;
            }
        }
}
971 
// (Signature line missing from this listing — presumably
// double Solver::calculate_rho(). TODO confirm against svm.cpp.)
// Computes the bias: average of y_i*G_i over free variables, or the midpoint
// of the feasible interval [lb, ub] when no variable is free.
{
    double r;
    int nr_free = 0;
    double ub = INF, lb = -INF, sum_free = 0;
    for(int i=0;i<active_size;i++)
    {
        double yG = y[i]*G[i];

        if(is_upper_bound(i))
        {
            if(y[i]==-1)
                ub = min(ub,yG);
            else
                lb = max(lb,yG);
        }
        else if(is_lower_bound(i))
        {
            if(y[i]==+1)
                ub = min(ub,yG);
            else
                lb = max(lb,yG);
        }
        else
        {
            ++nr_free;
            sum_free += yG;
        }
    }

    if(nr_free>0)
        r = sum_free/nr_free;
    else
        r = (ub+lb)/2;

    return r;
}
1009 
1010 //
1011 // Solver for nu-svm classification and regression
1012 //
1013 // additional constraint: e^T \alpha = constant
1014 //
// SMO solver variant for the nu formulations; it must keep the sums of the
// positive and negative alphas separately feasible, hence the label-split
// working-set selection and shrinking below.
class Solver_NU: public Solver
{
public:
    // NOTE(review): this listing omits a line here — presumably the default
    // constructor Solver_NU() {}. Confirm against svm.cpp.
    void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
               double *alpha, double Cp, double Cn, double eps,
               SolutionInfo* si, int shrinking)
    {
        this->si = si; // stashed so calculate_rho() can also record si->r
        Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
    }
private:
    SolutionInfo *si;
    int select_working_set(int &i, int &j);
    double calculate_rho();
    bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
    void do_shrinking();
};
1033 
1034 // return 1 if already optimal, return 0 otherwise
// return 1 if already optimal, return 0 otherwise
// Like Solver::select_working_set, but i and j must share the same label, so
// candidate i's and stopping gaps are tracked separately per label.
int Solver_NU::select_working_set(int &out_i, int &out_j)
{
    // return i,j such that y_i = y_j and
    // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
    // j: minimizes the decrease of obj value
    //    (if quadratic coefficeint <= 0, replace it with tau)
    //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

    double Gmaxp = -INF;   // best violation among y = +1
    double Gmaxp2 = -INF;
    int Gmaxp_idx = -1;

    double Gmaxn = -INF;   // best violation among y = -1
    double Gmaxn2 = -INF;
    int Gmaxn_idx = -1;

    int Gmin_idx = -1;
    double obj_diff_min = INF;

    for(int t=0;t<active_size;t++)
        if(y[t]==+1)
        {
            if(!is_upper_bound(t))
                if(-G[t] >= Gmaxp)
                {
                    Gmaxp = -G[t];
                    Gmaxp_idx = t;
                }
        }
        else
        {
            if(!is_lower_bound(t))
                if(G[t] >= Gmaxn)
                {
                    Gmaxn = G[t];
                    Gmaxn_idx = t;
                }
        }

    int ip = Gmaxp_idx;
    int in = Gmaxn_idx;
    const Qfloat *Q_ip = NULL;
    const Qfloat *Q_in = NULL;
    if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
        Q_ip = Q->get_Q(ip,active_size);
    if(in != -1)
        Q_in = Q->get_Q(in,active_size);

    // pick j minimizing the second-order objective estimate within its
    // own label group
    for(int j=0;j<active_size;j++)
    {
        if(y[j]==+1)
        {
            if (!is_lower_bound(j))
            {
                double grad_diff=Gmaxp+G[j];
                if (G[j] >= Gmaxp2)
                    Gmaxp2 = G[j];
                if (grad_diff > 0)
                {
                    double obj_diff;
                    double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
        else
        {
            if (!is_upper_bound(j))
            {
                double grad_diff=Gmaxn-G[j];
                if (-G[j] >= Gmaxn2)
                    Gmaxn2 = -G[j];
                if (grad_diff > 0)
                {
                    double obj_diff;
                    double quad_coef = QD[in]+QD[j]-2*Q_in[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
    }

    // stop only when both label groups satisfy the KKT gap
    if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps || Gmin_idx == -1)
        return 1;

    // i comes from the same label group as the chosen j
    if (y[Gmin_idx] == +1)
        out_i = Gmaxp_idx;
    else
        out_i = Gmaxn_idx;
    out_j = Gmin_idx;

    return 0;
}
1146 
1147 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
1148 {
1149  if(is_upper_bound(i))
1150  {
1151  if(y[i]==+1)
1152  return(-G[i] > Gmax1);
1153  else
1154  return(-G[i] > Gmax4);
1155  }
1156  else if(is_lower_bound(i))
1157  {
1158  if(y[i]==+1)
1159  return(G[i] > Gmax2);
1160  else
1161  return(G[i] > Gmax3);
1162  }
1163  else
1164  return(false);
1165 }
1166 
// Shrinking for the nu-solver: tracks the four per-label gradient extrema
// and removes variables that cannot re-enter a violating pair.
void Solver_NU::do_shrinking()
{
    double Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
    double Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
    double Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
    double Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

    // find maximal violating pair first
    int i;
    for(i=0;i<active_size;i++)
    {
        if(!is_upper_bound(i))
        {
            if(y[i]==+1)
            {
                if(-G[i] > Gmax1) Gmax1 = -G[i];
            }
            else if(-G[i] > Gmax4) Gmax4 = -G[i];
        }
        if(!is_lower_bound(i))
        {
            if(y[i]==+1)
            {
                if(G[i] > Gmax2) Gmax2 = G[i];
            }
            else if(G[i] > Gmax3) Gmax3 = G[i];
        }
    }

    // near convergence: unshrink once and recheck on the full problem
    if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
    {
        unshrink = true;
        reconstruct_gradient();
        active_size = l;
    }

    // compact the active set, swapping kept variables into shrunk slots
    for(i=0;i<active_size;i++)
        if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
        {
            active_size--;
            while (active_size > i)
            {
                if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
                {
                    swap_index(i,active_size);
                    break;
                }
                active_size--;
            }
        }
}
1218 
// Bias computation for the nu formulations: estimates a separate offset for
// each label group (from free variables, or the midpoint of the feasible
// interval), stores their average in si->r, and returns half the difference
// as rho.
double Solver_NU::calculate_rho()
{
    int nr_free1 = 0,nr_free2 = 0;
    double ub1 = INF, ub2 = INF;
    double lb1 = -INF, lb2 = -INF;
    double sum_free1 = 0, sum_free2 = 0;

    for(int i=0;i<active_size;i++)
    {
        if(y[i]==+1)
        {
            if(is_upper_bound(i))
                lb1 = max(lb1,G[i]);
            else if(is_lower_bound(i))
                ub1 = min(ub1,G[i]);
            else
            {
                ++nr_free1;
                sum_free1 += G[i];
            }
        }
        else
        {
            if(is_upper_bound(i))
                lb2 = max(lb2,G[i]);
            else if(is_lower_bound(i))
                ub2 = min(ub2,G[i]);
            else
            {
                ++nr_free2;
                sum_free2 += G[i];
            }
        }
    }

    double r1,r2;
    if(nr_free1 > 0)
        r1 = sum_free1/nr_free1;
    else
        r1 = (ub1+lb1)/2;

    if(nr_free2 > 0)
        r2 = sum_free2/nr_free2;
    else
        r2 = (ub2+lb2)/2;

    si->r = (r1+r2)/2;
    return (r1-r2)/2;
}
1268 
1269 //
1270 // Q matrices for various formulations
1271 //
// Q matrix for C-SVC / nu-SVC: Q[i][j] = y_i * y_j * K(x_i, x_j), served
// column-by-column through the LRU kernel cache.
class SVC_Q: public Kernel
{
public:
    SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
    :Kernel(prob.l, prob.x, param)
    {
        clone(y,y_,prob.l); // private copy of the +/-1 labels
        cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); // MB -> bytes
        QD = new double[prob.l];
        for(int i=0;i<prob.l;i++)
            QD[i] = (this->*kernel_function)(i,i);
    }

    // Return column i up to row len, computing only the uncached tail.
    Qfloat *get_Q(int i, int len) const
    {
        Qfloat *data;
        int start, j;
        if((start = cache->get_data(i,&data,len)) < len)
        {
            for(j=start;j<len;j++)
                data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
        }
        return data;
    }

    double *get_QD() const
    {
        return QD;
    }

    void swap_index(int i, int j) const
    {
        cache->swap_index(i,j);
        Kernel::swap_index(i,j);
        swap(y[i],y[j]);
        swap(QD[i],QD[j]);
    }

    // (destructor body — its signature line is missing from this listing;
    // presumably ~SVC_Q(). TODO confirm against the original svm.cpp.)
    {
        delete[] y;
        delete cache;
        delete[] QD;
    }
private:
    schar *y;     // cloned labels
    Cache *cache; // LRU cache of computed columns
    double *QD;   // precomputed diagonal
};
1321 
// Q matrix for one-class SVM: Q[i][j] = K(x_i, x_j) (no labels), served
// through the LRU kernel cache.
class ONE_CLASS_Q: public Kernel
{
public:
    ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
    :Kernel(prob.l, prob.x, param)
    {
        cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20))); // MB -> bytes
        QD = new double[prob.l];
        for(int i=0;i<prob.l;i++)
            QD[i] = (this->*kernel_function)(i,i);
    }

    // Return column i up to row len, computing only the uncached tail.
    Qfloat *get_Q(int i, int len) const
    {
        Qfloat *data;
        int start, j;
        if((start = cache->get_data(i,&data,len)) < len)
        {
            for(j=start;j<len;j++)
                data[j] = (Qfloat)(this->*kernel_function)(i,j);
        }
        return data;
    }

    double *get_QD() const
    {
        return QD;
    }

    void swap_index(int i, int j) const
    {
        cache->swap_index(i,j);
        Kernel::swap_index(i,j);
        swap(QD[i],QD[j]);
    }

    // (destructor body — its signature line is missing from this listing;
    // presumably ~ONE_CLASS_Q(). TODO confirm against the original svm.cpp.)
    {
        delete cache;
        delete[] QD;
    }
private:
    Cache *cache;
    double *QD;
};
1367 
// Q matrix for epsilon-SVR / nu-SVR: the 2l dual variables map onto the l
// training points with signs +1 (first half) and -1 (second half), so
// Q[i][j] = sign[i]*sign[j]*K(x_index[i], x_index[j]). Columns are cached per
// real data item and reordered on demand into two rotating buffers.
class SVR_Q: public Kernel
{
public:
    SVR_Q(const svm_problem& prob, const svm_parameter& param)
    :Kernel(prob.l, prob.x, param)
    {
        l = prob.l;
        cache = new Cache(l,(long int)(param.cache_size*(1<<20))); // MB -> bytes
        QD = new double[2*l];
        sign = new schar[2*l];
        index = new int[2*l];
        for(int k=0;k<l;k++)
        {
            sign[k] = 1;
            sign[k+l] = -1;
            index[k] = k;
            index[k+l] = k;
            QD[k] = (this->*kernel_function)(k,k);
            QD[k+l] = QD[k];
        }
        buffer[0] = new Qfloat[2*l];
        buffer[1] = new Qfloat[2*l];
        next_buffer = 0;
    }

    // Only the sign/index/diagonal bookkeeping moves; the cache stays keyed
    // by the underlying real data items.
    void swap_index(int i, int j) const
    {
        swap(sign[i],sign[j]);
        swap(index[i],index[j]);
        swap(QD[i],QD[j]);
    }

    Qfloat *get_Q(int i, int len) const
    {
        Qfloat *data;
        int j, real_i = index[i];
        // always cache the full real column; dual columns are derived views
        if(cache->get_data(real_i,&data,l) < l)
        {
            for(j=0;j<l;j++)
                data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
        }

        // reorder and copy into one of two alternating buffers (the solver
        // may hold the previous column while requesting the next)
        Qfloat *buf = buffer[next_buffer];
        next_buffer = 1 - next_buffer;
        schar si = sign[i];
        for(j=0;j<len;j++)
            buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
        return buf;
    }

    double *get_QD() const
    {
        return QD;
    }

    // (destructor body — its signature line is missing from this listing;
    // presumably ~SVR_Q(). TODO confirm against the original svm.cpp.)
    {
        delete cache;
        delete[] sign;
        delete[] index;
        delete[] buffer[0];
        delete[] buffer[1];
        delete[] QD;
    }
private:
    int l;                   // number of real training points (dual size is 2l)
    Cache *cache;
    schar *sign;             // +1 / -1 half of each dual variable
    int *index;              // dual index -> real data item
    mutable int next_buffer; // alternates between the two output buffers
    Qfloat *buffer[2];
    double *QD;
};
1442 
1443 //
1444 // construct and solve various formulations
1445 //
// C-SVC dual: minimize 0.5 a^T Q a - e^T a subject to y^T a = 0 and
// 0 <= a_i <= Cp (y_i=+1) or Cn (y_i=-1). On exit alpha[i] carries the
// sign of y[i], ready for building the model; si receives obj/rho/bounds.
static void solve_c_svc(
    const svm_problem *prob, const svm_parameter* param,
    double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
{
    int l = prob->l;
    double *minus_ones = new double[l];
    schar *y = new schar[l];

    int i;

    for(i=0;i<l;i++)
    {
        alpha[i] = 0;
        minus_ones[i] = -1;
        if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
    }

    Solver s;
    s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
            alpha, Cp, Cn, param->eps, si, param->shrinking);

    double sum_alpha=0;
    for(i=0;i<l;i++)
        sum_alpha += alpha[i];

    // report the equivalent nu only when it is well-defined (single C)
    if (Cp==Cn)
        info("nu = %f\n", sum_alpha/(Cp*prob->l));

    for(i=0;i<l;i++)
        alpha[i] *= y[i];

    delete[] minus_ones;
    delete[] y;
}
1480 
// nu-SVC: nu replaces C as the regularization knob.  The dual is solved
// with unit box constraints and a zero linear term; afterwards the whole
// solution is rescaled by 1/r so it matches the C-SVC conventions used
// by the rest of the code.
static void solve_nu_svc(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int i;
	int l = prob->l;
	double nu = param->nu;

	schar *y = new schar[l];

	for(i=0;i<l;i++)
		if(prob->y[i]>0)
			y[i] = +1;
		else
			y[i] = -1;

	// Feasible starting point: spread nu*l/2 mass over each class,
	// capping every alpha at its upper bound of 1.
	double sum_pos = nu*l/2;
	double sum_neg = nu*l/2;

	for(i=0;i<l;i++)
		if(y[i] == +1)
		{
			alpha[i] = min(1.0,sum_pos);
			sum_pos -= alpha[i];
		}
		else
		{
			alpha[i] = min(1.0,sum_neg);
			sum_neg -= alpha[i];
		}

	// The nu-SVC dual has a zero linear term.
	double *zeros = new double[l];

	for(i=0;i<l;i++)
		zeros[i] = 0;

	Solver_NU s;
	s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
		alpha, 1.0, 1.0, param->eps, si, param->shrinking);
	double r = si->r;

	// 1/r is the C of the equivalent C-SVC problem.
	info("C = %f\n",1/r);

	for(i=0;i<l;i++)
		alpha[i] *= y[i]/r;

	// Rescale the solution info so callers see C-SVC-compatible values.
	si->rho /= r;
	si->obj /= (r*r);
	si->upper_bound_p = 1/r;
	si->upper_bound_n = 1/r;

	delete[] y;
	delete[] zeros;
}
1535 
1536 static void solve_one_class(
1537  const svm_problem *prob, const svm_parameter *param,
1538  double *alpha, Solver::SolutionInfo* si)
1539 {
1540  int l = prob->l;
1541  double *zeros = new double[l];
1542  schar *ones = new schar[l];
1543  int i;
1544 
1545  int n = (int)(param->nu*prob->l); // # of alpha's at upper bound
1546 
1547  for(i=0;i<n;i++)
1548  alpha[i] = 1;
1549  if(n<prob->l)
1550  alpha[n] = param->nu * prob->l - n;
1551  for(i=n+1;i<l;i++)
1552  alpha[i] = 0;
1553 
1554  for(i=0;i<l;i++)
1555  {
1556  zeros[i] = 0;
1557  ones[i] = 1;
1558  }
1559 
1560  Solver s;
1561  s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
1562  alpha, 1.0, 1.0, param->eps, si, param->shrinking);
1563 
1564  delete[] zeros;
1565  delete[] ones;
1566 }
1567 
// epsilon-SVR, solved as a 2l-variable problem over (alpha, alpha*):
// entries [0,l) carry sign +1, entries [l,2l) carry sign -1, and the two
// halves are collapsed into alpha_i - alpha*_i on the way out.
static void solve_epsilon_svr(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int l = prob->l;
	double *alpha2 = new double[2*l];
	double *linear_term = new double[2*l];
	schar *y = new schar[2*l];
	int i;

	for(i=0;i<l;i++)
	{
		// alpha half: linear term p - y_i
		alpha2[i] = 0;
		linear_term[i] = param->p - prob->y[i];
		y[i] = 1;

		// alpha* half: linear term p + y_i
		alpha2[i+l] = 0;
		linear_term[i+l] = param->p + prob->y[i];
		y[i+l] = -1;
	}

	Solver s;
	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
		alpha2, param->C, param->C, param->eps, si, param->shrinking);

	// Collapse to one coefficient per point and report the equivalent nu.
	double sum_alpha = 0;
	for(i=0;i<l;i++)
	{
		alpha[i] = alpha2[i] - alpha2[i+l];
		sum_alpha += fabs(alpha[i]);
	}
	info("nu = %f\n",sum_alpha/(param->C*l));

	delete[] alpha2;
	delete[] linear_term;
	delete[] y;
}
1605 
// nu-SVR, same 2l-variable layout as epsilon-SVR but solved with the
// nu-solver; the epsilon implied by nu is -si->r on return.
static void solve_nu_svr(
	const svm_problem *prob, const svm_parameter *param,
	double *alpha, Solver::SolutionInfo* si)
{
	int l = prob->l;
	double C = param->C;
	double *alpha2 = new double[2*l];
	double *linear_term = new double[2*l];
	schar *y = new schar[2*l];
	int i;

	// Feasible start: distribute C*nu*l/2 over the pairs, capped at C.
	double sum = C * param->nu * l / 2;
	for(i=0;i<l;i++)
	{
		alpha2[i] = alpha2[i+l] = min(sum,C);
		sum -= alpha2[i];

		linear_term[i] = - prob->y[i];
		y[i] = 1;

		linear_term[i+l] = prob->y[i];
		y[i+l] = -1;
	}

	Solver_NU s;
	s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
		alpha2, C, C, param->eps, si, param->shrinking);

	info("epsilon = %f\n",-si->r);

	// Collapse the two halves into alpha_i - alpha*_i.
	for(i=0;i<l;i++)
		alpha[i] = alpha2[i] - alpha2[i+l];

	delete[] alpha2;
	delete[] linear_term;
	delete[] y;
}
1643 
1644 //
1645 // decision_function
1646 //
1648 {
1649  double *alpha;
1650  double rho;
1651 };
1652 
// Train a single subproblem (two-class, one-class, or regression) and
// return its coefficients and bias.  Cp/Cn are the (possibly
// class-weighted) upper bounds for C-SVC; the nu/one-class solvers use
// their own fixed bounds and ignore them.
static decision_function svm_train_one(
	const svm_problem *prob, const svm_parameter *param,
	double Cp, double Cn)
{
	double *alpha = Malloc(double,prob->l);
	Solver::SolutionInfo si;
	switch(param->svm_type)
	{
		case C_SVC:
			solve_c_svc(prob,param,alpha,&si,Cp,Cn);
			break;
		case NU_SVC:
			solve_nu_svc(prob,param,alpha,&si);
			break;
		case ONE_CLASS:
			solve_one_class(prob,param,alpha,&si);
			break;
		case EPSILON_SVR:
			solve_epsilon_svr(prob,param,alpha,&si);
			break;
		case NU_SVR:
			solve_nu_svr(prob,param,alpha,&si);
			break;
	}

	info("obj = %f, rho = %f\n",si.obj,si.rho);

	// output SVs
	// Count support vectors (alpha != 0) and bounded SVs (alpha at its
	// class-specific upper bound).
	int nSV = 0;
	int nBSV = 0;
	for(int i=0;i<prob->l;i++)
	{
		if(fabs(alpha[i]) > 0)
		{
			++nSV;
			if(prob->y[i] > 0)
			{
				if(fabs(alpha[i]) >= si.upper_bound_p)
					++nBSV;
			}
			else
			{
				if(fabs(alpha[i]) >= si.upper_bound_n)
					++nBSV;
			}
		}
	}

	info("nSV = %d, nBSV = %d\n",nSV,nBSV);

	// Ownership of 'alpha' passes to the caller via f.alpha.
	decision_function f;
	f.alpha = alpha;
	f.rho = si.rho;
	return f;
}
1709 
1710 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
// Fit the sigmoid P(y=1|f) = 1/(1+exp(A*f+B)) to decision values by
// regularized maximum likelihood (Newton's method with a backtracking
// line search), writing the fitted parameters into A and B.
static void sigmoid_train(
	int l, const double *dec_values, const double *labels,
	double& A, double& B)
{
	double prior1=0, prior0 = 0;
	int i;

	for (i=0;i<l;i++)
		if (labels[i] > 0) prior1+=1;
		else prior0+=1;

	int max_iter=100;	// Maximal number of iterations
	double min_step=1e-10;	// Minimal step taken in line search
	double sigma=1e-12;	// For numerically strict PD of Hessian
	double eps=1e-5;
	// Soft targets (instead of hard 0/1) regularize the fit.
	double hiTarget=(prior1+1.0)/(prior1+2.0);
	double loTarget=1/(prior0+2.0);
	double *t=Malloc(double,l);
	double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
	double newA,newB,newf,d1,d2;
	int iter;

	// Initial Point and Initial Fun Value
	A=0.0; B=log((prior0+1.0)/(prior1+1.0));
	double fval = 0.0;

	for (i=0;i<l;i++)
	{
		if (labels[i]>0) t[i]=hiTarget;
		else t[i]=loTarget;
		fApB = dec_values[i]*A+B;
		// Branch on the sign of fApB so exp() never overflows.
		if (fApB>=0)
			fval += t[i]*fApB + log(1+exp(-fApB));
		else
			fval += (t[i] - 1)*fApB +log(1+exp(fApB));
	}
	for (iter=0;iter<max_iter;iter++)
	{
		// Update Gradient and Hessian (use H' = H + sigma I)
		h11=sigma; // numerically ensures strict PD
		h22=sigma;
		h21=0.0;g1=0.0;g2=0.0;
		for (i=0;i<l;i++)
		{
			fApB = dec_values[i]*A+B;
			if (fApB >= 0)
			{
				p=exp(-fApB)/(1.0+exp(-fApB));
				q=1.0/(1.0+exp(-fApB));
			}
			else
			{
				p=1.0/(1.0+exp(fApB));
				q=exp(fApB)/(1.0+exp(fApB));
			}
			d2=p*q;
			h11+=dec_values[i]*dec_values[i]*d2;
			h22+=d2;
			h21+=dec_values[i]*d2;
			d1=t[i]-p;
			g1+=dec_values[i]*d1;
			g2+=d1;
		}

		// Stopping Criteria
		if (fabs(g1)<eps && fabs(g2)<eps)
			break;

		// Finding Newton direction: -inv(H') * g
		det=h11*h22-h21*h21;
		dA=-(h22*g1 - h21 * g2) / det;
		dB=-(-h21*g1+ h11 * g2) / det;
		gd=g1*dA+g2*dB;


		stepsize = 1;		// Line Search
		while (stepsize >= min_step)
		{
			newA = A + stepsize * dA;
			newB = B + stepsize * dB;

			// New function value
			newf = 0.0;
			for (i=0;i<l;i++)
			{
				fApB = dec_values[i]*newA+newB;
				if (fApB >= 0)
					newf += t[i]*fApB + log(1+exp(-fApB));
				else
					newf += (t[i] - 1)*fApB +log(1+exp(fApB));
			}
			// Check sufficient decrease
			if (newf<fval+0.0001*stepsize*gd)
			{
				A=newA;B=newB;fval=newf;
				break;
			}
			else
				stepsize = stepsize / 2.0;
		}

		if (stepsize < min_step)
		{
			info("Line search fails in two-class probability estimates\n");
			break;
		}
	}

	if (iter>=max_iter)
		info("Reaching maximal iterations in two-class probability estimates\n");
	free(t);
}
1823 
// Map a decision value through the fitted sigmoid 1/(1+exp(A*f+B)).
// The two branches compute the same quantity; the split keeps exp()
// applied only to non-positive arguments so the later 1-p computation
// does not suffer catastrophic cancellation.
static double sigmoid_predict(double decision_value, double A, double B)
{
	double fApB = decision_value*A+B;
	if (fApB < 0)
		return 1.0/(1+exp(fApB));
	double e = exp(-fApB);
	return e/(1.0+e);
}
1833 
// Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
// Couples the k*(k-1)/2 pairwise probabilities r[i][j] into a single
// k-class distribution p by iterating a fixed-point update until the
// stationarity condition Qp = pQp holds to within eps.
static void multiclass_probability(int k, double **r, double *p)
{
	int t,j;
	int iter = 0, max_iter=max(100,k);
	double **Q=Malloc(double *,k);
	double *Qp=Malloc(double,k);
	double pQp, eps=0.005/k;

	// Build the quadratic form Q from the pairwise probabilities.
	for (t=0;t<k;t++)
	{
		p[t]=1.0/k;  // Valid if k = 1
		Q[t]=Malloc(double,k);
		Q[t][t]=0;
		for (j=0;j<t;j++)
		{
			Q[t][t]+=r[j][t]*r[j][t];
			Q[t][j]=Q[j][t];
		}
		for (j=t+1;j<k;j++)
		{
			Q[t][t]+=r[j][t]*r[j][t];
			Q[t][j]=-r[j][t]*r[t][j];
		}
	}
	for (iter=0;iter<max_iter;iter++)
	{
		// stopping condition, recalculate QP,pQP for numerical accuracy
		pQp=0;
		for (t=0;t<k;t++)
		{
			Qp[t]=0;
			for (j=0;j<k;j++)
				Qp[t]+=Q[t][j]*p[j];
			pQp+=p[t]*Qp[t];
		}
		double max_error=0;
		for (t=0;t<k;t++)
		{
			double error=fabs(Qp[t]-pQp);
			if (error>max_error)
				max_error=error;
		}
		if (max_error<eps) break;

		// One coordinate-wise update per class; Qp, pQp, and p are kept
		// consistent incrementally (p is renormalized by 1+diff).
		for (t=0;t<k;t++)
		{
			double diff=(-Qp[t]+pQp)/Q[t][t];
			p[t]+=diff;
			pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
			for (j=0;j<k;j++)
			{
				Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
				p[j]/=(1+diff);
			}
		}
	}
	if (iter>=max_iter)
		info("Exceeds max_iter in multiclass_prob\n");
	for(t=0;t<k;t++) free(Q[t]);
	free(Q);
	free(Qp);
}
1897 
1898 // Cross-validation decision values for probability estimates
// Cross-validation decision values for probability estimates
// Runs an internal 5-fold CV to collect out-of-fold decision values,
// then fits the Platt sigmoid (probA, probB) on them.  Degenerate folds
// (training part all one class) get constant decision values instead of
// a trained model.
static void svm_binary_svc_probability(
	const svm_problem *prob, const svm_parameter *param,
	double Cp, double Cn, double& probA, double& probB)
{
	int i;
	int nr_fold = 5;
	int *perm = Malloc(int,prob->l);
	double *dec_values = Malloc(double,prob->l);

	// random shuffle
	for(i=0;i<prob->l;i++) perm[i]=i;
	for(i=0;i<prob->l;i++)
	{
		int j = i+rand()%(prob->l-i);
		swap(perm[i],perm[j]);
	}
	for(i=0;i<nr_fold;i++)
	{
		int begin = i*prob->l/nr_fold;
		int end = (i+1)*prob->l/nr_fold;
		int j,k;
		struct svm_problem subprob;

		// Training part of this fold = everything outside [begin,end).
		subprob.l = prob->l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<prob->l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		int p_count=0,n_count=0;
		for(j=0;j<k;j++)
			if(subprob.y[j]>0)
				p_count++;
			else
				n_count++;

		// Degenerate folds: no training data, or only one class present.
		if(p_count==0 && n_count==0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = 0;
		else if(p_count > 0 && n_count == 0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = 1;
		else if(p_count == 0 && n_count > 0)
			for(j=begin;j<end;j++)
				dec_values[perm[j]] = -1;
		else
		{
			// Train with C=1 and Cp/Cn applied as class weights; the
			// submodel must not itself do probability estimation.
			svm_parameter subparam = *param;
			subparam.probability=0;
			subparam.C=1.0;
			subparam.nr_weight=2;
			subparam.weight_label = Malloc(int,2);
			subparam.weight = Malloc(double,2);
			subparam.weight_label[0]=+1;
			subparam.weight_label[1]=-1;
			subparam.weight[0]=Cp;
			subparam.weight[1]=Cn;
			struct svm_model *submodel = svm_train(&subprob,&subparam);
			for(j=begin;j<end;j++)
			{
				svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
				// ensure +1 -1 order; reason not using CV subroutine
				dec_values[perm[j]] *= submodel->label[0];
			}
			svm_free_and_destroy_model(&submodel);
			svm_destroy_param(&subparam);
		}
		free(subprob.x);
		free(subprob.y);
	}
	sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
	free(dec_values);
	free(perm);
}
1984 
1985 // Return parameter of a Laplace distribution
1986 static double svm_svr_probability(
1987  const svm_problem *prob, const svm_parameter *param)
1988 {
1989  int i;
1990  int nr_fold = 5;
1991  double *ymv = Malloc(double,prob->l);
1992  double mae = 0;
1993 
1994  svm_parameter newparam = *param;
1995  newparam.probability = 0;
1996  svm_cross_validation(prob,&newparam,nr_fold,ymv);
1997  for(i=0;i<prob->l;i++)
1998  {
1999  ymv[i]=prob->y[i]-ymv[i];
2000  mae += fabs(ymv[i]);
2001  }
2002  mae /= prob->l;
2003  double std=sqrt(2*mae*mae);
2004  int count=0;
2005  mae=0;
2006  for(i=0;i<prob->l;i++)
2007  if (fabs(ymv[i]) > 5*std)
2008  count=count+1;
2009  else
2010  mae+=fabs(ymv[i]);
2011  mae /= (prob->l-count);
2012  info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2013  free(ymv);
2014  return mae;
2015 }
2016 
2017 
2018 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
2019 // perm, length l, must be allocated before calling this subroutine
// label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
// perm, length l, must be allocated before calling this subroutine
// Outputs (label, start, count) are malloc'd here and owned by the caller.
static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
{
	int l = prob->l;
	int max_nr_class = 16;	// initial capacity; doubled on demand below
	int nr_class = 0;
	int *label = Malloc(int,max_nr_class);
	int *count = Malloc(int,max_nr_class);
	int *data_label = Malloc(int,l);	// class index (not label value) of each point
	int i;

	for(i=0;i<l;i++)
	{
		int this_label = (int)prob->y[i];
		int j;
		for(j=0;j<nr_class;j++)
		{
			if(this_label == label[j])
			{
				++count[j];
				break;
			}
		}
		data_label[i] = j;
		if(j == nr_class)
		{
			// First time we see this label: grow the tables if full.
			// NOTE(review): realloc return value is used unchecked; on
			// failure this would leak and dereference NULL.
			if(nr_class == max_nr_class)
			{
				max_nr_class *= 2;
				label = (int *)realloc(label,max_nr_class*sizeof(int));
				count = (int *)realloc(count,max_nr_class*sizeof(int));
			}
			label[nr_class] = this_label;
			count[nr_class] = 1;
			++nr_class;
		}
	}

	//
	// Labels are ordered by their first occurrence in the training set.
	// However, for two-class sets with -1/+1 labels and -1 appears first,
	// we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
	//
	if (nr_class == 2 && label[0] == -1 && label[1] == 1)
	{
		swap(label[0],label[1]);
		swap(count[0],count[1]);
		for(i=0;i<l;i++)
		{
			if(data_label[i] == 0)
				data_label[i] = 1;
			else
				data_label[i] = 0;
		}
	}

	// Build perm so points of the same class are contiguous; start[] is
	// consumed as a write cursor, then rebuilt.
	int *start = Malloc(int,nr_class);
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];
	for(i=0;i<l;i++)
	{
		perm[start[data_label[i]]] = i;
		++start[data_label[i]];
	}
	start[0] = 0;
	for(i=1;i<nr_class;i++)
		start[i] = start[i-1]+count[i-1];

	*nr_class_ret = nr_class;
	*label_ret = label;
	*start_ret = start;
	*count_ret = count;
	free(data_label);
}
2094 
2095 //
2096 // Interface functions
2097 //
// Train a model.  Regression and one-class problems train a single
// decision function; classification trains one-vs-one classifiers for
// all k*(k-1)/2 class pairs and stores their coefficients in a shared,
// class-grouped support-vector table.
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
	svm_model *model = Malloc(svm_model,1);
	model->param = *param;
	model->free_sv = 0;	// XXX

	if(param->svm_type == ONE_CLASS ||
	   param->svm_type == EPSILON_SVR ||
	   param->svm_type == NU_SVR)
	{
		// regression or one-class-svm
		model->nr_class = 2;
		model->label = NULL;
		model->nSV = NULL;
		model->probA = NULL; model->probB = NULL;
		model->sv_coef = Malloc(double *,1);

		if(param->probability &&
		   (param->svm_type == EPSILON_SVR ||
		    param->svm_type == NU_SVR))
		{
			// Laplace scale for SVR probability intervals.
			model->probA = Malloc(double,1);
			model->probA[0] = svm_svr_probability(prob,param);
		}

		decision_function f = svm_train_one(prob,param,0,0);
		model->rho = Malloc(double,1);
		model->rho[0] = f.rho;

		// Keep only points with nonzero coefficients as SVs.
		int nSV = 0;
		int i;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0) ++nSV;
		model->l = nSV;
		model->SV = Malloc(svm_node *,nSV);
		model->sv_coef[0] = Malloc(double,nSV);
		model->sv_indices = Malloc(int,nSV);
		int j = 0;
		for(i=0;i<prob->l;i++)
			if(fabs(f.alpha[i]) > 0)
			{
				model->SV[j] = prob->x[i];
				model->sv_coef[0][j] = f.alpha[i];
				model->sv_indices[j] = i+1;	// 1-based indices
				++j;
			}

		free(f.alpha);
	}
	else
	{
		// classification
		int l = prob->l;
		int nr_class;
		int *label = NULL;
		int *start = NULL;
		int *count = NULL;
		int *perm = Malloc(int,l);

		// group training data of the same class
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
		if(nr_class == 1)
			info("WARNING: training data in only one class. See README for details.\n");

		// x is the class-grouped view of the training points.
		svm_node **x = Malloc(svm_node *,l);
		int i;
		for(i=0;i<l;i++)
			x[i] = prob->x[perm[i]];

		// calculate weighted C

		double *weighted_C = Malloc(double, nr_class);
		for(i=0;i<nr_class;i++)
			weighted_C[i] = param->C;
		for(i=0;i<param->nr_weight;i++)
		{
			int j;
			for(j=0;j<nr_class;j++)
				if(param->weight_label[i] == label[j])
					break;
			if(j == nr_class)
				fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
			else
				weighted_C[j] *= param->weight[i];
		}

		// train k*(k-1)/2 models

		bool *nonzero = Malloc(bool,l);	// true if point i is an SV in any pair
		for(i=0;i<l;i++)
			nonzero[i] = false;
		decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);

		double *probA=NULL,*probB=NULL;
		if (param->probability)
		{
			probA=Malloc(double,nr_class*(nr_class-1)/2);
			probB=Malloc(double,nr_class*(nr_class-1)/2);
		}

		int p = 0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				// Build the (i,j) subproblem: class i relabeled +1,
				// class j relabeled -1.
				svm_problem sub_prob;
				int si = start[i], sj = start[j];
				int ci = count[i], cj = count[j];
				sub_prob.l = ci+cj;
				sub_prob.x = Malloc(svm_node *,sub_prob.l);
				sub_prob.y = Malloc(double,sub_prob.l);
				int k;
				for(k=0;k<ci;k++)
				{
					sub_prob.x[k] = x[si+k];
					sub_prob.y[k] = +1;
				}
				for(k=0;k<cj;k++)
				{
					sub_prob.x[ci+k] = x[sj+k];
					sub_prob.y[ci+k] = -1;
				}

				if(param->probability)
					svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);

				f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
				for(k=0;k<ci;k++)
					if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
						nonzero[si+k] = true;
				for(k=0;k<cj;k++)
					if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
						nonzero[sj+k] = true;
				free(sub_prob.x);
				free(sub_prob.y);
				++p;
			}

		// build output

		model->nr_class = nr_class;

		model->label = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			model->label[i] = label[i];

		model->rho = Malloc(double,nr_class*(nr_class-1)/2);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			model->rho[i] = f[i].rho;

		if(param->probability)
		{
			model->probA = Malloc(double,nr_class*(nr_class-1)/2);
			model->probB = Malloc(double,nr_class*(nr_class-1)/2);
			for(i=0;i<nr_class*(nr_class-1)/2;i++)
			{
				model->probA[i] = probA[i];
				model->probB[i] = probB[i];
			}
		}
		else
		{
			model->probA=NULL;
			model->probB=NULL;
		}

		// Count SVs per class (a point is an SV if any pair uses it).
		int total_sv = 0;
		int *nz_count = Malloc(int,nr_class);
		model->nSV = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
		{
			int nSV = 0;
			for(int j=0;j<count[i];j++)
				if(nonzero[start[i]+j])
				{
					++nSV;
					++total_sv;
				}
			model->nSV[i] = nSV;
			nz_count[i] = nSV;
		}

		info("Total nSV = %d\n",total_sv);

		model->l = total_sv;
		model->SV = Malloc(svm_node *,total_sv);
		model->sv_indices = Malloc(int,total_sv);
		p = 0;
		for(i=0;i<l;i++)
			if(nonzero[i])
			{
				model->SV[p] = x[i];
				model->sv_indices[p++] = perm[i] + 1;
			}

		// nz_start[i]: first slot of class i's SVs in the shared table.
		int *nz_start = Malloc(int,nr_class);
		nz_start[0] = 0;
		for(i=1;i<nr_class;i++)
			nz_start[i] = nz_start[i-1]+nz_count[i-1];

		model->sv_coef = Malloc(double *,nr_class-1);
		for(i=0;i<nr_class-1;i++)
			model->sv_coef[i] = Malloc(double,total_sv);

		p = 0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				// classifier (i,j): coefficients with
				// i are in sv_coef[j-1][nz_start[i]...],
				// j are in sv_coef[i][nz_start[j]...]

				int si = start[i];
				int sj = start[j];
				int ci = count[i];
				int cj = count[j];

				int q = nz_start[i];
				int k;
				for(k=0;k<ci;k++)
					if(nonzero[si+k])
						model->sv_coef[j-1][q++] = f[p].alpha[k];
				q = nz_start[j];
				for(k=0;k<cj;k++)
					if(nonzero[sj+k])
						model->sv_coef[i][q++] = f[p].alpha[ci+k];
				++p;
			}

		free(label);
		free(probA);
		free(probB);
		free(count);
		free(perm);
		free(start);
		free(x);
		free(weighted_C);
		free(nonzero);
		for(i=0;i<nr_class*(nr_class-1)/2;i++)
			free(f[i].alpha);
		free(f);
		free(nz_count);
		free(nz_start);
	}
	return model;
}
2343 
2344 // Stratified cross validation
// Stratified cross validation
// Fills target[i] with the out-of-fold prediction for point i.  For
// classification the folds are stratified (each class shuffled and
// split proportionally); otherwise a plain random split is used.
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
{
	int i;
	int *fold_start;
	int l = prob->l;
	int *perm = Malloc(int,l);
	int nr_class;
	if (nr_fold > l)
	{
		nr_fold = l;
		fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
	}
	fold_start = Malloc(int,nr_fold+1);
	// stratified cv may not give leave-one-out rate
	// Each class to l folds -> some folds may have zero elements
	if((param->svm_type == C_SVC ||
	    param->svm_type == NU_SVC) && nr_fold < l)
	{
		int *start = NULL;
		int *label = NULL;
		int *count = NULL;
		svm_group_classes(prob,&nr_class,&label,&start,&count,perm);

		// random shuffle and then data grouped by fold using the array perm
		int *fold_count = Malloc(int,nr_fold);
		int c;
		int *index = Malloc(int,l);
		for(i=0;i<l;i++)
			index[i]=perm[i];
		// Shuffle within each class so every fold gets a random sample.
		for (c=0; c<nr_class; c++)
			for(i=0;i<count[c];i++)
			{
				int j = i+rand()%(count[c]-i);
				swap(index[start[c]+j],index[start[c]+i]);
			}
		// Fold sizes: proportional share of every class per fold.
		for(i=0;i<nr_fold;i++)
		{
			fold_count[i] = 0;
			for (c=0; c<nr_class;c++)
				fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
		}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		// Scatter each class's shuffled points into the folds;
		// fold_start is consumed as a write cursor, then rebuilt.
		for (c=0; c<nr_class;c++)
			for(i=0;i<nr_fold;i++)
			{
				int begin = start[c]+i*count[c]/nr_fold;
				int end = start[c]+(i+1)*count[c]/nr_fold;
				for(int j=begin;j<end;j++)
				{
					perm[fold_start[i]] = index[j];
					fold_start[i]++;
				}
			}
		fold_start[0]=0;
		for (i=1;i<=nr_fold;i++)
			fold_start[i] = fold_start[i-1]+fold_count[i-1];
		free(start);
		free(label);
		free(count);
		free(index);
		free(fold_count);
	}
	else
	{
		// Unstratified: one global shuffle, equal-sized folds.
		for(i=0;i<l;i++) perm[i]=i;
		for(i=0;i<l;i++)
		{
			int j = i+rand()%(l-i);
			swap(perm[i],perm[j]);
		}
		for(i=0;i<=nr_fold;i++)
			fold_start[i]=i*l/nr_fold;
	}

	for(i=0;i<nr_fold;i++)
	{
		int begin = fold_start[i];
		int end = fold_start[i+1];
		int j,k;
		struct svm_problem subprob;

		// Train on everything outside [begin,end), predict the inside.
		subprob.l = l-(end-begin);
		subprob.x = Malloc(struct svm_node*,subprob.l);
		subprob.y = Malloc(double,subprob.l);

		k=0;
		for(j=0;j<begin;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		for(j=end;j<l;j++)
		{
			subprob.x[k] = prob->x[perm[j]];
			subprob.y[k] = prob->y[perm[j]];
			++k;
		}
		struct svm_model *submodel = svm_train(&subprob,param);
		if(param->probability &&
		   (param->svm_type == C_SVC || param->svm_type == NU_SVC))
		{
			double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
			free(prob_estimates);
		}
		else
			for(j=begin;j<end;j++)
				target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
		svm_free_and_destroy_model(&submodel);
		free(subprob.x);
		free(subprob.y);
	}
	free(fold_start);
	free(perm);
}
2464 
2465 
2466 int svm_get_svm_type(const svm_model *model)
2467 {
2468  return model->param.svm_type;
2469 }
2470 
// Number of classes; svm_train fixes this at 2 for regression and
// one-class models.
int svm_get_nr_class(const svm_model *model)
{
	return model->nr_class;
}
2475 
2476 void svm_get_labels(const svm_model *model, int* label)
2477 {
2478  if (model->label != NULL)
2479  for(int i=0;i<model->nr_class;i++)
2480  label[i] = model->label[i];
2481 }
2482 
2483 void svm_get_sv_indices(const svm_model *model, int* indices)
2484 {
2485  if (model->sv_indices != NULL)
2486  for(int i=0;i<model->l;i++)
2487  indices[i] = model->sv_indices[i];
2488 }
2489 
// Total number of support vectors stored in the model.
int svm_get_nr_sv(const svm_model *model)
{
	return model->l;
}
2494 
2496 {
2497  if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
2498  model->probA!=NULL)
2499  return model->probA[0];
2500  else
2501  {
2502  fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
2503  return 0;
2504  }
2505 }
2506 
// Compute the raw decision value(s) for x into dec_values and return
// the prediction.  One-class/regression models produce a single value;
// classification fills one value per class pair (k*(k-1)/2) and returns
// the label winning the pairwise vote.
double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
{
	int i;
	if(model->param.svm_type == ONE_CLASS ||
	   model->param.svm_type == EPSILON_SVR ||
	   model->param.svm_type == NU_SVR)
	{
		double *sv_coef = model->sv_coef[0];
		double sum = 0;
		for(i=0;i<model->l;i++)
			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
		sum -= model->rho[0];
		*dec_values = sum;

		// One-class returns the +/-1 in/out decision, SVR the value.
		if(model->param.svm_type == ONE_CLASS)
			return (sum>0)?1:-1;
		else
			return sum;
	}
	else
	{
		int nr_class = model->nr_class;
		int l = model->l;

		// Kernel values against all SVs are shared by every pair.
		double *kvalue = Malloc(double,l);
		for(i=0;i<l;i++)
			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);

		// start[i]: offset of class i's SVs in the shared SV table.
		int *start = Malloc(int,nr_class);
		start[0] = 0;
		for(i=1;i<nr_class;i++)
			start[i] = start[i-1]+model->nSV[i-1];

		int *vote = Malloc(int,nr_class);
		for(i=0;i<nr_class;i++)
			vote[i] = 0;

		int p=0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				double sum = 0;
				int si = start[i];
				int sj = start[j];
				int ci = model->nSV[i];
				int cj = model->nSV[j];

				int k;
				// Coefficient layout follows svm_train: class i's
				// coefficients for pair (i,j) live in sv_coef[j-1].
				double *coef1 = model->sv_coef[j-1];
				double *coef2 = model->sv_coef[i];
				for(k=0;k<ci;k++)
					sum += coef1[si+k] * kvalue[si+k];
				for(k=0;k<cj;k++)
					sum += coef2[sj+k] * kvalue[sj+k];
				sum -= model->rho[p];
				dec_values[p] = sum;

				if(dec_values[p] > 0)
					++vote[i];
				else
					++vote[j];
				p++;
			}

		// Ties resolve to the smaller class index.
		int vote_max_idx = 0;
		for(i=1;i<nr_class;i++)
			if(vote[i] > vote[vote_max_idx])
				vote_max_idx = i;

		free(kvalue);
		free(start);
		free(vote);
		return model->label[vote_max_idx];
	}
}
2582 
2583 double svm_predict(const svm_model *model, const svm_node *x)
2584 {
2585  int nr_class = model->nr_class;
2586  double *dec_values;
2587  if(model->param.svm_type == ONE_CLASS ||
2588  model->param.svm_type == EPSILON_SVR ||
2589  model->param.svm_type == NU_SVR)
2590  dec_values = Malloc(double, 1);
2591  else
2592  dec_values = Malloc(double, nr_class*(nr_class-1)/2);
2593  double pred_result = svm_predict_values(model, x, dec_values);
2594  free(dec_values);
2595  return pred_result;
2596 }
2597 
	const svm_model *model, const svm_node *x, double *prob_estimates)
{
	// Probability output needs a classification model trained with
	// probability=1 (probA/probB fitted); otherwise fall back to a
	// plain prediction.
	if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
	    model->probA!=NULL && model->probB!=NULL)
	{
		int i;
		int nr_class = model->nr_class;
		double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
		svm_predict_values(model, x, dec_values);

		// Clamp pairwise probabilities away from 0/1 for stability.
		double min_prob=1e-7;
		double **pairwise_prob=Malloc(double *,nr_class);
		for(i=0;i<nr_class;i++)
			pairwise_prob[i]=Malloc(double,nr_class);
		int k=0;
		for(i=0;i<nr_class;i++)
			for(int j=i+1;j<nr_class;j++)
			{
				pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
				pairwise_prob[j][i]=1-pairwise_prob[i][j];
				k++;
			}
		// Two classes need no coupling; otherwise combine the pairwise
		// probabilities (Wu/Lin/Weng method 2).
		if (nr_class == 2)
		{
			prob_estimates[0] = pairwise_prob[0][1];
			prob_estimates[1] = pairwise_prob[1][0];
		}
		else
			multiclass_probability(nr_class,pairwise_prob,prob_estimates);

		// Predicted label = class with the highest estimated probability.
		int prob_max_idx = 0;
		for(i=1;i<nr_class;i++)
			if(prob_estimates[i] > prob_estimates[prob_max_idx])
				prob_max_idx = i;
		for(i=0;i<nr_class;i++)
			free(pairwise_prob[i]);
		free(dec_values);
		free(pairwise_prob);
		return model->label[prob_max_idx];
	}
	else
		return svm_predict(model, x);
}
2642 
// Textual names written to / read from model files; indexed directly by
// param.svm_type (see svm_save_model), NULL-terminated.
static const char *svm_type_table[] =
{
	"c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
};
2647 
// Textual kernel names for the model file format; indexed directly by
// param.kernel_type (see svm_save_model), NULL-terminated.
static const char *kernel_type_table[]=
{
	"linear","polynomial","rbf","sigmoid","precomputed",NULL
};
2652 
// Write the model to model_file_name in the LIBSVM text format.
// Returns 0 on success, -1 on open/write failure.  The locale is forced
// to "C" while writing so numeric formatting is portable, then restored.
int svm_save_model(const char *model_file_name, const svm_model *model)
{
	FILE *fp = fopen(model_file_name,"w");
	if(fp==NULL) return -1;

	// Save the current locale; strdup because setlocale's return value
	// may be invalidated by the next setlocale call.
	// NOTE(review): if strdup fails, the locale is queried rather than
	// restored below (setlocale(LC_ALL, NULL)).
	char *old_locale = setlocale(LC_ALL, NULL);
	if (old_locale) {
		old_locale = strdup(old_locale);
	}
	setlocale(LC_ALL, "C");

	const svm_parameter& param = model->param;

	fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
	fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);

	// Kernel parameters are written only for kernels that use them.
	if(param.kernel_type == POLY)
		fprintf(fp,"degree %d\n", param.degree);

	if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
		fprintf(fp,"gamma %.17g\n", param.gamma);

	if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
		fprintf(fp,"coef0 %.17g\n", param.coef0);

	int nr_class = model->nr_class;
	int l = model->l;
	fprintf(fp, "nr_class %d\n", nr_class);
	fprintf(fp, "total_sv %d\n",l);

	{
		fprintf(fp, "rho");
		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
			fprintf(fp," %.17g",model->rho[i]);
		fprintf(fp, "\n");
	}

	// Optional sections: only present when the model carries them.
	if(model->label)
	{
		fprintf(fp, "label");
		for(int i=0;i<nr_class;i++)
			fprintf(fp," %d",model->label[i]);
		fprintf(fp, "\n");
	}

	if(model->probA) // regression has probA only
	{
		fprintf(fp, "probA");
		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
			fprintf(fp," %.17g",model->probA[i]);
		fprintf(fp, "\n");
	}
	if(model->probB)
	{
		fprintf(fp, "probB");
		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
			fprintf(fp," %.17g",model->probB[i]);
		fprintf(fp, "\n");
	}

	if(model->nSV)
	{
		fprintf(fp, "nr_sv");
		for(int i=0;i<nr_class;i++)
			fprintf(fp," %d",model->nSV[i]);
		fprintf(fp, "\n");
	}

	// Support vectors: one line per SV, nr_class-1 coefficients followed
	// by the sparse index:value pairs (or the precomputed-kernel index).
	fprintf(fp, "SV\n");
	const double * const *sv_coef = model->sv_coef;
	const svm_node * const *SV = model->SV;

	for(int i=0;i<l;i++)
	{
		for(int j=0;j<nr_class-1;j++)
			fprintf(fp, "%.17g ",sv_coef[j][i]);

		const svm_node *p = SV[i];

		if(param.kernel_type == PRECOMPUTED)
			fprintf(fp,"0:%d ",(int)(p->value));
		else
			while(p->index != -1)
			{
				fprintf(fp,"%d:%.8g ",p->index,p->value);
				p++;
			}
		fprintf(fp, "\n");
	}

	// Restore the caller's locale before reporting success/failure.
	setlocale(LC_ALL, old_locale);
	free(old_locale);

	if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
	else return 0;
}
2749 
// Growable line buffer shared by readline() and svm_load_model().
// Callers initialize max_line_len and allocate 'line' before the first call.
static char *line = NULL;
static int max_line_len;

// Read one full line (including the trailing '\n') from input into the
// global buffer, doubling the buffer until the newline fits.
// Returns the buffer, or NULL when the stream is at EOF.
static char* readline(FILE *input)
{
	int len;

	if(fgets(line,max_line_len,input) == NULL)
		return NULL;

	// Keep appending until the newline is present (or EOF truncates the line).
	while(strrchr(line,'\n') == NULL)
	{
		max_line_len *= 2;
		// BUGFIX: check realloc(); the original assigned its result straight to
		// 'line', so on allocation failure the old buffer leaked and the
		// following strlen(NULL) crashed.
		char *new_line = (char *) realloc(line,max_line_len);
		if(new_line == NULL)
		{
			max_line_len /= 2;	// keep recorded size consistent with the buffer
			break;			// return the partial line we already have
		}
		line = new_line;
		len = (int) strlen(line);
		if(fgets(line+len,max_line_len-len,input) == NULL)
			break;
	}
	return line;
}
2770 
//
// FSCANF helps to handle fscanf failures.
// Its do-while block avoids the ambiguity when
// if (...)
//	FSCANF();
// is used
//
// NOTE: FSCANF returns false from the *enclosing* function on a short read,
// so it may only be used inside functions returning bool.
#define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
// Parse the header section of a model file (every "keyword value(s)" line up
// to and including the "SV" marker), filling model->param, nr_class, l and
// the rho/label/probA/probB/nSV arrays.
// Returns false on any malformed or unknown entry; on failure the caller is
// responsible for freeing whatever arrays were already allocated.
bool read_model_header(FILE *fp, svm_model* model)
{
	svm_parameter& param = model->param;
	// parameters for training only won't be assigned, but arrays are assigned as NULL for safety
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;

	char cmd[81];	// "%80s" below guarantees no overflow
	while(1)
	{
		FSCANF(fp,"%80s",cmd);

		if(strcmp(cmd,"svm_type")==0)
		{
			// Map the textual svm type back to its enum index.
			FSCANF(fp,"%80s",cmd);
			int i;
			for(i=0;svm_type_table[i];i++)
			{
				if(strcmp(svm_type_table[i],cmd)==0)
				{
					param.svm_type=i;
					break;
				}
			}
			if(svm_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown svm type.\n");
				return false;
			}
		}
		else if(strcmp(cmd,"kernel_type")==0)
		{
			// Map the textual kernel type back to its enum index.
			FSCANF(fp,"%80s",cmd);
			int i;
			for(i=0;kernel_type_table[i];i++)
			{
				if(strcmp(kernel_type_table[i],cmd)==0)
				{
					param.kernel_type=i;
					break;
				}
			}
			if(kernel_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown kernel function.\n");
				return false;
			}
		}
		else if(strcmp(cmd,"degree")==0)
			FSCANF(fp,"%d",&param.degree);
		else if(strcmp(cmd,"gamma")==0)
			FSCANF(fp,"%lf",&param.gamma);
		else if(strcmp(cmd,"coef0")==0)
			FSCANF(fp,"%lf",&param.coef0);
		else if(strcmp(cmd,"nr_class")==0)
			FSCANF(fp,"%d",&model->nr_class);
		else if(strcmp(cmd,"total_sv")==0)
			FSCANF(fp,"%d",&model->l);
		else if(strcmp(cmd,"rho")==0)
		{
			// One value per one-vs-one pair.
			int n = model->nr_class * (model->nr_class-1)/2;
			model->rho = Malloc(double,n);
			for(int i=0;i<n;i++)
				FSCANF(fp,"%lf",&model->rho[i]);
		}
		else if(strcmp(cmd,"label")==0)
		{
			int n = model->nr_class;
			model->label = Malloc(int,n);
			for(int i=0;i<n;i++)
				FSCANF(fp,"%d",&model->label[i]);
		}
		else if(strcmp(cmd,"probA")==0)
		{
			int n = model->nr_class * (model->nr_class-1)/2;
			model->probA = Malloc(double,n);
			for(int i=0;i<n;i++)
				FSCANF(fp,"%lf",&model->probA[i]);
		}
		else if(strcmp(cmd,"probB")==0)
		{
			int n = model->nr_class * (model->nr_class-1)/2;
			model->probB = Malloc(double,n);
			for(int i=0;i<n;i++)
				FSCANF(fp,"%lf",&model->probB[i]);
		}
		else if(strcmp(cmd,"nr_sv")==0)
		{
			int n = model->nr_class;
			model->nSV = Malloc(int,n);
			for(int i=0;i<n;i++)
				FSCANF(fp,"%d",&model->nSV[i]);
		}
		else if(strcmp(cmd,"SV")==0)
		{
			// Skip the rest of the "SV" line; the caller reads the SV block.
			while(1)
			{
				int c = getc(fp);
				if(c==EOF || c=='\n') break;
			}
			break;
		}
		else
		{
			fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
			return false;
		}
	}

	return true;

}
2892 
2893 svm_model *svm_load_model(const char *model_file_name)
2894 {
2895  FILE *fp = fopen(model_file_name,"rb");
2896  if(fp==NULL) return NULL;
2897 
2898  char *old_locale = setlocale(LC_ALL, NULL);
2899  if (old_locale) {
2900  old_locale = strdup(old_locale);
2901  }
2902  setlocale(LC_ALL, "C");
2903 
2904  // read parameters
2905 
2906  svm_model *model = Malloc(svm_model,1);
2907  model->rho = NULL;
2908  model->probA = NULL;
2909  model->probB = NULL;
2910  model->sv_indices = NULL;
2911  model->label = NULL;
2912  model->nSV = NULL;
2913 
2914  // read header
2915  if (!read_model_header(fp, model))
2916  {
2917  fprintf(stderr, "ERROR: fscanf failed to read model\n");
2918  setlocale(LC_ALL, old_locale);
2919  free(old_locale);
2920  free(model->rho);
2921  free(model->label);
2922  free(model->nSV);
2923  free(model);
2924  return NULL;
2925  }
2926 
2927  // read sv_coef and SV
2928 
2929  int elements = 0;
2930  long pos = ftell(fp);
2931 
2932  max_line_len = 1024;
2933  line = Malloc(char,max_line_len);
2934  char *p,*endptr,*idx,*val;
2935 
2936  while(readline(fp)!=NULL)
2937  {
2938  p = strtok(line,":");
2939  while(1)
2940  {
2941  p = strtok(NULL,":");
2942  if(p == NULL)
2943  break;
2944  ++elements;
2945  }
2946  }
2947  elements += model->l;
2948 
2949  fseek(fp,pos,SEEK_SET);
2950 
2951  int m = model->nr_class - 1;
2952  int l = model->l;
2953  model->sv_coef = Malloc(double *,m);
2954  int i;
2955  for(i=0;i<m;i++)
2956  model->sv_coef[i] = Malloc(double,l);
2957  model->SV = Malloc(svm_node*,l);
2958  svm_node *x_space = NULL;
2959  if(l>0) x_space = Malloc(svm_node,elements);
2960 
2961  int j=0;
2962  for(i=0;i<l;i++)
2963  {
2964  readline(fp);
2965  model->SV[i] = &x_space[j];
2966 
2967  p = strtok(line, " \t");
2968  model->sv_coef[0][i] = strtod(p,&endptr);
2969  for(int k=1;k<m;k++)
2970  {
2971  p = strtok(NULL, " \t");
2972  model->sv_coef[k][i] = strtod(p,&endptr);
2973  }
2974 
2975  while(1)
2976  {
2977  idx = strtok(NULL, ":");
2978  val = strtok(NULL, " \t");
2979 
2980  if(val == NULL)
2981  break;
2982  x_space[j].index = (int) strtol(idx,&endptr,10);
2983  x_space[j].value = strtod(val,&endptr);
2984 
2985  ++j;
2986  }
2987  x_space[j++].index = -1;
2988  }
2989  free(line);
2990 
2991  setlocale(LC_ALL, old_locale);
2992  free(old_locale);
2993 
2994  if (ferror(fp) != 0 || fclose(fp) != 0)
2995  return NULL;
2996 
2997  model->free_sv = 1; // XXX
2998  return model;
2999 }
3000 
3002 {
3003  if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
3004  free((void *)(model_ptr->SV[0]));
3005  if(model_ptr->sv_coef)
3006  {
3007  for(int i=0;i<model_ptr->nr_class-1;i++)
3008  free(model_ptr->sv_coef[i]);
3009  }
3010 
3011  free(model_ptr->SV);
3012  model_ptr->SV = NULL;
3013 
3014  free(model_ptr->sv_coef);
3015  model_ptr->sv_coef = NULL;
3016 
3017  free(model_ptr->rho);
3018  model_ptr->rho = NULL;
3019 
3020  free(model_ptr->label);
3021  model_ptr->label= NULL;
3022 
3023  free(model_ptr->probA);
3024  model_ptr->probA = NULL;
3025 
3026  free(model_ptr->probB);
3027  model_ptr->probB= NULL;
3028 
3029  free(model_ptr->sv_indices);
3030  model_ptr->sv_indices = NULL;
3031 
3032  free(model_ptr->nSV);
3033  model_ptr->nSV = NULL;
3034 }
3035 
3037 {
3038  if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3039  {
3040  svm_free_model_content(*model_ptr_ptr);
3041  free(*model_ptr_ptr);
3042  *model_ptr_ptr = NULL;
3043  }
3044 }
3045 
3047 {
3048  free(param->weight_label);
3049  free(param->weight);
3050 }
3051 
3052 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
3053 {
3054  // svm_type
3055 
3056  int svm_type = param->svm_type;
3057  if(svm_type != C_SVC &&
3058  svm_type != NU_SVC &&
3059  svm_type != ONE_CLASS &&
3060  svm_type != EPSILON_SVR &&
3061  svm_type != NU_SVR)
3062  return "unknown svm type";
3063 
3064  // kernel_type, degree
3065 
3066  int kernel_type = param->kernel_type;
3067  if(kernel_type != LINEAR &&
3068  kernel_type != POLY &&
3069  kernel_type != RBF &&
3070  kernel_type != SIGMOID &&
3071  kernel_type != PRECOMPUTED)
3072  return "unknown kernel type";
3073 
3074  if((kernel_type == POLY || kernel_type == RBF || kernel_type == SIGMOID) &&
3075  param->gamma < 0)
3076  return "gamma < 0";
3077 
3078  if(kernel_type == POLY && param->degree < 0)
3079  return "degree of polynomial kernel < 0";
3080 
3081  // cache_size,eps,C,nu,p,shrinking
3082 
3083  if(param->cache_size <= 0)
3084  return "cache_size <= 0";
3085 
3086  if(param->eps <= 0)
3087  return "eps <= 0";
3088 
3089  if(svm_type == C_SVC ||
3090  svm_type == EPSILON_SVR ||
3091  svm_type == NU_SVR)
3092  if(param->C <= 0)
3093  return "C <= 0";
3094 
3095  if(svm_type == NU_SVC ||
3096  svm_type == ONE_CLASS ||
3097  svm_type == NU_SVR)
3098  if(param->nu <= 0 || param->nu > 1)
3099  return "nu <= 0 or nu > 1";
3100 
3101  if(svm_type == EPSILON_SVR)
3102  if(param->p < 0)
3103  return "p < 0";
3104 
3105  if(param->shrinking != 0 &&
3106  param->shrinking != 1)
3107  return "shrinking != 0 and shrinking != 1";
3108 
3109  if(param->probability != 0 &&
3110  param->probability != 1)
3111  return "probability != 0 and probability != 1";
3112 
3113  if(param->probability == 1 &&
3114  svm_type == ONE_CLASS)
3115  return "one-class SVM probability output not supported yet";
3116 
3117 
3118  // check whether nu-svc is feasible
3119 
3120  if(svm_type == NU_SVC)
3121  {
3122  int l = prob->l;
3123  int max_nr_class = 16;
3124  int nr_class = 0;
3125  int *label = Malloc(int,max_nr_class);
3126  int *count = Malloc(int,max_nr_class);
3127 
3128  int i;
3129  for(i=0;i<l;i++)
3130  {
3131  int this_label = (int)prob->y[i];
3132  int j;
3133  for(j=0;j<nr_class;j++)
3134  if(this_label == label[j])
3135  {
3136  ++count[j];
3137  break;
3138  }
3139  if(j == nr_class)
3140  {
3141  if(nr_class == max_nr_class)
3142  {
3143  max_nr_class *= 2;
3144  label = (int *)realloc(label,max_nr_class*sizeof(int));
3145  count = (int *)realloc(count,max_nr_class*sizeof(int));
3146  }
3147  label[nr_class] = this_label;
3148  count[nr_class] = 1;
3149  ++nr_class;
3150  }
3151  }
3152 
3153  for(i=0;i<nr_class;i++)
3154  {
3155  int n1 = count[i];
3156  for(int j=i+1;j<nr_class;j++)
3157  {
3158  int n2 = count[j];
3159  if(param->nu*(n1+n2)/2 > min(n1,n2))
3160  {
3161  free(label);
3162  free(count);
3163  return "specified nu is infeasible";
3164  }
3165  }
3166  }
3167  free(label);
3168  free(count);
3169  }
3170 
3171  return NULL;
3172 }
3173 
3175 {
3176  return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
3177  model->probA!=NULL && model->probB!=NULL) ||
3178  ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
3179  model->probA!=NULL);
3180 }
3181 
3182 void svm_set_print_string_function(void (*print_func)(const char *))
3183 {
3184  if(print_func == NULL)
3185  svm_print_string = &print_string_stdout;
3186  else
3187  svm_print_string = print_func;
3188 }
int active_size
Definition: svm.cpp:416
~SVR_Q()
Definition: svm.cpp:1424
int l
Definition: svm.cpp:429
double upper_bound_n
Definition: svm.cpp:408
void min(Image< double > &op1, const Image< double > &op2)
int get_data(const int index, Qfloat **data, int len)
Definition: svm.cpp:132
int * nSV
Definition: svm.h:73
int svm_get_nr_class(const svm_model *model)
Definition: svm.cpp:2471
#define FSCANF(_stream, _format, _var)
Definition: svm.cpp:2778
void Solve(int l, const QMatrix &Q, const double *p, const schar *y, double *alpha, double Cp, double Cn, double eps, SolutionInfo *si, int shrinking)
Definition: svm.cpp:1019
Definition: svm.cpp:200
double sign
doublereal * c
void swap_index(int i, int j)
Definition: svm.cpp:456
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
Definition: svm.cpp:2598
void sqrt(Image< double > &op)
svm_model * svm_train(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:2098
HBITMAP buffer
Definition: svm-toy.cpp:37
static double * y
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
Definition: svm.cpp:2345
void svm_set_print_string_function(void(*print_func)(const char *))
Definition: svm.cpp:3182
ONE_CLASS_Q(const svm_problem &prob, const svm_parameter &param)
Definition: svm.cpp:1325
double * G
Definition: svm.cpp:418
virtual Qfloat * get_Q(int column, int len) const =0
int svm_get_svm_type(const svm_model *model)
Definition: svm.cpp:2466
double value
Definition: svm.h:19
double * G_bar
Definition: svm.cpp:428
int l
Definition: svm.h:62
int nr_weight
Definition: svm.h:46
double * alpha
Definition: svm.cpp:421
struct svm_node ** SV
Definition: svm.h:63
double * probB
Definition: svm.h:67
void swap_index(int i, int j) const
Definition: svm.cpp:1351
double(Kernel::* kernel_function)(int i, int j) const
Definition: svm.cpp:224
virtual ~Solver()
Definition: svm.cpp:402
glob_prnt iter
int * weight_label
Definition: svm.h:47
cmache_1 eps
Definition: svm.h:29
void svm_free_and_destroy_model(svm_model **model_ptr_ptr)
Definition: svm.cpp:3036
double * gamma
double upper_bound_p
Definition: svm.cpp:407
double Cp
Definition: svm.cpp:425
void swap_index(int i, int j) const
Definition: svm.cpp:1393
double * alpha
Definition: svm.cpp:1649
doublereal * x
int libsvm_version
Definition: svm.cpp:17
#define i
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
virtual int select_working_set(int &i, int &j)
Definition: svm.cpp:792
double * probA
Definition: svm.h:66
doublereal * d
int nr_class
Definition: svm.h:61
Definition: svm.h:58
#define LIBSVM_VERSION
Definition: svm.h:8
double p
Definition: svm.h:50
Definition: svm.cpp:73
double * get_QD() const
Definition: svm.cpp:1419
Solver()
Definition: svm.cpp:401
void svm_free_model_content(svm_model *model_ptr)
Definition: svm.cpp:3001
virtual void swap_index(int i, int j) const
Definition: svm.cpp:217
double cache_size
Definition: svm.h:43
void log(Image< double > &op)
Kernel(int l, svm_node *const *x, const svm_parameter &param)
Definition: svm.cpp:259
viol index
void svm_destroy_param(svm_parameter *param)
Definition: svm.cpp:3046
int in
double * f
double * p
Definition: svm.cpp:426
#define Malloc(type, n)
Definition: svm.cpp:45
double eps
Definition: svm.h:44
bool is_upper_bound(int i)
Definition: svm.cpp:444
const double * QD
Definition: svm.cpp:423
~ONE_CLASS_Q()
Definition: svm.cpp:1358
#define TAU
Definition: svm.cpp:44
void max(Image< double > &op1, const Image< double > &op2)
Definition: svm.cpp:1368
float Qfloat
Definition: svm.cpp:18
free((char *) ob)
int shrinking
Definition: svm.h:51
~Cache()
Definition: svm.cpp:109
double * get_QD() const
Definition: svm.cpp:1297
bool read_model_header(FILE *fp, svm_model *model)
Definition: svm.cpp:2779
const char * svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
Definition: svm.cpp:3052
double svm_get_svr_probability(const svm_model *model)
Definition: svm.cpp:2495
struct svm_node ** x
Definition: svm.h:26
int * label
Definition: svm.h:72
void update_alpha_status(int i)
Definition: svm.cpp:436
struct svm_parameter param
Definition: svm.h:60
int * active_set
Definition: svm.cpp:427
void swap_index(int i, int j) const
Definition: svm.cpp:1302
int * sv_indices
Definition: svm.h:68
#define j
SVR_Q(const svm_problem &prob, const svm_parameter &param)
Definition: svm.cpp:1371
int svm_check_probability_model(const svm_model *model)
Definition: svm.cpp:3174
int m
double * rho
Definition: svm.h:65
Definition: svm.cpp:1272
~SVC_Q()
Definition: svm.cpp:1310
Qfloat * get_Q(int i, int len) const
Definition: svm.cpp:1400
int index
Definition: svm.h:18
const QMatrix * Q
Definition: svm.cpp:422
struct _parameter * param
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
Definition: svm.cpp:2507
Definition: svm.cpp:399
#define len
void error(char *s)
Definition: tools.cpp:107
schar * y
Definition: svm.cpp:417
signed char schar
Definition: svm.cpp:19
double ** sv_coef
Definition: svm.h:64
virtual double * get_QD() const =0
int svm_save_model(const char *model_file_name, const svm_model *model)
Definition: svm.cpp:2653
void swap_index(int i, int j)
Definition: svm.cpp:162
SVC_Q(const svm_problem &prob, const svm_parameter &param, const schar *y_)
Definition: svm.cpp:1275
double svm_predict(const svm_model *model, const svm_node *x)
Definition: svm.cpp:2583
void svm_get_labels(const svm_model *model, int *label)
Definition: svm.cpp:2476
virtual void do_shrinking()
Definition: svm.cpp:911
Definition: svm.cpp:208
int probability
Definition: svm.h:52
int degree
Definition: svm.h:38
int free_sv
Definition: svm.h:76
static double k_function(const svm_node *x, const svm_node *y, const svm_parameter &param)
Definition: svm.cpp:322
void svm_get_sv_indices(const svm_model *model, int *indices)
Definition: svm.cpp:2483
bool is_lower_bound(int i)
Definition: svm.cpp:445
Definition: svm.h:16
bool is_free(int i)
Definition: svm.cpp:446
double * y
Definition: svm.h:25
fprintf(glob_prnt.io, "\)
#define INF
Definition: svm.cpp:43
double gamma
Definition: svm.h:39
double eps
Definition: svm.cpp:424
float r2
void Solve(int l, const QMatrix &Q, const double *p_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo *si, int shrinking)
Definition: svm.cpp:510
int l
Definition: svm.h:24
double * weight
Definition: svm.h:48
double * get_QD() const
Definition: svm.cpp:1346
bool unshrink
Definition: svm.cpp:430
virtual ~QMatrix()
Definition: svm.cpp:205
double C
Definition: svm.h:45
int svm_type
Definition: svm.h:36
svm_model * svm_load_model(const char *model_file_name)
Definition: svm.cpp:2893
double nu
Definition: svm.h:49
virtual double calculate_rho()
Definition: svm.cpp:972
double coef0
Definition: svm.h:40
int * n
double get_C(int i)
Definition: svm.cpp:432
int kernel_type
Definition: svm.h:37
virtual ~Kernel()
Definition: svm.cpp:294
Qfloat * get_Q(int i, int len) const
Definition: svm.cpp:1334
void reconstruct_gradient()
Definition: svm.cpp:468
char * alpha_status
Definition: svm.cpp:420
Solver_NU()
Definition: svm.cpp:1018
Cache(int l, long int size)
Definition: svm.cpp:100
int svm_get_nr_sv(const svm_model *model)
Definition: svm.cpp:2490
Qfloat * get_Q(int i, int len) const
Definition: svm.cpp:1285
double * delta
float r1
__host__ __device__ float dot(float2 a, float2 b)