21 template <
class T>
static inline T
min(T
x,T
y) {
return (x<y)?
x:
y; }
24 template <
class T>
static inline T
max(T
x,T
y) {
return (
x>y)?
x:
y; }
26 template <
class T>
static inline void swap(T&
x, T&
y) { T t=
x;
x=
y; y=t; }
27 template <
class S,
class T>
static inline void clone(T*& dst, S* src,
int n)
30 memcpy((
void *)dst,(
void *)src,
sizeof(T)*n);
32 static inline double powi(
double base,
int times)
34 double tmp = base, ret = 1.0;
36 for(
int t=times; t>0; t/=2)
45 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 47 static void print_string_stdout(
const char *s)
52 static void (*svm_print_string) (
const char *) = &print_string_stdout;
54 static void info(
const char *fmt,...)
61 (*svm_print_string)(buf);
64 static void info(
const char *fmt,...) {}
76 Cache(
int l,
long int size);
83 void swap_index(
int i,
int j);
96 void lru_delete(head_t *h);
97 void lru_insert(head_t *h);
102 head = (head_t *)calloc(l,
sizeof(head_t));
104 size -= l *
sizeof(head_t) /
sizeof(
Qfloat);
105 size =
max(size, 2 * (
long int) l);
106 lru_head.next = lru_head.prev = &lru_head;
111 for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
116 void Cache::lru_delete(head_t *h)
119 h->prev->next = h->next;
120 h->next->prev = h->prev;
123 void Cache::lru_insert(head_t *h)
127 h->prev = lru_head.prev;
134 head_t *h = &head[
index];
135 if(h->len) lru_delete(h);
136 int more = len - h->len;
143 head_t *old = lru_head.next;
166 if(head[i].
len) lru_delete(&head[i]);
167 if(head[j].len) lru_delete(&head[j]);
168 swap(head[i].data,head[j].data);
169 swap(head[i].len,head[j].len);
170 if(head[i].len) lru_insert(&head[i]);
171 if(head[j].len) lru_insert(&head[j]);
174 for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
179 swap(h->data[i],h->data[j]);
202 virtual Qfloat *get_Q(
int column,
int len)
const = 0;
203 virtual double *get_QD()
const = 0;
215 virtual Qfloat *get_Q(
int column,
int len)
const = 0;
216 virtual double *get_QD()
const = 0;
220 if(x_square) swap(x_square[i],x_square[j]);
224 double (
Kernel::*kernel_function)(
int i,
int j)
const;
231 const int kernel_type;
237 double kernel_linear(
int i,
int j)
const 239 return dot(x[i],x[j]);
241 double kernel_poly(
int i,
int j)
const 243 return powi(gamma*
dot(x[i],x[j])+coef0,degree);
245 double kernel_rbf(
int i,
int j)
const 247 return exp(-gamma*(x_square[i]+x_square[j]-2*
dot(x[i],x[j])));
249 double kernel_sigmoid(
int i,
int j)
const 251 return tanh(gamma*
dot(x[i],x[j])+coef0);
253 double kernel_precomputed(
int i,
int j)
const 255 return x[
i][(int)(x[j][0].value)].
value;
260 :kernel_type(param.kernel_type), degree(param.degree),
261 gamma(param.gamma), coef0(param.coef0)
284 if(kernel_type ==
RBF)
286 x_square =
new double[l];
288 x_square[i] = dot(x[i],x[i]);
358 while(x->
index != -1)
364 while(y->
index != -1)
370 return exp(-param.
gamma*sum);
373 return tanh(param.
gamma*dot(x,y)+param.
coef0);
412 void Solve(
int l,
const QMatrix& Q,
const double *p_,
const schar *y_,
413 double *alpha_,
double Cp,
double Cn,
double eps,
434 return (y[i] > 0)? Cp : Cn;
438 if(alpha[i] >= get_C(i))
439 alpha_status[
i] = UPPER_BOUND;
440 else if(alpha[i] <= 0)
441 alpha_status[
i] = LOWER_BOUND;
442 else alpha_status[
i] = FREE;
446 bool is_free(
int i) {
return alpha_status[
i] == FREE; }
448 void reconstruct_gradient();
449 virtual int select_working_set(
int &i,
int &j);
450 virtual double calculate_rho();
451 virtual void do_shrinking();
453 bool be_shrunk(
int i,
double Gmax1,
double Gmax2);
461 swap(alpha_status[i],alpha_status[j]);
462 swap(alpha[i],alpha[j]);
464 swap(active_set[i],active_set[j]);
465 swap(G_bar[i],G_bar[j]);
472 if(active_size == l)
return;
477 for(j=active_size;j<l;j++)
478 G[j] = G_bar[j] + p[j];
480 for(j=0;j<active_size;j++)
484 if(2*nr_free < active_size)
485 info(
"\nWARNING: using -h 0 may be faster\n");
487 if (nr_free*l > 2*active_size*(l-active_size))
489 for(i=active_size;i<l;i++)
491 const Qfloat *Q_i = Q->get_Q(i,active_size);
492 for(j=0;j<active_size;j++)
494 G[
i] += alpha[
j] * Q_i[
j];
499 for(i=0;i<active_size;i++)
502 const Qfloat *Q_i = Q->get_Q(i,l);
503 double alpha_i = alpha[
i];
504 for(j=active_size;j<l;j++)
505 G[j] += alpha_i * Q_i[j];
511 double *alpha_,
double Cp,
double Cn,
double eps,
519 clone(alpha,alpha_,l);
527 alpha_status =
new char[l];
529 update_alpha_status(i);
534 active_set =
new int[l];
543 G_bar =
new double[l];
551 if(!is_lower_bound(i))
554 double alpha_i = alpha[
i];
557 G[j] += alpha_i*Q_i[j];
558 if(is_upper_bound(i))
560 G_bar[j] += get_C(i) * Q_i[
j];
567 int max_iter =
max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
568 int counter =
min(l,1000)+1;
570 while(iter < max_iter)
576 counter =
min(l,1000);
577 if(shrinking) do_shrinking();
582 if(select_working_set(i,j)!=0)
585 reconstruct_gradient();
589 if(select_working_set(i,j)!=0)
602 double C_i = get_C(i);
603 double C_j = get_C(j);
605 double old_alpha_i = alpha[
i];
606 double old_alpha_j = alpha[
j];
610 double quad_coef = QD[
i]+QD[
j]+2*Q_i[
j];
613 double delta = (-G[
i]-G[
j])/quad_coef;
614 double diff = alpha[
i] - alpha[
j];
639 alpha[
j] = C_i - diff;
647 alpha[
i] = C_j + diff;
653 double quad_coef = QD[
i]+QD[
j]-2*Q_i[
j];
656 double delta = (G[
i]-G[
j])/quad_coef;
657 double sum = alpha[
i] + alpha[
j];
666 alpha[
j] = sum - C_i;
682 alpha[
i] = sum - C_j;
697 double delta_alpha_i = alpha[
i] - old_alpha_i;
698 double delta_alpha_j = alpha[
j] - old_alpha_j;
700 for(
int k=0;
k<active_size;
k++)
702 G[
k] += Q_i[
k]*delta_alpha_i + Q_j[
k]*delta_alpha_j;
708 bool ui = is_upper_bound(i);
709 bool uj = is_upper_bound(j);
710 update_alpha_status(i);
711 update_alpha_status(j);
713 if(ui != is_upper_bound(i))
718 G_bar[k] -= C_i * Q_i[k];
721 G_bar[k] += C_i * Q_i[k];
724 if(uj != is_upper_bound(j))
729 G_bar[k] -= C_j * Q_j[k];
732 G_bar[k] += C_j * Q_j[k];
742 reconstruct_gradient();
746 fprintf(stderr,
"\nWARNING: reaching max number of iterations\n");
751 si->
rho = calculate_rho();
758 v += alpha[i] * (G[i] + p[i]);
766 alpha_[active_set[i]] = alpha[i];
780 info(
"\noptimization finished, #iter = %d\n",iter);
785 delete[] alpha_status;
804 double obj_diff_min =
INF;
806 for(
int t=0;t<active_size;t++)
809 if(!is_upper_bound(t))
818 if(!is_lower_bound(t))
829 Q_i = Q->get_Q(i,active_size);
831 for(
int j=0;j<active_size;j++)
835 if (!is_lower_bound(j))
837 double grad_diff=Gmax+G[
j];
843 double quad_coef = QD[
i]+QD[
j]-2.0*y[
i]*Q_i[
j];
845 obj_diff = -(grad_diff*grad_diff)/quad_coef;
847 obj_diff = -(grad_diff*grad_diff)/
TAU;
849 if (obj_diff <= obj_diff_min)
852 obj_diff_min = obj_diff;
859 if (!is_upper_bound(j))
861 double grad_diff= Gmax-G[
j];
867 double quad_coef = QD[
i]+QD[
j]+2.0*y[
i]*Q_i[
j];
869 obj_diff = -(grad_diff*grad_diff)/quad_coef;
871 obj_diff = -(grad_diff*grad_diff)/
TAU;
873 if (obj_diff <= obj_diff_min)
876 obj_diff_min = obj_diff;
883 if(Gmax+Gmax2 <
eps || Gmin_idx == -1)
891 bool Solver::be_shrunk(
int i,
double Gmax1,
double Gmax2)
893 if(is_upper_bound(i))
896 return(-G[i] > Gmax1);
898 return(-G[i] > Gmax2);
900 else if(is_lower_bound(i))
903 return(G[i] > Gmax2);
905 return(G[i] > Gmax1);
918 for(i=0;i<active_size;i++)
922 if(!is_upper_bound(i))
927 if(!is_lower_bound(i))
935 if(!is_upper_bound(i))
940 if(!is_lower_bound(i))
948 if(unshrink ==
false && Gmax1 + Gmax2 <=
eps*10)
951 reconstruct_gradient();
956 for(i=0;i<active_size;i++)
957 if (be_shrunk(i, Gmax1, Gmax2))
960 while (active_size > i)
962 if (!be_shrunk(active_size, Gmax1, Gmax2))
976 double ub =
INF, lb = -
INF, sum_free = 0;
977 for(
int i=0;i<active_size;i++)
979 double yG = y[
i]*G[
i];
981 if(is_upper_bound(i))
988 else if(is_lower_bound(i))
1003 r = sum_free/nr_free;
1020 double *alpha,
double Cp,
double Cn,
double eps,
1021 SolutionInfo* si,
int shrinking)
1028 int select_working_set(
int &i,
int &j);
1029 double calculate_rho();
1030 bool be_shrunk(
int i,
double Gmax1,
double Gmax2,
double Gmax3,
double Gmax4);
1031 void do_shrinking();
1035 int Solver_NU::select_working_set(
int &out_i,
int &out_j)
1043 double Gmaxp = -
INF;
1044 double Gmaxp2 = -
INF;
1047 double Gmaxn = -
INF;
1048 double Gmaxn2 = -
INF;
1052 double obj_diff_min =
INF;
1054 for(
int t=0;t<active_size;t++)
1057 if(!is_upper_bound(t))
1066 if(!is_lower_bound(t))
1076 const Qfloat *Q_ip = NULL;
1077 const Qfloat *Q_in = NULL;
1079 Q_ip = Q->get_Q(ip,active_size);
1081 Q_in = Q->get_Q(in,active_size);
1083 for(
int j=0;j<active_size;j++)
1087 if (!is_lower_bound(j))
1089 double grad_diff=Gmaxp+G[
j];
1095 double quad_coef = QD[ip]+QD[
j]-2*Q_ip[
j];
1097 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1099 obj_diff = -(grad_diff*grad_diff)/
TAU;
1101 if (obj_diff <= obj_diff_min)
1104 obj_diff_min = obj_diff;
1111 if (!is_upper_bound(j))
1113 double grad_diff=Gmaxn-G[
j];
1114 if (-G[j] >= Gmaxn2)
1119 double quad_coef = QD[
in]+QD[
j]-2*Q_in[
j];
1121 obj_diff = -(grad_diff*grad_diff)/quad_coef;
1123 obj_diff = -(grad_diff*grad_diff)/
TAU;
1125 if (obj_diff <= obj_diff_min)
1128 obj_diff_min = obj_diff;
1135 if(
max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) <
eps || Gmin_idx == -1)
1138 if (y[Gmin_idx] == +1)
1147 bool Solver_NU::be_shrunk(
int i,
double Gmax1,
double Gmax2,
double Gmax3,
double Gmax4)
1149 if(is_upper_bound(i))
1152 return(-G[i] > Gmax1);
1154 return(-G[i] > Gmax4);
1156 else if(is_lower_bound(i))
1159 return(G[i] > Gmax2);
1161 return(G[i] > Gmax3);
1167 void Solver_NU::do_shrinking()
1169 double Gmax1 = -
INF;
1170 double Gmax2 = -
INF;
1171 double Gmax3 = -
INF;
1172 double Gmax4 = -
INF;
1176 for(i=0;i<active_size;i++)
1178 if(!is_upper_bound(i))
1182 if(-G[i] > Gmax1) Gmax1 = -G[
i];
1184 else if(-G[i] > Gmax4) Gmax4 = -G[
i];
1186 if(!is_lower_bound(i))
1190 if(G[i] > Gmax2) Gmax2 = G[
i];
1192 else if(G[i] > Gmax3) Gmax3 = G[
i];
1196 if(unshrink ==
false &&
max(Gmax1+Gmax2,Gmax3+Gmax4) <=
eps*10)
1199 reconstruct_gradient();
1203 for(i=0;i<active_size;i++)
1204 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
1207 while (active_size > i)
1209 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
1219 double Solver_NU::calculate_rho()
1221 int nr_free1 = 0,nr_free2 = 0;
1222 double ub1 =
INF, ub2 =
INF;
1223 double lb1 = -
INF, lb2 = -
INF;
1224 double sum_free1 = 0, sum_free2 = 0;
1226 for(
int i=0;i<active_size;i++)
1230 if(is_upper_bound(i))
1231 lb1 =
max(lb1,G[i]);
1232 else if(is_lower_bound(i))
1233 ub1 =
min(ub1,G[i]);
1242 if(is_upper_bound(i))
1243 lb2 =
max(lb2,G[i]);
1244 else if(is_lower_bound(i))
1245 ub2 =
min(ub2,G[i]);
1256 r1 = sum_free1/nr_free1;
1261 r2 = sum_free2/nr_free2;
1276 :
Kernel(prob.l, prob.x, param)
1280 QD =
new double[prob.
l];
1281 for(
int i=0;i<prob.
l;i++)
1289 if((start = cache->get_data(i,&data,len)) < len)
1291 for(j=start;j<
len;j++)
1304 cache->swap_index(i,j);
1326 :
Kernel(prob.l, prob.x, param)
1329 QD =
new double[prob.
l];
1330 for(
int i=0;i<prob.
l;i++)
1338 if((start = cache->get_data(i,&data,len)) < len)
1340 for(j=start;j<
len;j++)
1353 cache->swap_index(i,j);
1372 :
Kernel(prob.l, prob.x, param)
1376 QD =
new double[2*l];
1378 index =
new int[2*l];
1379 for(
int k=0;
k<l;
k++)
1404 if(cache->get_data(real_i,&data,l) < l)
1412 next_buffer = 1 - next_buffer;
1438 mutable int next_buffer;
1446 static void solve_c_svc(
1451 double *minus_ones =
new double[l];
1460 if(prob->
y[i] > 0) y[
i] = +1;
else y[
i] = -1;
1464 s.
Solve(l,
SVC_Q(*prob,*param,y), minus_ones, y,
1469 sum_alpha += alpha[i];
1472 info(
"nu = %f\n", sum_alpha/(Cp*prob->
l));
1477 delete[] minus_ones;
1481 static void solve_nu_svc(
1487 double nu = param->
nu;
1497 double sum_pos = nu*l/2;
1498 double sum_neg = nu*l/2;
1503 alpha[
i] =
min(1.0,sum_pos);
1504 sum_pos -= alpha[
i];
1508 alpha[
i] =
min(1.0,sum_neg);
1509 sum_neg -= alpha[
i];
1512 double *zeros =
new double[l];
1522 info(
"C = %f\n",1/r);
1536 static void solve_one_class(
1541 double *zeros =
new double[l];
1545 int n = (int)(param->
nu*prob->
l);
1550 alpha[
n] = param->
nu * prob->
l -
n;
1568 static void solve_epsilon_svr(
1573 double *alpha2 =
new double[2*l];
1574 double *linear_term =
new double[2*l];
1581 linear_term[
i] = param->
p - prob->
y[
i];
1585 linear_term[i+l] = param->
p + prob->
y[
i];
1590 s.
Solve(2*l,
SVR_Q(*prob,*param), linear_term, y,
1593 double sum_alpha = 0;
1596 alpha[
i] = alpha2[
i] - alpha2[i+l];
1597 sum_alpha += fabs(alpha[i]);
1599 info(
"nu = %f\n",sum_alpha/(param->
C*l));
1602 delete[] linear_term;
1606 static void solve_nu_svr(
1611 double C = param->
C;
1612 double *alpha2 =
new double[2*l];
1613 double *linear_term =
new double[2*l];
1617 double sum = C * param->
nu * l / 2;
1620 alpha2[
i] = alpha2[i+l] =
min(sum,C);
1623 linear_term[
i] = - prob->
y[
i];
1626 linear_term[i+l] = prob->
y[
i];
1631 s.
Solve(2*l,
SVR_Q(*prob,*param), linear_term, y,
1634 info(
"epsilon = %f\n",-si->
r);
1637 alpha[i] = alpha2[i] - alpha2[i+l];
1640 delete[] linear_term;
1655 double Cp,
double Cn)
1657 double *alpha =
Malloc(
double,prob->
l);
1662 solve_c_svc(prob,param,alpha,&si,Cp,Cn);
1665 solve_nu_svc(prob,param,alpha,&si);
1668 solve_one_class(prob,param,alpha,&si);
1671 solve_epsilon_svr(prob,param,alpha,&si);
1674 solve_nu_svr(prob,param,alpha,&si);
1678 info(
"obj = %f, rho = %f\n",si.
obj,si.
rho);
1684 for(
int i=0;i<prob->
l;i++)
1686 if(fabs(alpha[i]) > 0)
1702 info(
"nSV = %d, nBSV = %d\n",nSV,nBSV);
1711 static void sigmoid_train(
1712 int l,
const double *dec_values,
const double *labels,
1713 double& A,
double& B)
1715 double prior1=0, prior0 = 0;
1719 if (labels[i] > 0) prior1+=1;
1723 double min_step=1e-10;
1726 double hiTarget=(prior1+1.0)/(prior1+2.0);
1727 double loTarget=1/(prior0+2.0);
1728 double *t=
Malloc(
double,l);
1729 double fApB,p,q,h11,h22,h21,g1,g2,det,dA,dB,gd,stepsize;
1730 double newA,newB,newf,d1,d2;
1734 A=0.0; B=
log((prior0+1.0)/(prior1+1.0));
1739 if (labels[i]>0) t[
i]=hiTarget;
1741 fApB = dec_values[
i]*A+B;
1743 fval += t[
i]*fApB +
log(1+exp(-fApB));
1745 fval += (t[
i] - 1)*fApB +
log(1+exp(fApB));
1747 for (iter=0;iter<max_iter;iter++)
1752 h21=0.0;g1=0.0;g2=0.0;
1755 fApB = dec_values[
i]*A+B;
1758 p=exp(-fApB)/(1.0+exp(-fApB));
1759 q=1.0/(1.0+exp(-fApB));
1763 p=1.0/(1.0+exp(fApB));
1764 q=exp(fApB)/(1.0+exp(fApB));
1767 h11+=dec_values[
i]*dec_values[
i]*d2;
1769 h21+=dec_values[
i]*d2;
1771 g1+=dec_values[
i]*d1;
1776 if (fabs(g1)<eps && fabs(g2)<
eps)
1780 det=h11*h22-h21*h21;
1781 dA=-(h22*g1 - h21 * g2) / det;
1782 dB=-(-h21*g1+ h11 * g2) / det;
1787 while (stepsize >= min_step)
1789 newA = A + stepsize * dA;
1790 newB = B + stepsize * dB;
1796 fApB = dec_values[
i]*newA+newB;
1798 newf += t[
i]*fApB +
log(1+exp(-fApB));
1800 newf += (t[
i] - 1)*fApB +
log(1+exp(fApB));
1803 if (newf<fval+0.0001*stepsize*gd)
1805 A=newA;B=newB;fval=newf;
1809 stepsize = stepsize / 2.0;
1812 if (stepsize < min_step)
1814 info(
"Line search fails in two-class probability estimates\n");
1820 info(
"Reaching maximal iterations in two-class probability estimates\n");
1824 static double sigmoid_predict(
double decision_value,
double A,
double B)
1826 double fApB = decision_value*A+B;
1829 return exp(-fApB)/(1.0+exp(-fApB));
1831 return 1.0/(1+exp(fApB)) ;
1835 static void multiclass_probability(
int k,
double **r,
double *p)
1838 int iter = 0, max_iter=
max(100,k);
1839 double **Q=
Malloc(
double *,k);
1840 double *Qp=
Malloc(
double,k);
1841 double pQp,
eps=0.005/
k;
1850 Q[t][t]+=r[
j][t]*r[
j][t];
1855 Q[t][t]+=r[
j][t]*r[
j][t];
1856 Q[t][
j]=-r[
j][t]*r[t][
j];
1859 for (iter=0;iter<max_iter;iter++)
1867 Qp[t]+=Q[t][j]*p[j];
1873 double error=fabs(Qp[t]-pQp);
1874 if (error>max_error)
1877 if (max_error<eps)
break;
1881 double diff=(-Qp[t]+pQp)/Q[t][t];
1883 pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
1886 Qp[
j]=(Qp[
j]+diff*Q[t][
j])/(1+diff);
1892 info(
"Exceeds max_iter in multiclass_prob\n");
1893 for(t=0;t<
k;t++)
free(Q[t]);
1899 static void svm_binary_svc_probability(
1901 double Cp,
double Cn,
double& probA,
double& probB)
1905 int *perm =
Malloc(
int,prob->
l);
1906 double *dec_values =
Malloc(
double,prob->
l);
1909 for(i=0;i<prob->
l;i++) perm[i]=i;
1910 for(i=0;i<prob->
l;i++)
1912 int j = i+rand()%(prob->
l-
i);
1913 swap(perm[i],perm[j]);
1915 for(i=0;i<nr_fold;i++)
1917 int begin = i*prob->
l/nr_fold;
1918 int end = (i+1)*prob->
l/nr_fold;
1922 subprob.
l = prob->
l-(end-begin);
1924 subprob.y =
Malloc(
double,subprob.l);
1927 for(j=0;j<begin;j++)
1929 subprob.x[
k] = prob->
x[perm[
j]];
1930 subprob.y[
k] = prob->
y[perm[
j]];
1933 for(j=end;j<prob->
l;j++)
1935 subprob.x[
k] = prob->
x[perm[
j]];
1936 subprob.y[
k] = prob->
y[perm[
j]];
1939 int p_count=0,n_count=0;
1946 if(p_count==0 && n_count==0)
1947 for(j=begin;j<end;j++)
1948 dec_values[perm[j]] = 0;
1949 else if(p_count > 0 && n_count == 0)
1950 for(j=begin;j<end;j++)
1951 dec_values[perm[j]] = 1;
1952 else if(p_count == 0 && n_count > 0)
1953 for(j=begin;j<end;j++)
1954 dec_values[perm[j]] = -1;
1968 for(j=begin;j<end;j++)
1972 dec_values[perm[
j]] *= submodel->
label[0];
1980 sigmoid_train(prob->
l,dec_values,prob->
y,probA,probB);
1986 static double svm_svr_probability(
1991 double *ymv =
Malloc(
double,prob->
l);
1997 for(i=0;i<prob->
l;i++)
1999 ymv[
i]=prob->
y[
i]-ymv[
i];
2000 mae += fabs(ymv[i]);
2006 for(i=0;i<prob->
l;i++)
2007 if (fabs(ymv[i]) > 5*std)
2011 mae /= (prob->
l-count);
2012 info(
"Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
2020 static void svm_group_classes(
const svm_problem *prob,
int *nr_class_ret,
int **label_ret,
int **start_ret,
int **count_ret,
int *perm)
2023 int max_nr_class = 16;
2026 int *count =
Malloc(
int,max_nr_class);
2027 int *data_label =
Malloc(
int,l);
2032 int this_label = (int)prob->
y[i];
2034 for(j=0;j<nr_class;j++)
2036 if(this_label == label[j])
2045 if(nr_class == max_nr_class)
2048 label = (
int *)realloc(label,max_nr_class*
sizeof(
int));
2049 count = (
int *)realloc(count,max_nr_class*
sizeof(
int));
2062 if (nr_class == 2 && label[0] == -1 && label[1] == 1)
2064 swap(label[0],label[1]);
2065 swap(count[0],count[1]);
2068 if(data_label[i] == 0)
2075 int *start =
Malloc(
int,nr_class);
2078 start[i] = start[i-1]+count[i-1];
2081 perm[start[data_label[
i]]] =
i;
2082 ++start[data_label[
i]];
2086 start[i] = start[i-1]+count[i-1];
2110 model->
label = NULL;
2120 model->
probA[0] = svm_svr_probability(prob,param);
2129 for(i=0;i<prob->
l;i++)
2136 for(i=0;i<prob->
l;i++)
2137 if(fabs(f.
alpha[i]) > 0)
2139 model->
SV[
j] = prob->
x[
i];
2155 int *perm =
Malloc(
int,l);
2158 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2160 info(
"WARNING: training data in only one class. See README for details.\n");
2165 x[i] = prob->
x[perm[i]];
2169 double *weighted_C =
Malloc(
double, nr_class);
2171 weighted_C[i] = param->
C;
2172 for(i=0;i<param->nr_weight;i++)
2179 fprintf(stderr,
"WARNING: class label %d specified in weight is not found\n", param->
weight_label[i]);
2186 bool *nonzero =
Malloc(
bool,l);
2191 double *probA=NULL,*probB=NULL;
2194 probA=
Malloc(
double,nr_class*(nr_class-1)/2);
2195 probB=
Malloc(
double,nr_class*(nr_class-1)/2);
2203 int si = start[
i], sj = start[
j];
2204 int ci = count[
i], cj = count[
j];
2207 sub_prob.
y =
Malloc(
double,sub_prob.
l);
2211 sub_prob.
x[
k] = x[si+
k];
2216 sub_prob.
x[ci+
k] = x[sj+
k];
2217 sub_prob.
y[ci+
k] = -1;
2221 svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
2223 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
2225 if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
2226 nonzero[si+
k] =
true;
2228 if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
2229 nonzero[sj+
k] =
true;
2241 model->
label[i] = label[i];
2243 model->
rho =
Malloc(
double,nr_class*(nr_class-1)/2);
2244 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2245 model->
rho[i] = f[i].
rho;
2249 model->
probA =
Malloc(
double,nr_class*(nr_class-1)/2);
2250 model->
probB =
Malloc(
double,nr_class*(nr_class-1)/2);
2251 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2264 int *nz_count =
Malloc(
int,nr_class);
2269 for(
int j=0;j<count[
i];j++)
2270 if(nonzero[start[i]+j])
2279 info(
"Total nSV = %d\n",total_sv);
2281 model->
l = total_sv;
2288 model->
SV[p] = x[
i];
2292 int *nz_start =
Malloc(
int,nr_class);
2295 nz_start[i] = nz_start[i-1]+nz_count[i-1];
2298 for(i=0;i<nr_class-1;i++)
2314 int q = nz_start[
i];
2335 for(i=0;i<nr_class*(nr_class-1)/2;i++)
2350 int *perm =
Malloc(
int,l);
2355 fprintf(stderr,
"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
2357 fold_start =
Malloc(
int,nr_fold+1);
2366 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
2369 int *fold_count =
Malloc(
int,nr_fold);
2375 for(i=0;i<count[
c];i++)
2377 int j = i+rand()%(count[
c]-
i);
2378 swap(index[start[c]+j],index[start[c]+i]);
2380 for(i=0;i<nr_fold;i++)
2384 fold_count[i]+=(i+1)*count[
c]/nr_fold-i*count[
c]/nr_fold;
2387 for (i=1;i<=nr_fold;i++)
2388 fold_start[i] = fold_start[i-1]+fold_count[i-1];
2390 for(i=0;i<nr_fold;i++)
2392 int begin = start[
c]+i*count[
c]/nr_fold;
2393 int end = start[
c]+(i+1)*count[c]/nr_fold;
2394 for(
int j=begin;j<end;j++)
2396 perm[fold_start[
i]] = index[
j];
2401 for (i=1;i<=nr_fold;i++)
2402 fold_start[i] = fold_start[i-1]+fold_count[i-1];
2411 for(i=0;i<
l;i++) perm[i]=i;
2414 int j = i+rand()%(l-
i);
2415 swap(perm[i],perm[j]);
2417 for(i=0;i<=nr_fold;i++)
2418 fold_start[i]=i*l/nr_fold;
2421 for(i=0;i<nr_fold;i++)
2423 int begin = fold_start[
i];
2424 int end = fold_start[i+1];
2428 subprob.
l = l-(end-begin);
2430 subprob.
y =
Malloc(
double,subprob.
l);
2433 for(j=0;j<begin;j++)
2435 subprob.
x[
k] = prob->
x[perm[
j]];
2436 subprob.
y[
k] = prob->
y[perm[
j]];
2441 subprob.
x[
k] = prob->
x[perm[
j]];
2442 subprob.
y[
k] = prob->
y[perm[
j]];
2450 for(j=begin;j<end;j++)
2452 free(prob_estimates);
2455 for(j=begin;j<end;j++)
2456 target[perm[j]] =
svm_predict(submodel,prob->
x[perm[j]]);
2478 if (model->
label != NULL)
2480 label[i] = model->
label[i];
2486 for(
int i=0;i<model->
l;i++)
2499 return model->
probA[0];
2502 fprintf(stderr,
"Model doesn't contain information for SVR probability inference\n");
2516 for(i=0;i<model->
l;i++)
2518 sum -= model->
rho[0];
2522 return (sum>0)?1:-1;
2531 double *kvalue =
Malloc(
double,l);
2535 int *start =
Malloc(
int,nr_class);
2538 start[i] = start[i-1]+model->
nSV[i-1];
2540 int *vote =
Malloc(
int,nr_class);
2551 int ci = model->
nSV[
i];
2552 int cj = model->
nSV[
j];
2555 double *coef1 = model->
sv_coef[j-1];
2558 sum += coef1[si+k] * kvalue[si+k];
2560 sum += coef2[sj+k] * kvalue[sj+k];
2561 sum -= model->
rho[p];
2562 dec_values[p] = sum;
2564 if(dec_values[p] > 0)
2571 int vote_max_idx = 0;
2573 if(vote[i] > vote[vote_max_idx])
2579 return model->
label[vote_max_idx];
2590 dec_values =
Malloc(
double, 1);
2592 dec_values =
Malloc(
double, nr_class*(nr_class-1)/2);
2606 double *dec_values =
Malloc(
double, nr_class*(nr_class-1)/2);
2609 double min_prob=1e-7;
2610 double **pairwise_prob=
Malloc(
double *,nr_class);
2612 pairwise_prob[i]=
Malloc(
double,nr_class);
2617 pairwise_prob[
i][
j]=
min(
max(sigmoid_predict(dec_values[k],model->
probA[k],model->
probB[k]),min_prob),1-min_prob);
2618 pairwise_prob[
j][
i]=1-pairwise_prob[
i][
j];
2623 prob_estimates[0] = pairwise_prob[0][1];
2624 prob_estimates[1] = pairwise_prob[1][0];
2627 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
2629 int prob_max_idx = 0;
2631 if(prob_estimates[i] > prob_estimates[prob_max_idx])
2634 free(pairwise_prob[i]);
2636 free(pairwise_prob);
2637 return model->
label[prob_max_idx];
2643 static const char *svm_type_table[] =
2645 "c_svc",
"nu_svc",
"one_class",
"epsilon_svr",
"nu_svr",NULL
2648 static const char *kernel_type_table[]=
2650 "linear",
"polynomial",
"rbf",
"sigmoid",
"precomputed",NULL
2655 FILE *fp = fopen(model_file_name,
"w");
2656 if(fp==NULL)
return -1;
2658 char *old_locale = setlocale(LC_ALL, NULL);
2660 old_locale = strdup(old_locale);
2662 setlocale(LC_ALL,
"C");
2680 fprintf(fp,
"nr_class %d\n", nr_class);
2681 fprintf(fp,
"total_sv %d\n",l);
2685 for(
int i=0;i<nr_class*(nr_class-1)/2;i++)
2701 for(
int i=0;i<nr_class*(nr_class-1)/2;i++)
2708 for(
int i=0;i<nr_class*(nr_class-1)/2;i++)
2725 for(
int i=0;i<
l;i++)
2727 for(
int j=0;j<nr_class-1;j++)
2728 fprintf(fp,
"%.17g ",sv_coef[j][i]);
2735 while(p->
index != -1)
2743 setlocale(LC_ALL, old_locale);
2746 if (ferror(fp) != 0 || fclose(fp) != 0)
return -1;
2750 static char *line = NULL;
2751 static int max_line_len;
2753 static char* readline(FILE *input)
2757 if(fgets(line,max_line_len,input) == NULL)
2760 while(strrchr(line,
'\n') == NULL)
2763 line = (
char *) realloc(line,max_line_len);
2764 len = (int) strlen(line);
2765 if(fgets(line+len,max_line_len-len,input) == NULL)
2778 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0) 2792 if(strcmp(cmd,
"svm_type")==0)
2796 for(i=0;svm_type_table[
i];i++)
2798 if(strcmp(svm_type_table[i],cmd)==0)
2804 if(svm_type_table[i] == NULL)
2806 fprintf(stderr,
"unknown svm type.\n");
2810 else if(strcmp(cmd,
"kernel_type")==0)
2814 for(i=0;kernel_type_table[
i];i++)
2816 if(strcmp(kernel_type_table[i],cmd)==0)
2822 if(kernel_type_table[i] == NULL)
2824 fprintf(stderr,
"unknown kernel function.\n");
2828 else if(strcmp(cmd,
"degree")==0)
2830 else if(strcmp(cmd,
"gamma")==0)
2832 else if(strcmp(cmd,
"coef0")==0)
2834 else if(strcmp(cmd,
"nr_class")==0)
2836 else if(strcmp(cmd,
"total_sv")==0)
2838 else if(strcmp(cmd,
"rho")==0)
2842 for(
int i=0;i<
n;i++)
2845 else if(strcmp(cmd,
"label")==0)
2849 for(
int i=0;i<
n;i++)
2852 else if(strcmp(cmd,
"probA")==0)
2856 for(
int i=0;i<
n;i++)
2859 else if(strcmp(cmd,
"probB")==0)
2863 for(
int i=0;i<
n;i++)
2866 else if(strcmp(cmd,
"nr_sv")==0)
2870 for(
int i=0;i<
n;i++)
2873 else if(strcmp(cmd,
"SV")==0)
2878 if(c==EOF || c==
'\n')
break;
2884 fprintf(stderr,
"unknown text in model file: [%s]\n",cmd);
2895 FILE *fp = fopen(model_file_name,
"rb");
2896 if(fp==NULL)
return NULL;
2898 char *old_locale = setlocale(LC_ALL, NULL);
2900 old_locale = strdup(old_locale);
2902 setlocale(LC_ALL,
"C");
2908 model->
probA = NULL;
2909 model->
probB = NULL;
2911 model->
label = NULL;
2917 fprintf(stderr,
"ERROR: fscanf failed to read model\n");
2918 setlocale(LC_ALL, old_locale);
2930 long pos = ftell(fp);
2932 max_line_len = 1024;
2933 line =
Malloc(
char,max_line_len);
2934 char *p,*endptr,*idx,*val;
2936 while(readline(fp)!=NULL)
2938 p = strtok(line,
":");
2941 p = strtok(NULL,
":");
2947 elements += model->
l;
2949 fseek(fp,pos,SEEK_SET);
2965 model->
SV[
i] = &x_space[
j];
2967 p = strtok(line,
" \t");
2968 model->
sv_coef[0][
i] = strtod(p,&endptr);
2969 for(
int k=1;k<
m;k++)
2971 p = strtok(NULL,
" \t");
2972 model->
sv_coef[
k][
i] = strtod(p,&endptr);
2977 idx = strtok(NULL,
":");
2978 val = strtok(NULL,
" \t");
2982 x_space[
j].
index = (int) strtol(idx,&endptr,10);
2983 x_space[
j].
value = strtod(val,&endptr);
2987 x_space[j++].
index = -1;
2991 setlocale(LC_ALL, old_locale);
2994 if (ferror(fp) != 0 || fclose(fp) != 0)
3003 if(model_ptr->
free_sv && model_ptr->
l > 0 && model_ptr->
SV != NULL)
3004 free((
void *)(model_ptr->
SV[0]));
3007 for(
int i=0;i<model_ptr->
nr_class-1;i++)
3012 model_ptr->
SV = NULL;
3018 model_ptr->
rho = NULL;
3021 model_ptr->
label= NULL;
3024 model_ptr->
probA = NULL;
3027 model_ptr->
probB= NULL;
3033 model_ptr->
nSV = NULL;
3038 if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
3041 free(*model_ptr_ptr);
3042 *model_ptr_ptr = NULL;
3057 if(svm_type !=
C_SVC &&
3062 return "unknown svm type";
3067 if(kernel_type !=
LINEAR &&
3068 kernel_type !=
POLY &&
3069 kernel_type !=
RBF &&
3072 return "unknown kernel type";
3074 if((kernel_type ==
POLY || kernel_type ==
RBF || kernel_type ==
SIGMOID) &&
3079 return "degree of polynomial kernel < 0";
3084 return "cache_size <= 0";
3089 if(svm_type ==
C_SVC ||
3098 if(param->
nu <= 0 || param->
nu > 1)
3099 return "nu <= 0 or nu > 1";
3107 return "shrinking != 0 and shrinking != 1";
3111 return "probability != 0 and probability != 1";
3115 return "one-class SVM probability output not supported yet";
3123 int max_nr_class = 16;
3126 int *count =
Malloc(
int,max_nr_class);
3131 int this_label = (int)prob->
y[i];
3133 for(j=0;j<nr_class;j++)
3134 if(this_label == label[j])
3141 if(nr_class == max_nr_class)
3144 label = (
int *)realloc(label,max_nr_class*
sizeof(
int));
3145 count = (
int *)realloc(count,max_nr_class*
sizeof(
int));
3159 if(param->
nu*(n1+n2)/2 >
min(n1,n2))
3163 return "specified nu is infeasible";
3179 model->
probA!=NULL);
3184 if(print_func == NULL)
3185 svm_print_string = &print_string_stdout;
3187 svm_print_string = print_func;
void min(Image< double > &op1, const Image< double > &op2)
int get_data(const int index, Qfloat **data, int len)
int svm_get_nr_class(const svm_model *model)
#define FSCANF(_stream, _format, _var)
void Solve(int l, const QMatrix &Q, const double *p, const schar *y, double *alpha, double Cp, double Cn, double eps, SolutionInfo *si, int shrinking)
void swap_index(int i, int j)
double svm_predict_probability(const svm_model *model, const svm_node *x, double *prob_estimates)
void sqrt(Image< double > &op)
svm_model * svm_train(const svm_problem *prob, const svm_parameter *param)
void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
void svm_set_print_string_function(void(*print_func)(const char *))
ONE_CLASS_Q(const svm_problem &prob, const svm_parameter ¶m)
virtual Qfloat * get_Q(int column, int len) const =0
int svm_get_svm_type(const svm_model *model)
void swap_index(int i, int j) const
double(Kernel::* kernel_function)(int i, int j) const
void svm_free_and_destroy_model(svm_model **model_ptr_ptr)
void swap_index(int i, int j) const
ql0001_ & k(htemp+1),(cvec+1),(atemp+1),(bj+1),(bl+1),(bu+1),(x+1),(clamda+1), &iout, infoqp, &zero,(w+1), &lenw,(iw+1), &leniw, &glob_grd.epsmac
virtual int select_working_set(int &i, int &j)
void svm_free_model_content(svm_model *model_ptr)
virtual void swap_index(int i, int j) const
void log(Image< double > &op)
Kernel(int l, svm_node *const *x, const svm_parameter ¶m)
void svm_destroy_param(svm_parameter *param)
bool is_upper_bound(int i)
void max(Image< double > &op1, const Image< double > &op2)
bool read_model_header(FILE *fp, svm_model *model)
const char * svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
double svm_get_svr_probability(const svm_model *model)
void update_alpha_status(int i)
struct svm_parameter param
void swap_index(int i, int j) const
SVR_Q(const svm_problem &prob, const svm_parameter ¶m)
int svm_check_probability_model(const svm_model *model)
Qfloat * get_Q(int i, int len) const
struct _parameter * param
double svm_predict_values(const svm_model *model, const svm_node *x, double *dec_values)
virtual double * get_QD() const =0
int svm_save_model(const char *model_file_name, const svm_model *model)
void swap_index(int i, int j)
SVC_Q(const svm_problem &prob, const svm_parameter ¶m, const schar *y_)
double svm_predict(const svm_model *model, const svm_node *x)
void svm_get_labels(const svm_model *model, int *label)
virtual void do_shrinking()
static double k_function(const svm_node *x, const svm_node *y, const svm_parameter ¶m)
void svm_get_sv_indices(const svm_model *model, int *indices)
bool is_lower_bound(int i)
fprintf(glob_prnt.io, "\)
void Solve(int l, const QMatrix &Q, const double *p_, const schar *y_, double *alpha_, double Cp, double Cn, double eps, SolutionInfo *si, int shrinking)
svm_model * svm_load_model(const char *model_file_name)
virtual double calculate_rho()
Qfloat * get_Q(int i, int len) const
void reconstruct_gradient()
Cache(int l, long int size)
int svm_get_nr_sv(const svm_model *model)
Qfloat * get_Q(int i, int len) const
__host__ __device__ float dot(float2 a, float2 b)