From a77aaa279801ab79baf543f64b826145f54e4c6a Mon Sep 17 00:00:00 2001
From: Syd Xu
Date: Mon, 7 Feb 2022 17:29:30 +0800
Subject: [PATCH] fix(hand): pose3d/mediapipe palmobject.landmarks wrong malloc

The palmobject.landmarks buffer in the 3D hand-pose (pose3d/mediapipe)
path was allocated with a wrong malloc. Fix the allocation in
src/hand/pose3d (estimator.cpp, mediapipe/mediapipe.cpp) and add a second
test image to the Go hand example so the 3D path is exercised more than
once. The bundled SVM-light sources (svm_learn.c, svm_learn.h) receive a
mechanical formatting cleanup alongside; no functional change is intended
there.

One plausible shape of this class of bug, for illustration only (the
names below are hypothetical; see the pose3d hunks further down for the
actual change):

    /* before: allocates room for a single landmark */
    palm.landmarks = (float *)malloc(3 * sizeof(float));
    /* after: allocates room for every landmark of the palm */
    palm.landmarks = (float *)malloc(num_landmarks * 3 * sizeof(float));

---
 go/examples/hand/main.go | 1 +
 src/classifier/svm/svm_light/svm_learn.c | 6373 +++++++++++-----------
 src/classifier/svm/svm_light/svm_learn.h | 3 +
 src/hand/pose3d/estimator.cpp | 17 +-
 src/hand/pose3d/mediapipe/mediapipe.cpp | 4 +-
 5 files changed, 3210 insertions(+), 3188 deletions(-)

diff --git a/go/examples/hand/main.go b/go/examples/hand/main.go
index 1924511..d6daac6 100644
--- a/go/examples/hand/main.go
+++ b/go/examples/hand/main.go
@@ -43,6 +43,7 @@ func main() {
 	// }
 	d3d := mediapipe(modelPath)
 	detect3d(d3d, imgPath, fontPath, "hand1.jpg")
+	detect3d(d3d, imgPath, fontPath, "hand2.jpg")
 }
 
 func yolox(modelPath string) detecter.Detecter {
diff --git a/src/classifier/svm/svm_light/svm_learn.c b/src/classifier/svm/svm_light/svm_learn.c
index 5bf8756..0dff71a 100644
--- a/src/classifier/svm/svm_light/svm_learn.c
+++ b/src/classifier/svm/svm_light/svm_learn.c
@@ -14,15 +14,14 @@
 /* author. The author is not responsible for implications from the */
 /* use of this software. */
 /* */
-/***********************************************************************/
+/***********************************************************************/
+
+#include "svm_learn.h"
+#include "svm_common.h"
-
-# include "svm_common.h"
-# include "svm_learn.h"
-
-#define MAX(x,y) ((x) < (y) ? (y) : (x))
-#define MIN(x,y) ((x) > (y) ? (y) : (x))
-#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+#define MIN(x, y) ((x) > (y) ? (y) : (x))
+#define SIGN(x) ((x) > (0) ? (1) : (((x) < (0) ? (-1) : (0))))
 
 /* interface to QP-solver */
 double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
@@ -33,439 +32,463 @@ double *optimize_qp(QP *, double *, long, double *, LEARN_PARM *);
    docs/label. The resulting model is returned in the structure model. */
 
-void svm_learn_classification(DOC **docs, double *class, long int
- totdoc, long int totwords,
- LEARN_PARM *learn_parm,
- KERNEL_PARM *kernel_parm,
- KERNEL_CACHE *kernel_cache,
- MODEL *model,
- double *alpha)
- /* docs: Training vectors (x-part) */
- /* class: Training labels (y-part, zero if test example for
- transduction) */
- /* totdoc: Number of examples in docs/label */
- /* totwords: Number of features (i.e. highest feature index) */
- /* learn_parm: Learning paramenters */
- /* kernel_parm: Kernel paramenters */
- /* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
- NULL if linear.*/
- /* model: Returns learning result (assumed empty before called) */
- /* alpha: Start values for the alpha variables or NULL
- pointer. The new alpha values are returned after
- optimization if not NULL. Array must be of size totdoc. */
+void svm_learn_classification(DOC **docs, double *class, long int totdoc,
+ long int totwords, LEARN_PARM *learn_parm,
+ KERNEL_PARM *kernel_parm,
+ KERNEL_CACHE *kernel_cache, MODEL *model,
+ double *alpha)
+/* docs: Training vectors (x-part) */
+/* class: Training labels (y-part, zero if test example for
+ transduction) */
+/* totdoc: Number of examples in docs/label */
+/* totwords: Number of features (i.e. highest feature index) */
+/* learn_parm: Learning paramenters */
+/* kernel_parm: Kernel paramenters */
+/* kernel_cache:Initialized Cache of size totdoc, if using a kernel.
+ NULL if linear.*/ +/* model: Returns learning result (assumed empty before called) */ +/* alpha: Start values for the alpha variables or NULL + pointer. The new alpha values are returned after + optimization if not NULL. Array must be of size totdoc. */ { - long *inconsistent,i,*label; + long *inconsistent, i, *label; long inconsistentnum; - long misclassified,upsupvecnum; - double loss,model_length,example_length; - double dualitygap,xisum,alphasum,xi; - double maxdiff,*lin,*a,*c; - double runtime_start,runtime_end; + long misclassified, upsupvecnum; + double loss, model_length, example_length; + double dualitygap, xisum, alphasum, xi; + double maxdiff, *lin, *a, *c; + double runtime_start, runtime_end; long iterations; - long *unlabeled,transduction; + long *unlabeled, transduction; long heldout; - long loo_count=0,loo_count_pos=0,loo_count_neg=0,trainpos=0,trainneg=0; - long loocomputed=0; - double runtime_start_loo=0,runtime_start_xa=0; - double heldout_c=0,r_delta_sq=0,r_delta,r_delta_avg; - long *index,*index2dnum; + long loo_count = 0, loo_count_pos = 0, loo_count_neg = 0, trainpos = 0, + trainneg = 0; + long loocomputed = 0; + double runtime_start_loo = 0, runtime_start_xa = 0; + double heldout_c = 0, r_delta_sq = 0, r_delta, r_delta_avg; + long *index, *index2dnum; double *weights; - CFLOAT *aicache; /* buffer to keep one row of hessian */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ double *xi_fullset; /* buffer for storing xi on full sample in loo */ double *a_fullset; /* buffer for storing alpha on full sample in loo */ TIMING timing_profile; SHRINK_STATE shrink_state; - runtime_start=get_runtime(); - timing_profile.time_kernel=0; - timing_profile.time_opti=0; - timing_profile.time_shrink=0; - timing_profile.time_update=0; - timing_profile.time_model=0; - timing_profile.time_check=0; - timing_profile.time_select=0; - kernel_cache_statistic=0; + runtime_start = get_runtime(); + timing_profile.time_kernel = 0; + timing_profile.time_opti = 0; + timing_profile.time_shrink = 0; + timing_profile.time_update = 0; + timing_profile.time_model = 0; + timing_profile.time_check = 0; + timing_profile.time_select = 0; + kernel_cache_statistic = 0; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; /* make sure -n value is reasonable */ - if((learn_parm->svm_newvarsinqp < 2) - || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if ((learn_parm->svm_newvarsinqp < 2) || + (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; } - init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + init_shrink_state(&shrink_state, totdoc, (long)MAXSHRINK); - label = (long *)my_malloc(sizeof(long)*totdoc); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - c = (double *)my_malloc(sizeof(double)*totdoc); - a = (double *)my_malloc(sizeof(double)*totdoc); - a_fullset = (double *)my_malloc(sizeof(double)*totdoc); - xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); - lin = (double *)my_malloc(sizeof(double)*totdoc); - learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + label = (long *)my_malloc(sizeof(long) * totdoc); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); 
+ unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + c = (double *)my_malloc(sizeof(double) * totdoc); + a = (double *)my_malloc(sizeof(double) * totdoc); + a_fullset = (double *)my_malloc(sizeof(double) * totdoc); + xi_fullset = (double *)my_malloc(sizeof(double) * totdoc); + lin = (double *)my_malloc(sizeof(double) * totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double) * totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); - model->at_upper_bound=0; - model->b=0; - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->sv_num=1; - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; - inconsistentnum=0; - transduction=0; + model->at_upper_bound = 0; + model->b = 0; + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->sv_num = 1; + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; + inconsistentnum = 0; + transduction = 0; - r_delta=estimate_r_delta(docs,totdoc,kernel_parm); - r_delta_sq=r_delta*r_delta; + r_delta = estimate_r_delta(docs, totdoc, kernel_parm); + r_delta_sq = r_delta * r_delta; - r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); - if(learn_parm->svm_c == 0.0) { /* default value for C */ - learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); - if(verbosity>=1) - printf("Setting default regularization parameter C=%.4f\n", - learn_parm->svm_c); + r_delta_avg = estimate_r_delta_average(docs, totdoc, kernel_parm); + if (learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c = 1.0 / (r_delta_avg * r_delta_avg); + if (verbosity >= 1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); } - learn_parm->eps=-1.0; /* equivalent regression epsilon for - classification */ + learn_parm->eps = -1.0; /* equivalent regression epsilon for + classification */ - for(i=0;idocnum=i; - inconsistent[i]=0; - a[i]=0; - lin[i]=0; - c[i]=0.0; - unlabeled[i]=0; - if(class[i] == 0) { - unlabeled[i]=1; - label[i]=0; - transduction=1; + for (i = 0; i < totdoc; i++) { /* various inits */ + docs[i]->docnum = i; + inconsistent[i] = 0; + a[i] = 0; + lin[i] = 0; + c[i] = 0.0; + unlabeled[i] = 0; + if (class[i] == 0) { + unlabeled[i] = 1; + label[i] = 0; + transduction = 1; } - if(class[i] > 0) { - learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* - docs[i]->costfactor; - label[i]=1; + if (class[i] > 0) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_costratio * docs[i]->costfactor; + label[i] = 1; trainpos++; - } - else if(class[i] < 0) { - learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; - label[i]=-1; + } else if (class[i] < 0) { + learn_parm->svm_cost[i] = learn_parm->svm_c * docs[i]->costfactor; + label[i] = -1; trainneg++; - } - else { - learn_parm->svm_cost[i]=0; + } else { + learn_parm->svm_cost[i] = 0; } } - if(verbosity>=2) { - printf("%ld positive, %ld negative, and %ld unlabeled 
examples.\n",trainpos,trainneg,totdoc-trainpos-trainneg); fflush(stdout); + if (verbosity >= 2) { + printf("%ld positive, %ld negative, and %ld unlabeled examples.\n", + trainpos, trainneg, totdoc - trainpos - trainneg); + fflush(stdout); } /* caching makes no sense for linear kernel */ - if(kernel_parm->kernel_type == LINEAR) { + if (kernel_parm->kernel_type == LINEAR) { /* kernel_cache = NULL; */ - } - + } + /* compute starting state for initial alpha values */ - if(alpha) { - if(verbosity>=1) { - printf("Computing starting state..."); fflush(stdout); + if (alpha) { + if (verbosity >= 1) { + printf("Computing starting state..."); + fflush(stdout); } - index = (long *)my_malloc(sizeof(long)*totdoc); - index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - weights=(double *)my_malloc(sizeof(double)*(totwords+1)); - aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); - for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + index = (long *)my_malloc(sizeof(long) * totdoc); + index2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + weights = (double *)my_malloc(sizeof(double) * (totwords + 1)); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc); + for (i = 0; i < totdoc; i++) { /* create full index and clip alphas */ + index[i] = 1; + alpha[i] = fabs(alpha[i]); + if (alpha[i] < 0) + alpha[i] = 0; + if (alpha[i] > learn_parm->svm_cost[i]) + alpha[i] = learn_parm->svm_cost[i]; } - if(kernel_cache && (kernel_parm->kernel_type != LINEAR)) { - for(i=0;i0) && (alpha[i]svm_cost[i]) - && (kernel_cache_space_available(kernel_cache))) - cache_kernel_row(kernel_cache,docs,i,kernel_parm); - for(i=0;isvm_cost[i]) - && (kernel_cache_space_available(kernel_cache))) - cache_kernel_row(kernel_cache,docs,i,kernel_parm); + if (kernel_cache && (kernel_parm->kernel_type != LINEAR)) { + for (i = 0; i < totdoc; i++) /* fill kernel cache with unbounded SV */ + if ((alpha[i] > 0) && (alpha[i] < learn_parm->svm_cost[i]) && + (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache, docs, i, kernel_parm); + for (i = 0; i < totdoc; + i++) /* fill rest of kernel cache with bounded SV */ + if ((alpha[i] == learn_parm->svm_cost[i]) && + (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache, docs, i, kernel_parm); } - clear_nvector(weights,totwords); /* set weights to zero */ - (void)compute_index(index,totdoc,index2dnum); - update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, - totwords,kernel_parm,kernel_cache,lin,aicache, - weights); - (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, - learn_parm,index2dnum,index2dnum,model); - for(i=0;i=1) { - printf("done.\n"); fflush(stdout); - } - } - - if(transduction) { - learn_parm->svm_iter_to_shrink=99999999; - if(verbosity >= 1) - printf("\nDeactivating Shrinking due to an incompatibility with the transductive \nlearner in the current version.\n\n"); + if (verbosity >= 1) { + printf("done.\n"); + fflush(stdout); + } + } + + if (transduction) { + learn_parm->svm_iter_to_shrink = 99999999; + if (verbosity >= 1) + printf("\nDeactivating Shrinking due to an incompatibility with the " + "transductive \nlearner in the current version.\n\n"); } - if(transduction && learn_parm->compute_loo) { - learn_parm->compute_loo=0; - if(verbosity >= 1) - printf("\nCannot compute leave-one-out estimates for transductive learner.\n\n"); - } - - if(learn_parm->remove_inconsistent && learn_parm->compute_loo) { - learn_parm->compute_loo=0; - printf("\nCannot compute leave-one-out estimates 
when removing inconsistent examples.\n\n"); - } - - if(learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) { - learn_parm->compute_loo=0; - printf("\nCannot compute leave-one-out with only one example in one class.\n\n"); - } - - - if(verbosity==1) { - printf("Optimizing"); fflush(stdout); + if (transduction && learn_parm->compute_loo) { + learn_parm->compute_loo = 0; + if (verbosity >= 1) + printf("\nCannot compute leave-one-out estimates for transductive " + "learner.\n\n"); + } + + if (learn_parm->remove_inconsistent && learn_parm->compute_loo) { + learn_parm->compute_loo = 0; + printf("\nCannot compute leave-one-out estimates when removing " + "inconsistent examples.\n\n"); + } + + if (learn_parm->compute_loo && ((trainpos == 1) || (trainneg == 1))) { + learn_parm->compute_loo = 0; + printf("\nCannot compute leave-one-out with only one example in one " + "class.\n\n"); + } + + if (verbosity == 1) { + printf("Optimizing"); + fflush(stdout); } /* train the svm */ - iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, - kernel_parm,kernel_cache,&shrink_state,model, - inconsistent,unlabeled,a,lin, - c,&timing_profile, - &maxdiff,(long)-1, - (long)1); - - if(verbosity>=1) { - if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + iterations = optimize_to_convergence( + docs, label, totdoc, totwords, learn_parm, kernel_parm, kernel_cache, + &shrink_state, model, inconsistent, unlabeled, a, lin, c, &timing_profile, + &maxdiff, (long)-1, (long)1); + + if (verbosity >= 1) { + if (verbosity == 1) + printf("done. (%ld iterations)\n", iterations); - misclassified=0; - for(i=0;(ib)*(double)label[i] <= 0.0) - misclassified++; + misclassified = 0; + for (i = 0; (i < totdoc); i++) { /* get final statistic */ + if ((lin[i] - model->b) * (double)label[i] <= 0.0) + misclassified++; } - printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n", - misclassified,maxdiff); - - runtime_end=get_runtime(); - if(verbosity>=2) { - printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", - (runtime_end-runtime_start)/100.0, - (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start), - (100.0*timing_profile.time_opti)/(runtime_end-runtime_start), - (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start), - (100.0*timing_profile.time_update)/(runtime_end-runtime_start), - (100.0*timing_profile.time_model)/(runtime_end-runtime_start), - (100.0*timing_profile.time_check)/(runtime_end-runtime_start), - (100.0*timing_profile.time_select)/(runtime_end-runtime_start)); - } - else { - printf("Runtime in cpu-seconds: %.2f\n", - (runtime_end-runtime_start)/100.0); + printf("Optimization finished (%ld misclassified, maxdiff=%.5f).\n", + misclassified, maxdiff); + + runtime_end = get_runtime(); + if (verbosity >= 2) { + printf( + "Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for " + "optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for " + "model/%.2f%% for check/%.2f%% for select)\n", + (runtime_end - runtime_start) / 100.0, + (100.0 * timing_profile.time_kernel) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_opti) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_shrink) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_update) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_model) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_check) / (runtime_end - 
runtime_start), + (100.0 * timing_profile.time_select) / (runtime_end - runtime_start)); + } else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end - runtime_start) / 100.0); } - if(learn_parm->remove_inconsistent) { - inconsistentnum=0; - for(i=0;isv_num-1,inconsistentnum); - } - else { - upsupvecnum=0; - for(i=1;isv_num;i++) { - if(fabs(model->alpha[i]) >= - (learn_parm->svm_cost[(model->supvec[i])->docnum]- - learn_parm->epsilon_a)) - upsupvecnum++; + if (learn_parm->remove_inconsistent) { + inconsistentnum = 0; + for (i = 0; i < totdoc; i++) + if (inconsistent[i]) + inconsistentnum++; + printf("Number of SV: %ld (plus %ld inconsistent examples)\n", + model->sv_num - 1, inconsistentnum); + } else { + upsupvecnum = 0; + for (i = 1; i < model->sv_num; i++) { + if (fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum] - + learn_parm->epsilon_a)) + upsupvecnum++; } - printf("Number of SV: %ld (including %ld at upper bound)\n", - model->sv_num-1,upsupvecnum); - } - - if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { - loss=0; - xisum=0; - alphasum=0; - model_length=0; - for(i=0;ib)*(double)label[i]); - if(xi > learn_parm->epsilon_crit) - loss+=xi; - xisum+=xi*learn_parm->svm_cost[i]; - alphasum+=a[i]; - model_length+=a[i]*label[i]*lin[i]; + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num - 1, upsupvecnum); + } + + if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) { + loss = 0; + xisum = 0; + alphasum = 0; + model_length = 0; + for (i = 0; i < totdoc; i++) { + xi = MAX(0.0, 1.0 - (lin[i] - model->b) * (double)label[i]); + if (xi > learn_parm->epsilon_crit) + loss += xi; + xisum += xi * learn_parm->svm_cost[i]; + alphasum += a[i]; + model_length += a[i] * label[i] * lin[i]; } - model_length=sqrt(model_length); - dualitygap=(0.5*model_length*model_length+xisum) - -(alphasum-0.5*model_length*model_length); - fprintf(stdout,"Upper bound on duality gap: gap=%.5f\n",dualitygap); - fprintf(stdout,"Dual objective value: dval=%.5f\n", - alphasum-0.5*model_length*model_length); - fprintf(stdout,"L1 loss: loss=%.5f\n",loss); - fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); - example_length=estimate_sphere(model); - fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", - length_of_longest_document_vector(docs,totdoc,kernel_parm)); - fprintf(stdout,"Estimated VCdim of classifier: VCdim<=%.5f\n", - estimate_margin_vcdim(model,model_length,example_length)); - if((!learn_parm->remove_inconsistent) && (!transduction)) { - runtime_start_xa=get_runtime(); - if(verbosity>=1) { - printf("Computing XiAlpha-estimates..."); fflush(stdout); - } - compute_xa_estimates(model,label,unlabeled,totdoc,docs,lin,a, - kernel_parm,learn_parm,&(model->xa_error), - &(model->xa_recall),&(model->xa_precision)); - if(verbosity>=1) { - printf("done\n"); - } - printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n", - (get_runtime()-runtime_start_xa)/100.0); - - fprintf(stdout,"XiAlpha-estimate of the error: error<=%.2f%% (rho=%.2f,depth=%ld)\n", - model->xa_error,learn_parm->rho,learn_parm->xa_depth); - fprintf(stdout,"XiAlpha-estimate of the recall: recall=>%.2f%% (rho=%.2f,depth=%ld)\n", - model->xa_recall,learn_parm->rho,learn_parm->xa_depth); - fprintf(stdout,"XiAlpha-estimate of the precision: precision=>%.2f%% (rho=%.2f,depth=%ld)\n", - model->xa_precision,learn_parm->rho,learn_parm->xa_depth); - } - else if(!learn_parm->remove_inconsistent) { - estimate_transduction_quality(model,label,unlabeled,totdoc,docs,lin); 
+ model_length = sqrt(model_length); + dualitygap = (0.5 * model_length * model_length + xisum) - + (alphasum - 0.5 * model_length * model_length); + fprintf(stdout, "Upper bound on duality gap: gap=%.5f\n", dualitygap); + fprintf(stdout, "Dual objective value: dval=%.5f\n", + alphasum - 0.5 * model_length * model_length); + fprintf(stdout, "L1 loss: loss=%.5f\n", loss); + fprintf(stdout, "Norm of weight vector: |w|=%.5f\n", model_length); + example_length = estimate_sphere(model); + fprintf(stdout, "Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs, totdoc, kernel_parm)); + fprintf(stdout, "Estimated VCdim of classifier: VCdim<=%.5f\n", + estimate_margin_vcdim(model, model_length, example_length)); + if ((!learn_parm->remove_inconsistent) && (!transduction)) { + runtime_start_xa = get_runtime(); + if (verbosity >= 1) { + printf("Computing XiAlpha-estimates..."); + fflush(stdout); + } + compute_xa_estimates(model, label, unlabeled, totdoc, docs, lin, a, + kernel_parm, learn_parm, &(model->xa_error), + &(model->xa_recall), &(model->xa_precision)); + if (verbosity >= 1) { + printf("done\n"); + } + printf("Runtime for XiAlpha-estimates in cpu-seconds: %.2f\n", + (get_runtime() - runtime_start_xa) / 100.0); + + fprintf(stdout, + "XiAlpha-estimate of the error: error<=%.2f%% " + "(rho=%.2f,depth=%ld)\n", + model->xa_error, learn_parm->rho, learn_parm->xa_depth); + fprintf(stdout, + "XiAlpha-estimate of the recall: recall=>%.2f%% " + "(rho=%.2f,depth=%ld)\n", + model->xa_recall, learn_parm->rho, learn_parm->xa_depth); + fprintf(stdout, + "XiAlpha-estimate of the precision: precision=>%.2f%% " + "(rho=%.2f,depth=%ld)\n", + model->xa_precision, learn_parm->rho, learn_parm->xa_depth); + } else if (!learn_parm->remove_inconsistent) { + estimate_transduction_quality(model, label, unlabeled, totdoc, docs, + lin); } } - if(verbosity>=1) { - printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + if (verbosity >= 1) { + printf("Number of kernel evaluations: %ld\n", kernel_cache_statistic); } - } - - + } + /* leave-one-out testing starts now */ - if(learn_parm->compute_loo) { + if (learn_parm->compute_loo) { /* save results of training on full dataset for leave-one-out */ - runtime_start_loo=get_runtime(); - for(i=0;ib)*(double)label[i]); - if(xi_fullset[i]<0) xi_fullset[i]=0; - a_fullset[i]=a[i]; + runtime_start_loo = get_runtime(); + for (i = 0; i < totdoc; i++) { + xi_fullset[i] = 1.0 - ((lin[i] - model->b) * (double)label[i]); + if (xi_fullset[i] < 0) + xi_fullset[i] = 0; + a_fullset[i] = a[i]; } - if(verbosity>=1) { + if (verbosity >= 1) { printf("Computing leave-one-out"); - } - + } + /* repeat this loop for every held-out example */ - for(heldout=0;(heldoutrho*a_fullset[heldout]*r_delta_sq+xi_fullset[heldout] - < 1.0) { - /* guaranteed to not produce a leave-one-out error */ - if(verbosity==1) { - printf("+"); fflush(stdout); - } + for (heldout = 0; (heldout < totdoc); heldout++) { + if (learn_parm->rho * a_fullset[heldout] * r_delta_sq + + xi_fullset[heldout] < + 1.0) { + /* guaranteed to not produce a leave-one-out error */ + if (verbosity == 1) { + printf("+"); + fflush(stdout); + } + } else if (xi_fullset[heldout] > 1.0) { + /* guaranteed to produce a leave-one-out error */ + loo_count++; + if (label[heldout] > 0) + loo_count_pos++; + else + loo_count_neg++; + if (verbosity == 1) { + printf("-"); + fflush(stdout); + } + } else { + loocomputed++; + heldout_c = learn_parm->svm_cost[heldout]; /* set upper bound to zero */ + 
learn_parm->svm_cost[heldout] = 0; + /* make sure heldout example is not currently */ + /* shrunk away. Assumes that lin is up to date! */ + shrink_state.active[heldout] = 1; + if (verbosity >= 2) + printf("\nLeave-One-Out test on example %ld\n", heldout); + if (verbosity >= 1) { + printf("(?[%ld]", heldout); + fflush(stdout); + } + + optimize_to_convergence(docs, label, totdoc, totwords, learn_parm, + kernel_parm, kernel_cache, &shrink_state, model, + inconsistent, unlabeled, a, lin, c, + &timing_profile, &maxdiff, heldout, (long)2); + + /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */ + + if (((lin[heldout] - model->b) * (double)label[heldout]) <= 0.0) { + loo_count++; /* there was a loo-error */ + if (label[heldout] > 0) + loo_count_pos++; + else + loo_count_neg++; + if (verbosity >= 1) { + printf("-)"); + fflush(stdout); + } + } else { + if (verbosity >= 1) { + printf("+)"); + fflush(stdout); + } + } + /* now we need to restore the original data set*/ + learn_parm->svm_cost[heldout] = heldout_c; /* restore upper bound */ } - else if(xi_fullset[heldout] > 1.0) { - /* guaranteed to produce a leave-one-out error */ - loo_count++; - if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; - if(verbosity==1) { - printf("-"); fflush(stdout); - } - } - else { - loocomputed++; - heldout_c=learn_parm->svm_cost[heldout]; /* set upper bound to zero */ - learn_parm->svm_cost[heldout]=0; - /* make sure heldout example is not currently */ - /* shrunk away. Assumes that lin is up to date! */ - shrink_state.active[heldout]=1; - if(verbosity>=2) - printf("\nLeave-One-Out test on example %ld\n",heldout); - if(verbosity>=1) { - printf("(?[%ld]",heldout); fflush(stdout); - } - - optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, - kernel_parm, - kernel_cache,&shrink_state,model,inconsistent,unlabeled, - a,lin,c,&timing_profile, - &maxdiff,heldout,(long)2); - - /* printf("%.20f\n",(lin[heldout]-model->b)*(double)label[heldout]); */ - - if(((lin[heldout]-model->b)*(double)label[heldout]) <= 0.0) { - loo_count++; /* there was a loo-error */ - if(label[heldout] > 0) loo_count_pos++; else loo_count_neg++; - if(verbosity>=1) { - printf("-)"); fflush(stdout); - } - } - else { - if(verbosity>=1) { - printf("+)"); fflush(stdout); - } - } - /* now we need to restore the original data set*/ - learn_parm->svm_cost[heldout]=heldout_c; /* restore upper bound */ - } - } /* end of leave-one-out loop */ - - - if(verbosity>=1) { - printf("\nRetrain on full problem"); fflush(stdout); + } /* end of leave-one-out loop */ + + if (verbosity >= 1) { + printf("\nRetrain on full problem"); + fflush(stdout); } - optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, - kernel_parm, - kernel_cache,&shrink_state,model,inconsistent,unlabeled, - a,lin,c,&timing_profile, - &maxdiff,(long)-1,(long)1); - if(verbosity >= 1) - printf("done.\n"); - - + optimize_to_convergence(docs, label, totdoc, totwords, learn_parm, + kernel_parm, kernel_cache, &shrink_state, model, + inconsistent, unlabeled, a, lin, c, &timing_profile, + &maxdiff, (long)-1, (long)1); + if (verbosity >= 1) + printf("done.\n"); + /* after all leave-one-out computed */ - model->loo_error=100.0*loo_count/(double)totdoc; - model->loo_recall=(1.0-(double)loo_count_pos/(double)trainpos)*100.0; - model->loo_precision=(trainpos-loo_count_pos)/ - (double)(trainpos-loo_count_pos+loo_count_neg)*100.0; - if(verbosity >= 1) { - fprintf(stdout,"Leave-one-out estimate of the error: error=%.2f%%\n", - model->loo_error); - 
fprintf(stdout,"Leave-one-out estimate of the recall: recall=%.2f%%\n", - model->loo_recall); - fprintf(stdout,"Leave-one-out estimate of the precision: precision=%.2f%%\n", - model->loo_precision); - fprintf(stdout,"Actual leave-one-outs computed: %ld (rho=%.2f)\n", - loocomputed,learn_parm->rho); - printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", - (get_runtime()-runtime_start_loo)/100.0); + model->loo_error = 100.0 * loo_count / (double)totdoc; + model->loo_recall = + (1.0 - (double)loo_count_pos / (double)trainpos) * 100.0; + model->loo_precision = (trainpos - loo_count_pos) / + (double)(trainpos - loo_count_pos + loo_count_neg) * + 100.0; + if (verbosity >= 1) { + fprintf(stdout, "Leave-one-out estimate of the error: error=%.2f%%\n", + model->loo_error); + fprintf(stdout, "Leave-one-out estimate of the recall: recall=%.2f%%\n", + model->loo_recall); + fprintf(stdout, + "Leave-one-out estimate of the precision: precision=%.2f%%\n", + model->loo_precision); + fprintf(stdout, "Actual leave-one-outs computed: %ld (rho=%.2f)\n", + loocomputed, learn_parm->rho); + printf("Runtime for leave-one-out in cpu-seconds: %.2f\n", + (get_runtime() - runtime_start_loo) / 100.0); } - } - - if(learn_parm->alphafile[0]) - write_alphas(learn_parm->alphafile,a,label,totdoc); - + } + + if (learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile, a, label, totdoc); + shrink_state_cleanup(&shrink_state); free(label); free(inconsistent); @@ -476,36 +499,35 @@ void svm_learn_classification(DOC **docs, double *class, long int free(xi_fullset); free(lin); free(learn_parm->svm_cost); -} - - +} + /* Learns an SVM regression model based on the training data in docs/label. The resulting model is returned in the structure model. */ -void svm_learn_regression(DOC **docs, double *value, long int totdoc, - long int totwords, LEARN_PARM *learn_parm, - KERNEL_PARM *kernel_parm, - KERNEL_CACHE **kernel_cache, MODEL *model) - /* docs: Training vectors (x-part) */ - /* class: Training value (y-part) */ - /* totdoc: Number of examples in docs/label */ - /* totwords: Number of features (i.e. highest feature index) */ - /* learn_parm: Learning paramenters */ - /* kernel_parm: Kernel paramenters */ - /* kernel_cache:Initialized Cache, if using a kernel. NULL if - linear. Note that it will be free'd and reassigned */ - /* model: Returns learning result (assumed empty before called) */ +void svm_learn_regression(DOC **docs, double *value, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache, + MODEL *model) +/* docs: Training vectors (x-part) */ +/* class: Training value (y-part) */ +/* totdoc: Number of examples in docs/label */ +/* totwords: Number of features (i.e. highest feature index) */ +/* learn_parm: Learning paramenters */ +/* kernel_parm: Kernel paramenters */ +/* kernel_cache:Initialized Cache, if using a kernel. NULL if + linear. 
Note that it will be free'd and reassigned */ +/* model: Returns learning result (assumed empty before called) */ { - long *inconsistent,i,j; + long *inconsistent, i, j; long inconsistentnum; long upsupvecnum; - double loss,model_length,example_length; - double maxdiff,*lin,*a,*c; - double runtime_start,runtime_end; - long iterations,kernel_cache_size; + double loss, model_length, example_length; + double maxdiff, *lin, *a, *c; + double runtime_start, runtime_end; + long iterations, kernel_cache_size; long *unlabeled; - double r_delta_sq=0,r_delta,r_delta_avg; + double r_delta_sq = 0, r_delta, r_delta_avg; double *xi_fullset; /* buffer for storing xi on full sample in loo */ double *a_fullset; /* buffer for storing alpha on full sample in loo */ TIMING timing_profile; @@ -514,195 +536,203 @@ void svm_learn_regression(DOC **docs, double *value, long int totdoc, long *label; /* set up regression problem in standard form */ - docs_org=docs; - docs = (DOC **)my_malloc(sizeof(DOC)*2*totdoc); - label = (long *)my_malloc(sizeof(long)*2*totdoc); - c = (double *)my_malloc(sizeof(double)*2*totdoc); - for(i=0;icostfactor,docs_org[i]->fvec); - label[i]=+1; - c[i]=value[i]; - docs[j]=create_example(j,0,0,docs_org[i]->costfactor,docs_org[i]->fvec); - label[j]=-1; - c[j]=value[i]; + docs_org = docs; + docs = (DOC **)my_malloc(sizeof(DOC) * 2 * totdoc); + label = (long *)my_malloc(sizeof(long) * 2 * totdoc); + c = (double *)my_malloc(sizeof(double) * 2 * totdoc); + for (i = 0; i < totdoc; i++) { + j = 2 * totdoc - 1 - i; + docs[i] = + create_example(i, 0, 0, docs_org[i]->costfactor, docs_org[i]->fvec); + label[i] = +1; + c[i] = value[i]; + docs[j] = + create_example(j, 0, 0, docs_org[i]->costfactor, docs_org[i]->fvec); + label[j] = -1; + c[j] = value[i]; } - totdoc*=2; + totdoc *= 2; /* need to get a bigger kernel cache */ - if(*kernel_cache) { - kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + if (*kernel_cache) { + kernel_cache_size = + (*kernel_cache)->buffsize * sizeof(CFLOAT) / (1024 * 1024); kernel_cache_cleanup(*kernel_cache); - (*kernel_cache)=kernel_cache_init(totdoc,kernel_cache_size); + (*kernel_cache) = kernel_cache_init(totdoc, kernel_cache_size); } - runtime_start=get_runtime(); - timing_profile.time_kernel=0; - timing_profile.time_opti=0; - timing_profile.time_shrink=0; - timing_profile.time_update=0; - timing_profile.time_model=0; - timing_profile.time_check=0; - timing_profile.time_select=0; - kernel_cache_statistic=0; + runtime_start = get_runtime(); + timing_profile.time_kernel = 0; + timing_profile.time_opti = 0; + timing_profile.time_shrink = 0; + timing_profile.time_update = 0; + timing_profile.time_model = 0; + timing_profile.time_check = 0; + timing_profile.time_select = 0; + kernel_cache_statistic = 0; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; /* make sure -n value is reasonable */ - if((learn_parm->svm_newvarsinqp < 2) - || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if ((learn_parm->svm_newvarsinqp < 2) || + (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; } - init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + init_shrink_state(&shrink_state, totdoc, (long)MAXSHRINK); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - a = (double *)my_malloc(sizeof(double)*totdoc); - a_fullset = (double 
*)my_malloc(sizeof(double)*totdoc); - xi_fullset = (double *)my_malloc(sizeof(double)*totdoc); - lin = (double *)my_malloc(sizeof(double)*totdoc); - learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); + unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + a = (double *)my_malloc(sizeof(double) * totdoc); + a_fullset = (double *)my_malloc(sizeof(double) * totdoc); + xi_fullset = (double *)my_malloc(sizeof(double) * totdoc); + lin = (double *)my_malloc(sizeof(double) * totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double) * totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); - model->at_upper_bound=0; - model->b=0; - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->sv_num=1; - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; - inconsistentnum=0; + model->at_upper_bound = 0; + model->b = 0; + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->sv_num = 1; + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; + inconsistentnum = 0; - r_delta=estimate_r_delta(docs,totdoc,kernel_parm); - r_delta_sq=r_delta*r_delta; + r_delta = estimate_r_delta(docs, totdoc, kernel_parm); + r_delta_sq = r_delta * r_delta; - r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); - if(learn_parm->svm_c == 0.0) { /* default value for C */ - learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); - if(verbosity>=1) - printf("Setting default regularization parameter C=%.4f\n", - learn_parm->svm_c); + r_delta_avg = estimate_r_delta_average(docs, totdoc, kernel_parm); + if (learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c = 1.0 / (r_delta_avg * r_delta_avg); + if (verbosity >= 1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); } - for(i=0;i 0) { - learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* - docs[i]->costfactor; - } - else if(label[i] < 0) { - learn_parm->svm_cost[i]=learn_parm->svm_c*docs[i]->costfactor; + for (i = 0; i < totdoc; i++) { /* various inits */ + inconsistent[i] = 0; + a[i] = 0; + lin[i] = 0; + unlabeled[i] = 0; + if (label[i] > 0) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_costratio * docs[i]->costfactor; + } else if (label[i] < 0) { + learn_parm->svm_cost[i] = learn_parm->svm_c * docs[i]->costfactor; } } /* caching makes no sense for linear kernel */ - if((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) { - printf("WARNING: Using a kernel cache for linear case will slow optimization down!\n"); - } - - if(verbosity==1) { - printf("Optimizing"); fflush(stdout); + if ((kernel_parm->kernel_type == LINEAR) && (*kernel_cache)) { + 
printf("WARNING: Using a kernel cache for linear case will slow " + "optimization down!\n"); + } + + if (verbosity == 1) { + printf("Optimizing"); + fflush(stdout); } /* train the svm */ - iterations=optimize_to_convergence(docs,label,totdoc,totwords,learn_parm, - kernel_parm,*kernel_cache,&shrink_state, - model,inconsistent,unlabeled,a,lin,c, - &timing_profile,&maxdiff,(long)-1, - (long)1); - - if(verbosity>=1) { - if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + iterations = optimize_to_convergence( + docs, label, totdoc, totwords, learn_parm, kernel_parm, *kernel_cache, + &shrink_state, model, inconsistent, unlabeled, a, lin, c, &timing_profile, + &maxdiff, (long)-1, (long)1); + + if (verbosity >= 1) { + if (verbosity == 1) + printf("done. (%ld iterations)\n", iterations); - printf("Optimization finished (maxdiff=%.5f).\n",maxdiff); - - runtime_end=get_runtime(); - if(verbosity>=2) { - printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n", - (runtime_end-runtime_start)/100.0, - (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start), - (100.0*timing_profile.time_opti)/(runtime_end-runtime_start), - (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start), - (100.0*timing_profile.time_update)/(runtime_end-runtime_start), - (100.0*timing_profile.time_model)/(runtime_end-runtime_start), - (100.0*timing_profile.time_check)/(runtime_end-runtime_start), - (100.0*timing_profile.time_select)/(runtime_end-runtime_start)); - } - else { - printf("Runtime in cpu-seconds: %.2f\n", - (runtime_end-runtime_start)/100.0); + printf("Optimization finished (maxdiff=%.5f).\n", maxdiff); + + runtime_end = get_runtime(); + if (verbosity >= 2) { + printf( + "Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for " + "optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for " + "model/%.2f%% for check/%.2f%% for select)\n", + (runtime_end - runtime_start) / 100.0, + (100.0 * timing_profile.time_kernel) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_opti) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_shrink) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_update) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_model) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_check) / (runtime_end - runtime_start), + (100.0 * timing_profile.time_select) / (runtime_end - runtime_start)); + } else { + printf("Runtime in cpu-seconds: %.2f\n", + (runtime_end - runtime_start) / 100.0); } - if(learn_parm->remove_inconsistent) { - inconsistentnum=0; - for(i=0;isv_num-1,inconsistentnum); - } - else { - upsupvecnum=0; - for(i=1;isv_num;i++) { - if(fabs(model->alpha[i]) >= - (learn_parm->svm_cost[(model->supvec[i])->docnum]- - learn_parm->epsilon_a)) - upsupvecnum++; + if (learn_parm->remove_inconsistent) { + inconsistentnum = 0; + for (i = 0; i < totdoc; i++) + if (inconsistent[i]) + inconsistentnum++; + printf("Number of SV: %ld (plus %ld inconsistent examples)\n", + model->sv_num - 1, inconsistentnum); + } else { + upsupvecnum = 0; + for (i = 1; i < model->sv_num; i++) { + if (fabs(model->alpha[i]) >= + (learn_parm->svm_cost[(model->supvec[i])->docnum] - + learn_parm->epsilon_a)) + upsupvecnum++; } - printf("Number of SV: %ld (including %ld at upper bound)\n", - model->sv_num-1,upsupvecnum); - } - - if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) { - loss=0; - model_length=0; - 
for(i=0;ib)*(double)label[i] < (-learn_parm->eps+(double)label[i]*c[i])-learn_parm->epsilon_crit) - loss+=-learn_parm->eps+(double)label[i]*c[i]-(lin[i]-model->b)*(double)label[i]; - model_length+=a[i]*label[i]*lin[i]; + printf("Number of SV: %ld (including %ld at upper bound)\n", + model->sv_num - 1, upsupvecnum); + } + + if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) { + loss = 0; + model_length = 0; + for (i = 0; i < totdoc; i++) { + if ((lin[i] - model->b) * (double)label[i] < + (-learn_parm->eps + (double)label[i] * c[i]) - + learn_parm->epsilon_crit) + loss += -learn_parm->eps + (double)label[i] * c[i] - + (lin[i] - model->b) * (double)label[i]; + model_length += a[i] * label[i] * lin[i]; } - model_length=sqrt(model_length); - fprintf(stdout,"L1 loss: loss=%.5f\n",loss); - fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length); - example_length=estimate_sphere(model); - fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n", - length_of_longest_document_vector(docs,totdoc,kernel_parm)); + model_length = sqrt(model_length); + fprintf(stdout, "L1 loss: loss=%.5f\n", loss); + fprintf(stdout, "Norm of weight vector: |w|=%.5f\n", model_length); + example_length = estimate_sphere(model); + fprintf(stdout, "Norm of longest example vector: |x|=%.5f\n", + length_of_longest_document_vector(docs, totdoc, kernel_parm)); } - if(verbosity>=1) { - printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic); + if (verbosity >= 1) { + printf("Number of kernel evaluations: %ld\n", kernel_cache_statistic); } - } - - if(learn_parm->alphafile[0]) - write_alphas(learn_parm->alphafile,a,label,totdoc); + } + + if (learn_parm->alphafile[0]) + write_alphas(learn_parm->alphafile, a, label, totdoc); - /* this makes sure the model we return does not contain pointers to the + /* this makes sure the model we return does not contain pointers to the temporary documents */ - for(i=1;isv_num;i++) { - j=model->supvec[i]->docnum; - if(j >= (totdoc/2)) { - j=totdoc-j-1; + for (i = 1; i < model->sv_num; i++) { + j = model->supvec[i]->docnum; + if (j >= (totdoc / 2)) { + j = totdoc - j - 1; } - model->supvec[i]=docs_org[j]; - } - + model->supvec[i] = docs_org[j]; + } + shrink_state_cleanup(&shrink_state); - for(i=0;isvm_cost); } -void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, - long int totwords, LEARN_PARM *learn_parm, - KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache, - MODEL *model) - /* docs: Training vectors (x-part) */ - /* rankvalue: Training target values that determine the ranking */ - /* totdoc: Number of examples in docs/label */ - /* totwords: Number of features (i.e. highest feature index) */ - /* learn_parm: Learning paramenters */ - /* kernel_parm: Kernel paramenters */ - /* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if - using a kernel. NULL if linear. NOTE: Cache is - getting reinitialized in this function */ - /* model: Returns learning result (assumed empty before called) */ +void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, + long int totwords, LEARN_PARM *learn_parm, + KERNEL_PARM *kernel_parm, KERNEL_CACHE **kernel_cache, + MODEL *model) +/* docs: Training vectors (x-part) */ +/* rankvalue: Training target values that determine the ranking */ +/* totdoc: Number of examples in docs/label */ +/* totwords: Number of features (i.e. 
highest feature index) */ +/* learn_parm: Learning paramenters */ +/* kernel_parm: Kernel paramenters */ +/* kernel_cache:Initialized pointer to Cache of size 1*totdoc, if + using a kernel. NULL if linear. NOTE: Cache is + getting reinitialized in this function */ +/* model: Returns learning result (assumed empty before called) */ { DOC **docdiff; - long i,j,k,totpair,kernel_cache_size; - double *target,*alpha,cost; - long *greater,*lesser; + long i, j, k, totpair, kernel_cache_size; + double *target, *alpha, cost; + long *greater, *lesser; MODEL *pairmodel; - SVECTOR *flow,*fhigh; + SVECTOR *flow, *fhigh; - totpair=0; - for(i=0;iqueryid==docs[j]->queryid) && (rankvalue[i] != rankvalue[j])) { - totpair++; + totpair = 0; + for (i = 0; i < totdoc; i++) { + for (j = i + 1; j < totdoc; j++) { + if ((docs[i]->queryid == docs[j]->queryid) && + (rankvalue[i] != rankvalue[j])) { + totpair++; } } } - printf("Constructing %ld rank constraints...",totpair); fflush(stdout); - docdiff=(DOC **)my_malloc(sizeof(DOC)*totpair); - target=(double *)my_malloc(sizeof(double)*totpair); - greater=(long *)my_malloc(sizeof(long)*totpair); - lesser=(long *)my_malloc(sizeof(long)*totpair); - - k=0; - for(i=0;iqueryid == docs[j]->queryid) { - /* "Hijacked" costfactor to input rhs of constraints */ - /* cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; */ - cost=1; - if(rankvalue[i] > rankvalue[j]) { - if(kernel_parm->kernel_type == LINEAR) - docdiff[k]=create_example(k,0,0,cost, - sub_ss(docs[i]->fvec,docs[j]->fvec)); - else { - flow=copy_svector(docs[j]->fvec); - flow->factor=-1.0; - flow->next=NULL; - fhigh=copy_svector(docs[i]->fvec); - fhigh->factor=1.0; - fhigh->next=flow; - docdiff[k]=create_example(k,0,0,cost,fhigh); - } - target[k]=1+docs[i]->costfactor-docs[j]->costfactor; - greater[k]=i; - lesser[k]=j; - k++; - } - else if(rankvalue[i] < rankvalue[j]) { - if(kernel_parm->kernel_type == LINEAR) - docdiff[k]=create_example(k,0,0,cost, - sub_ss(docs[j]->fvec,docs[i]->fvec)); - else { - flow=copy_svector(docs[j]->fvec); - flow->factor=1.0; - flow->next=NULL; - fhigh=copy_svector(docs[i]->fvec); - fhigh->factor=-1.0; - fhigh->next=flow; - docdiff[k]=create_example(k,0,0,cost,fhigh); - } - target[k]=1+docs[j]->costfactor-docs[i]->costfactor; - greater[k]=j; - lesser[k]=i; - k++; - } + printf("Constructing %ld rank constraints...", totpair); + fflush(stdout); + docdiff = (DOC **)my_malloc(sizeof(DOC) * totpair); + target = (double *)my_malloc(sizeof(double) * totpair); + greater = (long *)my_malloc(sizeof(long) * totpair); + lesser = (long *)my_malloc(sizeof(long) * totpair); + + k = 0; + for (i = 0; i < totdoc; i++) { + for (j = i + 1; j < totdoc; j++) { + if (docs[i]->queryid == docs[j]->queryid) { + /* "Hijacked" costfactor to input rhs of constraints */ + /* cost=(docs[i]->costfactor+docs[j]->costfactor)/2.0; */ + cost = 1; + if (rankvalue[i] > rankvalue[j]) { + if (kernel_parm->kernel_type == LINEAR) + docdiff[k] = create_example(k, 0, 0, cost, + sub_ss(docs[i]->fvec, docs[j]->fvec)); + else { + flow = copy_svector(docs[j]->fvec); + flow->factor = -1.0; + flow->next = NULL; + fhigh = copy_svector(docs[i]->fvec); + fhigh->factor = 1.0; + fhigh->next = flow; + docdiff[k] = create_example(k, 0, 0, cost, fhigh); + } + target[k] = 1 + docs[i]->costfactor - docs[j]->costfactor; + greater[k] = i; + lesser[k] = j; + k++; + } else if (rankvalue[i] < rankvalue[j]) { + if (kernel_parm->kernel_type == LINEAR) + docdiff[k] = create_example(k, 0, 0, cost, + sub_ss(docs[j]->fvec, docs[i]->fvec)); + else { + flow = 
copy_svector(docs[j]->fvec); + flow->factor = 1.0; + flow->next = NULL; + fhigh = copy_svector(docs[i]->fvec); + fhigh->factor = -1.0; + fhigh->next = flow; + docdiff[k] = create_example(k, 0, 0, cost, fhigh); + } + target[k] = 1 + docs[j]->costfactor - docs[i]->costfactor; + greater[k] = j; + lesser[k] = i; + k++; + } } } } - printf("done.\n"); fflush(stdout); + printf("done.\n"); + fflush(stdout); /* need to get a bigger kernel cache */ - if(*kernel_cache) { - kernel_cache_size=(*kernel_cache)->buffsize*sizeof(CFLOAT)/(1024*1024); + if (*kernel_cache) { + kernel_cache_size = + (*kernel_cache)->buffsize * sizeof(CFLOAT) / (1024 * 1024); kernel_cache_cleanup(*kernel_cache); - (*kernel_cache)=kernel_cache_init(totpair,kernel_cache_size); + (*kernel_cache) = kernel_cache_init(totpair, kernel_cache_size); } /* must use unbiased hyperplane on difference vectors */ - learn_parm->biased_hyperplane=0; - pairmodel=(MODEL *)my_malloc(sizeof(MODEL)); - svm_learn_optimization(docdiff,target,totpair,totwords,learn_parm, - kernel_parm,(*kernel_cache),pairmodel,NULL); + learn_parm->biased_hyperplane = 0; + pairmodel = (MODEL *)my_malloc(sizeof(MODEL)); + svm_learn_optimization(docdiff, target, totpair, totwords, learn_parm, + kernel_parm, (*kernel_cache), pairmodel, NULL); /* Transfer the result into a more compact model. If you would like to output the original model on pairs of documents, see below. */ - alpha=(double *)my_malloc(sizeof(double)*totdoc); - for(i=0;isv_num;i++) { - alpha[lesser[(pairmodel->supvec[i])->docnum]]-=pairmodel->alpha[i]; - alpha[greater[(pairmodel->supvec[i])->docnum]]+=pairmodel->alpha[i]; + for (i = 1; i < pairmodel->sv_num; i++) { + alpha[lesser[(pairmodel->supvec[i])->docnum]] -= pairmodel->alpha[i]; + alpha[greater[(pairmodel->supvec[i])->docnum]] += pairmodel->alpha[i]; } - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->sv_num=1; - for(i=0;isupvec[model->sv_num]=docs[i]; - model->alpha[model->sv_num]=alpha[i]; - model->index[i]=model->sv_num; + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->sv_num = 1; + for (i = 0; i < totdoc; i++) { + if (alpha[i]) { + model->supvec[model->sv_num] = docs[i]; + model->alpha[model->sv_num] = alpha[i]; + model->index[i] = model->sv_num; model->sv_num++; - } - else { - model->index[i]=-1; + } else { + model->index[i] = -1; } } - model->at_upper_bound=0; - model->b=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; + model->at_upper_bound = 0; + model->b = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; free(alpha); free(greater); @@ -860,13 +892,12 @@ void svm_learn_ranking(DOC **docs, double *rankvalue, long int totdoc, 
/* If you would like to output the original model on pairs of document, replace the following lines with '(*model)=(*pairmodel);' */ - for(i=0;itotwords=totwords; + learn_parm->totwords = totwords; /* make sure -n value is reasonable */ - if((learn_parm->svm_newvarsinqp < 2) - || (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if ((learn_parm->svm_newvarsinqp < 2) || + (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize)) { + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; } - init_shrink_state(&shrink_state,totdoc,(long)MAXSHRINK); + init_shrink_state(&shrink_state, totdoc, (long)MAXSHRINK); - label = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - c = (double *)my_malloc(sizeof(double)*totdoc); - a = (double *)my_malloc(sizeof(double)*totdoc); - lin = (double *)my_malloc(sizeof(double)*totdoc); - learn_parm->svm_cost = (double *)my_malloc(sizeof(double)*totdoc); - model->supvec = (DOC **)my_malloc(sizeof(DOC *)*(totdoc+2)); - model->alpha = (double *)my_malloc(sizeof(double)*(totdoc+2)); - model->index = (long *)my_malloc(sizeof(long)*(totdoc+2)); + label = (long *)my_malloc(sizeof(long) * totdoc); + unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); + c = (double *)my_malloc(sizeof(double) * totdoc); + a = (double *)my_malloc(sizeof(double) * totdoc); + lin = (double *)my_malloc(sizeof(double) * totdoc); + learn_parm->svm_cost = (double *)my_malloc(sizeof(double) * totdoc); + model->supvec = (DOC **)my_malloc(sizeof(DOC *) * (totdoc + 2)); + model->alpha = (double *)my_malloc(sizeof(double) * (totdoc + 2)); + model->index = (long *)my_malloc(sizeof(long) * (totdoc + 2)); - model->at_upper_bound=0; - model->b=0; - model->supvec[0]=0; /* element 0 reserved and empty for now */ - model->alpha[0]=0; - model->lin_weights=NULL; - model->totwords=totwords; - model->totdoc=totdoc; - model->kernel_parm=(*kernel_parm); - model->sv_num=1; - model->loo_error=-1; - model->loo_recall=-1; - model->loo_precision=-1; - model->xa_error=-1; - model->xa_recall=-1; - model->xa_precision=-1; + model->at_upper_bound = 0; + model->b = 0; + model->supvec[0] = 0; /* element 0 reserved and empty for now */ + model->alpha[0] = 0; + model->lin_weights = NULL; + model->totwords = totwords; + model->totdoc = totdoc; + model->kernel_parm = (*kernel_parm); + model->sv_num = 1; + model->loo_error = -1; + model->loo_recall = -1; + model->loo_precision = -1; + model->xa_error = -1; + model->xa_recall = -1; + model->xa_precision = -1; - r_delta_avg=estimate_r_delta_average(docs,totdoc,kernel_parm); - if(learn_parm->svm_c == 0.0) { /* default value for C */ - learn_parm->svm_c=1.0/(r_delta_avg*r_delta_avg); - if(verbosity>=1) - printf("Setting default regularization parameter C=%.4f\n", - learn_parm->svm_c); + r_delta_avg = estimate_r_delta_average(docs, totdoc, kernel_parm); + if (learn_parm->svm_c == 0.0) { /* default value for C */ + learn_parm->svm_c = 1.0 / (r_delta_avg * r_delta_avg); + if (verbosity >= 1) + printf("Setting default regularization parameter C=%.4f\n", + learn_parm->svm_c); } - learn_parm->biased_hyperplane=0; /* learn an unbiased hyperplane */ + learn_parm->biased_hyperplane = 0; /* learn an unbiased hyperplane */ - learn_parm->eps=0.0; /* No margin, unless explicitly handcoded - in the right-hand side in the training - set. 
*/ + learn_parm->eps = 0.0; /* No margin, unless explicitly handcoded + in the right-hand side in the training + set. */ - for(i=0;idocnum=i; - a[i]=0; - lin[i]=0; - c[i]=rhs[i]; /* set right-hand side */ - unlabeled[i]=0; - inconsistent[i]=0; - learn_parm->svm_cost[i]=learn_parm->svm_c*learn_parm->svm_costratio* - docs[i]->costfactor; - label[i]=1; + for (i = 0; i < totdoc; i++) { /* various inits */ + docs[i]->docnum = i; + a[i] = 0; + lin[i] = 0; + c[i] = rhs[i]; /* set right-hand side */ + unlabeled[i] = 0; + inconsistent[i] = 0; + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_costratio * docs[i]->costfactor; + label[i] = 1; } - if(learn_parm->sharedslack) /* if shared slacks are used, they must */ - for(i=0;islackid) { - perror("Error: Missing shared slacks definitions in some of the examples."); - exit(0); + if (learn_parm->sharedslack) /* if shared slacks are used, they must */ + for (i = 0; i < totdoc; i++) /* be used on every constraint */ + if (!docs[i]->slackid) { + perror("Error: Missing shared slacks definitions in some of the " + "examples."); + exit(0); } /* print kernel matrix */ @@ -1001,203 +1033,211 @@ void svm_learn_optimization(DOC **docs, double *rhs, long int */ /* compute starting state for initial alpha values */ - if(alpha) { - if(verbosity>=1) { - printf("Computing starting state..."); fflush(stdout); + if (alpha) { + if (verbosity >= 1) { + printf("Computing starting state..."); + fflush(stdout); } - index = (long *)my_malloc(sizeof(long)*totdoc); - index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - if(kernel_parm->kernel_type == LINEAR) { - weights=(double *)my_malloc(sizeof(double)*(totwords+1)); - clear_nvector(weights,totwords); /* set weights to zero */ - aicache=NULL; + index = (long *)my_malloc(sizeof(long) * totdoc); + index2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + if (kernel_parm->kernel_type == LINEAR) { + weights = (double *)my_malloc(sizeof(double) * (totwords + 1)); + clear_nvector(weights, totwords); /* set weights to zero */ + aicache = NULL; + } else { + weights = NULL; + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc); } - else { - weights=NULL; - aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); + for (i = 0; i < totdoc; i++) { /* create full index and clip alphas */ + index[i] = 1; + alpha[i] = fabs(alpha[i]); + if (alpha[i] < 0) + alpha[i] = 0; + if (alpha[i] > learn_parm->svm_cost[i]) + alpha[i] = learn_parm->svm_cost[i]; } - for(i=0;ilearn_parm->svm_cost[i]) alpha[i]=learn_parm->svm_cost[i]; + if (kernel_cache && (kernel_parm->kernel_type != LINEAR)) { + for (i = 0; i < totdoc; i++) /* fill kernel cache with unbounded SV */ + if ((alpha[i] > 0) && (alpha[i] < learn_parm->svm_cost[i]) && + (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache, docs, i, kernel_parm); + for (i = 0; i < totdoc; + i++) /* fill rest of kernel cache with bounded SV */ + if ((alpha[i] == learn_parm->svm_cost[i]) && + (kernel_cache_space_available(kernel_cache))) + cache_kernel_row(kernel_cache, docs, i, kernel_parm); } - if(kernel_cache && (kernel_parm->kernel_type != LINEAR)) { - for(i=0;i0) && (alpha[i]svm_cost[i]) - && (kernel_cache_space_available(kernel_cache))) - cache_kernel_row(kernel_cache,docs,i,kernel_parm); - for(i=0;isvm_cost[i]) - && (kernel_cache_space_available(kernel_cache))) - cache_kernel_row(kernel_cache,docs,i,kernel_parm); - } - (void)compute_index(index,totdoc,index2dnum); - update_linear_component(docs,label,index2dnum,alpha,a,index2dnum,totdoc, - 
totwords,kernel_parm,kernel_cache,lin,aicache, - weights); - (void)calculate_svm_model(docs,label,unlabeled,lin,alpha,a,c, - learn_parm,index2dnum,index2dnum,model); - for(i=0;i=1) { - printf("done.\n"); fflush(stdout); - } - } - + if (weights) + free(weights); + if (aicache) + free(aicache); + if (verbosity >= 1) { + printf("done.\n"); + fflush(stdout); + } + } + /* removing inconsistent does not work for general optimization problem */ - if(learn_parm->remove_inconsistent) { + if (learn_parm->remove_inconsistent) { learn_parm->remove_inconsistent = 0; - printf("'remove inconsistent' not available in this mode. Switching option off!"); fflush(stdout); + printf("'remove inconsistent' not available in this mode. Switching option " + "off!"); + fflush(stdout); } /* caching makes no sense for linear kernel */ - if(kernel_parm->kernel_type == LINEAR) { + if (kernel_parm->kernel_type == LINEAR) { /* kernel_cache = NULL; */ - } - - if(verbosity==1) { - printf("Optimizing"); fflush(stdout); + } + + if (verbosity == 1) { + printf("Optimizing"); + fflush(stdout); } /* train the svm */ - if(learn_parm->sharedslack) - iterations=optimize_to_convergence_sharedslack(docs,label,totdoc, - totwords,learn_parm,kernel_parm, - kernel_cache,&shrink_state,model, - a,lin,c,&timing_profile, - &maxdiff); + if (learn_parm->sharedslack) + iterations = optimize_to_convergence_sharedslack( + docs, label, totdoc, totwords, learn_parm, kernel_parm, kernel_cache, + &shrink_state, model, a, lin, c, &timing_profile, &maxdiff); else - iterations=optimize_to_convergence(docs,label,totdoc, - totwords,learn_parm,kernel_parm, - kernel_cache,&shrink_state,model, - inconsistent,unlabeled, - a,lin,c,&timing_profile, - &maxdiff,(long)-1,(long)1); - - if(verbosity>=1) { - if(verbosity==1) printf("done. (%ld iterations)\n",iterations); + iterations = optimize_to_convergence( + docs, label, totdoc, totwords, learn_parm, kernel_parm, kernel_cache, + &shrink_state, model, inconsistent, unlabeled, a, lin, c, + &timing_profile, &maxdiff, (long)-1, (long)1); + + if (verbosity >= 1) { + if (verbosity == 1) + printf("done. 
(%ld iterations)\n", iterations);

-  misclassified=0;
-  for(i=0;(i<totdoc);i++) { /* get final statistic */
-    if((lin[i]-model->b)*(double)label[i] <= 0.0)
-      misclassified++;
+  misclassified = 0;
+  for (i = 0; (i < totdoc); i++) { /* get final statistic */
+    if ((lin[i] - model->b) * (double)label[i] <= 0.0)
+      misclassified++;
   }
-  printf("Optimization finished (maxdiff=%.5f).\n",maxdiff);
-
-  runtime_end=get_runtime();
-  if(verbosity>=2) {
-    printf("Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for model/%.2f%% for check/%.2f%% for select)\n",
-           (runtime_end-runtime_start)/100.0,
-           (100.0*timing_profile.time_kernel)/(runtime_end-runtime_start),
-           (100.0*timing_profile.time_opti)/(runtime_end-runtime_start),
-           (100.0*timing_profile.time_shrink)/(runtime_end-runtime_start),
-           (100.0*timing_profile.time_update)/(runtime_end-runtime_start),
-           (100.0*timing_profile.time_model)/(runtime_end-runtime_start),
-           (100.0*timing_profile.time_check)/(runtime_end-runtime_start),
-           (100.0*timing_profile.time_select)/(runtime_end-runtime_start));
-  }
-  else {
-    printf("Runtime in cpu-seconds: %.2f\n",
-           (runtime_end-runtime_start)/100.0);
+  printf("Optimization finished (maxdiff=%.5f).\n", maxdiff);
+
+  runtime_end = get_runtime();
+  if (verbosity >= 2) {
+    printf(
+        "Runtime in cpu-seconds: %.2f (%.2f%% for kernel/%.2f%% for "
+        "optimizer/%.2f%% for final/%.2f%% for update/%.2f%% for "
+        "model/%.2f%% for check/%.2f%% for select)\n",
+        (runtime_end - runtime_start) / 100.0,
+        (100.0 * timing_profile.time_kernel) / (runtime_end - runtime_start),
+        (100.0 * timing_profile.time_opti) / (runtime_end - runtime_start),
+        (100.0 * timing_profile.time_shrink) / (runtime_end - runtime_start),
+        (100.0 * timing_profile.time_update) / (runtime_end - runtime_start),
+        (100.0 * timing_profile.time_model) / (runtime_end - runtime_start),
+        (100.0 * timing_profile.time_check) / (runtime_end - runtime_start),
+        (100.0 * timing_profile.time_select) / (runtime_end - runtime_start));
+  } else {
+    printf("Runtime in cpu-seconds: %.2f\n",
+           (runtime_end - runtime_start) / 100.0);
    }
  }
-  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
-    loss=0;
-    model_length=0;
-    alphasum=0;
-    for(i=0;i<totdoc;i++) {
-      if((lin[i]-model->b)*(double)label[i] < c[i]-learn_parm->epsilon_crit)
-        loss+=c[i]-(lin[i]-model->b)*(double)label[i];
-      model_length+=a[i]*label[i]*lin[i];
-      alphasum+=rhs[i]*a[i];
+  if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) {
+    loss = 0;
+    model_length = 0;
+    alphasum = 0;
+    for (i = 0; i < totdoc; i++) {
+      if ((lin[i] - model->b) * (double)label[i] <
+          c[i] - learn_parm->epsilon_crit)
+        loss += c[i] - (lin[i] - model->b) * (double)label[i];
+      model_length += a[i] * label[i] * lin[i];
+      alphasum += rhs[i] * a[i];
    }
-    model_length=sqrt(model_length);
-    fprintf(stdout,"Dual objective value: dval=%.5f\n",
-            alphasum-0.5*model_length*model_length);
-    fprintf(stdout,"Norm of weight vector: |w|=%.5f\n",model_length);
-  }
-
-  if(learn_parm->sharedslack) {
-    index = (long *)my_malloc(sizeof(long)*totdoc);
-    index2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
-    maxslackid=0;
-    for(i=0;i<totdoc;i++) { /* create full index */
-      index[i]=1;
-      if(maxslackid<docs[i]->slackid)
-        maxslackid=docs[i]->slackid;
+    model_length = sqrt(model_length);
+    fprintf(stdout, "Dual objective value: dval=%.5f\n",
+            alphasum - 0.5 * model_length * model_length);
+    fprintf(stdout, "Norm of weight vector: |w|=%.5f\n", model_length);
+  }
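+  /* [Editor's note: this comment block is an addition, not part of the
+     original patch.] The dual objective printed above is computed as
+
+         dval = sum_i rhs[i]*a[i] - 0.5*|w|^2,
+
+     where |w|^2 is accumulated as sum_i a[i]*label[i]*lin[i]; this works
+     because lin[i], the linear component of the gradient, already caches
+     sum_j a[j]*label[j]*K(x_i,x_j). Worked example: with rhs = {1,1},
+     a = {0.5,0.5} and |w| = 0.8 the printout would read
+     dval = 1.0 - 0.5*0.64 = 0.68. */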
+
+  if (learn_parm->sharedslack) {
+    index = (long *)my_malloc(sizeof(long) * totdoc);
+    index2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+    maxslackid = 0;
+    for (i = 0; i < totdoc; i++) { /* create full index */
+      index[i] = 1;
+      if (maxslackid < docs[i]->slackid)
+        maxslackid = docs[i]->slackid;
    }
-    (void)compute_index(index,totdoc,index2dnum);
-    slack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
-    alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1));
-    for(i=0;i<=maxslackid;i++) { /* init shared slacks */
-      slack[i]=0;
-      alphaslack[i]=0;
+    (void)compute_index(index, totdoc, index2dnum);
+    slack = (double *)my_malloc(sizeof(double) * (maxslackid + 1));
+    alphaslack = (double *)my_malloc(sizeof(double) * (maxslackid + 1));
+    for (i = 0; i <= maxslackid; i++) { /* init shared slacks */
+      slack[i] = 0;
+      alphaslack[i] = 0;
    }
-    for(i=0;i<totdoc;i++) { /* compute alpha aggregated by slack */
-      alphaslack[docs[i]->slackid]+=a[i];
+    for (i = 0; i < totdoc; i++) { /* compute alpha aggregated by slack */
+      alphaslack[docs[i]->slackid] += a[i];
    }
-    compute_shared_slacks(docs,label,a,lin,c,index2dnum,learn_parm,
-                          slack,alphaslack);
-    loss=0;
-    model->at_upper_bound=0;
-    svsetnum=0;
-    for(i=0;i<=maxslackid;i++) { /* create full index */
-      loss+=slack[i];
-      if(alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
-        model->at_upper_bound++;
-      if(alphaslack[i] > learn_parm->epsilon_a)
-        svsetnum++;
+    compute_shared_slacks(docs, label, a, lin, c, index2dnum, learn_parm, slack,
+                          alphaslack);
+    loss = 0;
+    model->at_upper_bound = 0;
+    svsetnum = 0;
+    for (i = 0; i <= maxslackid; i++) { /* create full index */
+      loss += slack[i];
+      if (alphaslack[i] > (learn_parm->svm_c - learn_parm->epsilon_a))
+        model->at_upper_bound++;
+      if (alphaslack[i] > learn_parm->epsilon_a)
+        svsetnum++;
    }
     free(index);
     free(index2dnum);
     free(slack);
     free(alphaslack);
-  }
-
-  if((verbosity>=1) && (!learn_parm->skip_final_opt_check)) {
-    if(learn_parm->sharedslack) {
-      printf("Number of SV: %ld\n",
-             model->sv_num-1);
-      printf("Number of non-zero slack variables: %ld (%ld slacks have non-zero alpha)\n",
-             model->at_upper_bound,svsetnum);
-      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
-    }
-    else {
-      upsupvecnum=0;
-      for(i=1;i<model->sv_num;i++) {
-        if(fabs(model->alpha[i]) >=
-           (learn_parm->svm_cost[(model->supvec[i])->docnum]-
-            learn_parm->epsilon_a))
-          upsupvecnum++;
+  }
+
+  if ((verbosity >= 1) && (!learn_parm->skip_final_opt_check)) {
+    if (learn_parm->sharedslack) {
+      printf("Number of SV: %ld\n", model->sv_num - 1);
+      printf("Number of non-zero slack variables: %ld (%ld slacks have "
+             "non-zero alpha)\n",
+             model->at_upper_bound, svsetnum);
+      fprintf(stdout, "L1 loss: loss=%.5f\n", loss);
+    } else {
+      upsupvecnum = 0;
+      for (i = 1; i < model->sv_num; i++) {
+        if (fabs(model->alpha[i]) >=
+            (learn_parm->svm_cost[(model->supvec[i])->docnum] -
+             learn_parm->epsilon_a))
+          upsupvecnum++;
      }
-      printf("Number of SV: %ld (including %ld at upper bound)\n",
-             model->sv_num-1,upsupvecnum);
-      fprintf(stdout,"L1 loss: loss=%.5f\n",loss);
+      printf("Number of SV: %ld (including %ld at upper bound)\n",
+             model->sv_num - 1, upsupvecnum);
+      fprintf(stdout, "L1 loss: loss=%.5f\n", loss);
    }
-    example_length=estimate_sphere(model);
-    fprintf(stdout,"Norm of longest example vector: |x|=%.5f\n",
-            length_of_longest_document_vector(docs,totdoc,kernel_parm));
+    example_length = estimate_sphere(model);
+    fprintf(stdout, "Norm of longest example vector: |x|=%.5f\n",
+            length_of_longest_document_vector(docs, totdoc, kernel_parm));
  }

-  if(verbosity>=1) {
-    printf("Number of kernel evaluations: %ld\n",kernel_cache_statistic);
-  }
-
-  if(alpha) {
-    for(i=0;i<totdoc;i++) {    /* copy final alphas */
-      alpha[i]=a[i];
+  if (verbosity >= 1) {
+    printf("Number of kernel evaluations: %ld\n", kernel_cache_statistic);
+  }
+
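+  /* [Editor's note: this comment block is an addition, not part of the
+     original patch.] When the caller passes a non-NULL alpha array, the
+     block below copies the optimized alphas back out, so a later run can
+     warm-start from them. Hypothetical caller sketch (the variable names
+     are invented for illustration only):
+
+         double *start = (double *)my_malloc(sizeof(double) * totdoc);
+         for (i = 0; i < totdoc; i++)
+           start[i] = 0;                // cold start on the first call
+         svm_learn_optimization(docs, rhs, totdoc, totwords, &learn_parm,
+                                &kernel_parm, kcache, &model, start);
+         // start[] now holds the solution and can seed a second call
+  */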
+  if (alpha) {
+    for (i = 0; i < totdoc; i++) { /* copy final alphas */
+      alpha[i] = a[i];
    }
-  }
-
-  if(learn_parm->alphafile[0])
-    write_alphas(learn_parm->alphafile,a,label,totdoc);
-
+  }
+
+  if (learn_parm->alphafile[0])
+    write_alphas(learn_parm->alphafile, a, label, totdoc);
+
   shrink_state_cleanup(&shrink_state);
   free(label);
   free(unlabeled);
@@ -1206,428 +1246,431 @@ void svm_learn_optimization(DOC **docs, double *rhs, long int
   free(a);
   free(lin);
   free(learn_parm->svm_cost);
-}
-
-
-long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
-                             long int totwords, LEARN_PARM *learn_parm,
-                             KERNEL_PARM *kernel_parm,
-                             KERNEL_CACHE *kernel_cache,
-                             SHRINK_STATE *shrink_state, MODEL *model,
-                             long int *inconsistent, long int *unlabeled,
-                             double *a, double *lin, double *c,
-                             TIMING *timing_profile, double *maxdiff,
-                             long int heldout, long int retrain)
-  /* docs: Training vectors (x-part) */
-  /* label: Training labels/value (y-part, zero if test example for
-     transduction) */
-  /* totdoc: Number of examples in docs/label */
-  /* totwords: Number of features (i.e. highest feature index) */
-  /* laern_parm: Learning paramenters */
-  /* kernel_parm: Kernel paramenters */
-  /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
-     NULL if linear. */
-  /* shrink_state: State of active variables */
-  /* model: Returns learning result */
-  /* inconsistent: examples thrown out as inconstistent */
-  /* unlabeled: test examples for transduction */
-  /* a: alphas */
-  /* lin: linear component of gradient */
-  /* c: right hand side of inequalities (margin) */
-  /* maxdiff: returns maximum violation of KT-conditions */
-  /* heldout: marks held-out example for leave-one-out (or -1) */
-  /* retrain: selects training mode (1=regular / 2=holdout) */
+}
+
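+/* [Editor's note: this overview comment is an addition, not part of the
+   original patch.] optimize_to_convergence() below is SVM-light's
+   decomposition loop. In outline (a reading aid only; the names match the
+   calls made in the function body):
+
+       for (iteration = 1; retrain && !terminate; iteration++) {
+         // pick working set W, |W| <= learn_parm->svm_maxqpsize
+         select_next_qp_subproblem_grad(...); // or _rand() every 101st pass
+         optimize_svm(...);                   // solve the QP restricted to W
+         update_linear_component(...);        // refresh lin[] incrementally
+         calculate_svm_model(...);            // rebuild b and the SV set
+         // check KT conditions; shrink the active set every 10th iteration
+       }
+*/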
+long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
+                             long int totwords, LEARN_PARM *learn_parm,
+                             KERNEL_PARM *kernel_parm,
+                             KERNEL_CACHE *kernel_cache,
+                             SHRINK_STATE *shrink_state, MODEL *model,
+                             long int *inconsistent, long int *unlabeled,
+                             double *a, double *lin, double *c,
+                             TIMING *timing_profile, double *maxdiff,
+                             long int heldout, long int retrain)
+/* docs: Training vectors (x-part) */
+/* label: Training labels/value (y-part, zero if test example for
+   transduction) */
+/* totdoc: Number of examples in docs/label */
+/* totwords: Number of features (i.e. highest feature index) */
+/* learn_parm: Learning parameters */
+/* kernel_parm: Kernel parameters */
+/* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+   NULL if linear. */
+/* shrink_state: State of active variables */
+/* model: Returns learning result */
+/* inconsistent: examples thrown out as inconsistent */
+/* unlabeled: test examples for transduction */
+/* a: alphas */
+/* lin: linear component of gradient */
+/* c: right hand side of inequalities (margin) */
+/* maxdiff: returns maximum violation of KT-conditions */
+/* heldout: marks held-out example for leave-one-out (or -1) */
+/* retrain: selects training mode (1=regular / 2=holdout) */
 {
-  long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink;
-  long inconsistentnum,choosenum,already_chosen=0,iteration;
-  long misclassified,supvecnum=0,*active2dnum,inactivenum;
-  long *working2dnum,*selexam;
+  long *chosen, *key, i, j, jj, *last_suboptimal_at, noshrink;
+  long inconsistentnum, choosenum, already_chosen = 0, iteration;
+  long misclassified, supvecnum = 0, *active2dnum, inactivenum;
+  long *working2dnum, *selexam;
   long activenum;
-  double criterion,eq;
+  double criterion, eq;
   double *a_old;
-  double t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */
+  double t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0, t5 = 0, t6 = 0; /* timing */
   long transductcycle;
   long transduction;
   double epsilon_crit_org;
   double bestmaxdiff;
-  long bestmaxdiffiter,terminate;
+  long bestmaxdiffiter, terminate;

-  double *selcrit;  /* buffer for sorting */
-  CFLOAT *aicache;  /* buffer to keep one row of hessian */
-  double *weights;  /* buffer for weight vector in linear case */
-  QP qp;            /* buffer for one quadratic program */
+  double *selcrit; /* buffer for sorting */
+  CFLOAT *aicache; /* buffer to keep one row of hessian */
+  double *weights; /* buffer for weight vector in linear case */
+  QP qp;           /* buffer for one quadratic program */

-  epsilon_crit_org=learn_parm->epsilon_crit; /* save org */
-  if(kernel_parm->kernel_type == LINEAR) {
-    learn_parm->epsilon_crit=2.0;
-    /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */
-  }
-  learn_parm->epsilon_shrink=2;
-  (*maxdiff)=1;
+  epsilon_crit_org = learn_parm->epsilon_crit; /* save org */
+  if (kernel_parm->kernel_type == LINEAR) {
+    learn_parm->epsilon_crit = 2.0;
+    /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */
+  }
+  learn_parm->epsilon_shrink = 2;
+  (*maxdiff) = 1;

-  learn_parm->totwords=totwords;
+  learn_parm->totwords = totwords;

-  chosen = (long *)my_malloc(sizeof(long)*totdoc);
-  last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc);
-  key = (long *)my_malloc(sizeof(long)*(totdoc+11));
-  selcrit = (double *)my_malloc(sizeof(double)*totdoc);
-  selexam = (long *)my_malloc(sizeof(long)*totdoc);
-  a_old = (double *)my_malloc(sizeof(double)*totdoc);
-  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc);
-  working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
-  active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11));
-  qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
+  chosen = (long *)my_malloc(sizeof(long) * totdoc);
+  last_suboptimal_at = (long *)my_malloc(sizeof(long) * totdoc);
+  key = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+  selcrit = (double *)my_malloc(sizeof(double) * totdoc);
+  selexam = (long *)my_malloc(sizeof(long) * totdoc);
+  a_old = (double *)my_malloc(sizeof(double) * totdoc);
+  aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc);
+  working2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+  active2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11));
+  qp.opt_ce = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize);
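+  /* [Editor's note: this comment is an addition, not part of the original
+     patch.] The qp.opt_* buffers sized here describe one subproblem over
+     the working set, which never holds more than learn_parm->svm_maxqpsize
+     variables; opt_g is the dense Hessian block of that subproblem, hence
+     svm_maxqpsize * svm_maxqpsize doubles, while the remaining buffers are
+     vectors of length svm_maxqpsize. As the field names suggest,
+     opt_ce/opt_ce0 carry the equality constraint, opt_low/opt_up the box
+     bounds, and opt_xinit the starting point; the struct is handed to
+     optimize_qp() on every iteration. */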
 qp.opt_ce0 = (double *)my_malloc(sizeof(double));
-  qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize
-                                 *learn_parm->svm_maxqpsize);
-  qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
-  qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
-  qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
-  qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize);
-  if(kernel_parm->kernel_type == LINEAR) {
-    weights=create_nvector(totwords);
-    clear_nvector(weights,totwords); /* set weights to zero */
-  }
-  else
-    weights=NULL;
+  qp.opt_g = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize *
+                                 learn_parm->svm_maxqpsize);
+  qp.opt_g0 = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize);
+  qp.opt_xinit =
+      (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize);
+  qp.opt_low = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize);
+  qp.opt_up = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize);
+  if (kernel_parm->kernel_type == LINEAR) {
+    weights = create_nvector(totwords);
+    clear_nvector(weights, totwords); /* set weights to zero */
+  } else
+    weights = NULL;

-  choosenum=0;
-  inconsistentnum=0;
-  transductcycle=0;
-  transduction=0;
-  if(!retrain) retrain=1;
-  iteration=1;
-  bestmaxdiffiter=1;
-  bestmaxdiff=999999999;
-  terminate=0;
+  choosenum = 0;
+  inconsistentnum = 0;
+  transductcycle = 0;
+  transduction = 0;
+  if (!retrain)
+    retrain = 1;
+  iteration = 1;
+  bestmaxdiffiter = 1;
+  bestmaxdiff = 999999999;
+  terminate = 0;

-  if(kernel_cache) {
-    kernel_cache->time=iteration;  /* for lru cache */
+  if (kernel_cache) {
+    kernel_cache->time = iteration; /* for lru cache */
     kernel_cache_reset_lru(kernel_cache);
   }

-  for(i=0;i<totdoc;i++) {    /* various inits */
-    chosen[i]=0;
-    a_old[i]=a[i];
-    last_suboptimal_at[i]=1;
+  for (i = 0; i < totdoc; i++) { /* various inits */
+    chosen[i] = 0;
+    a_old[i] = a[i];
+    last_suboptimal_at[i] = 1;
   }
-  activenum=compute_index(shrink_state->active,totdoc,active2dnum);
-  inactivenum=totdoc-activenum;
-  clear_index(working2dnum);
+  activenum = compute_index(shrink_state->active, totdoc, active2dnum);
+  inactivenum = totdoc - activenum;
+  clear_index(working2dnum);
+
+  /* repeat this loop until we have convergence */
+  for (; retrain && (!terminate); iteration++) {

-  /* repeat this loop until we have convergence */
-  for(;retrain && (!terminate);iteration++) {
-
-    if(kernel_cache)
-      kernel_cache->time=iteration;  /* for lru cache */
-    if(verbosity>=2) {
-      printf(
-        "Iteration %ld: ",iteration); fflush(stdout);
-    }
-    else if(verbosity==1) {
-      printf("."); fflush(stdout);
+    if (kernel_cache)
+      kernel_cache->time = iteration; /* for lru cache */
+    if (verbosity >= 2) {
+      printf("Iteration %ld: ", iteration);
+      fflush(stdout);
+    } else if (verbosity == 1) {
+      printf(".");
+      fflush(stdout);
     }

-    if(verbosity>=2) t0=get_runtime();
-    if(verbosity>=3) {
-      printf("\nSelecting working set... "); fflush(stdout);
+    if (verbosity >= 2)
+      t0 = get_runtime();
+    if (verbosity >= 3) {
+      printf("\nSelecting working set... 
"); + fflush(stdout); } - if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; - i=0; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ - if((chosen[j]>=(learn_parm->svm_maxqpsize/ - minl(learn_parm->svm_maxqpsize, - learn_parm->svm_newvarsinqp))) - || (inconsistent[j]) - || (j == heldout)) { - chosen[j]=0; - choosenum--; - } - else { - chosen[j]++; - working2dnum[i++]=j; + i = 0; + for (jj = 0; (j = working2dnum[jj]) >= 0; jj++) { /* clear working set */ + if ((chosen[j] >= + (learn_parm->svm_maxqpsize / + minl(learn_parm->svm_maxqpsize, learn_parm->svm_newvarsinqp))) || + (inconsistent[j]) || (j == heldout)) { + chosen[j] = 0; + choosenum--; + } else { + chosen[j]++; + working2dnum[i++] = j; } } - working2dnum[i]=-1; + working2dnum[i] = -1; - if(retrain == 2) { - choosenum=0; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* fully clear working set */ - chosen[j]=0; + if (retrain == 2) { + choosenum = 0; + for (jj = 0; (j = working2dnum[jj]) >= 0; + jj++) { /* fully clear working set */ + chosen[j] = 0; } clear_index(working2dnum); - for(i=0;ibiased_hyperplane) { - eq=0; - for(i=0;i learn_parm->epsilon_a);i++) { - if((eq*label[i] > 0) && (a[i] > 0)) { - chosen[i]=88888; - choosenum++; - if((eq*label[i]) > a[i]) { - eq-=(a[i]*label[i]); - a[i]=0; - } - else { - a[i]-=(eq*label[i]); - eq=0; - } - } - } + if (learn_parm->biased_hyperplane) { + eq = 0; + for (i = 0; i < totdoc; + i++) { /* make sure we fulfill equality constraint */ + eq += a[i] * label[i]; + } + for (i = 0; (i < totdoc) && (fabs(eq) > learn_parm->epsilon_a); i++) { + if ((eq * label[i] > 0) && (a[i] > 0)) { + chosen[i] = 88888; + choosenum++; + if ((eq * label[i]) > a[i]) { + eq -= (a[i] * label[i]); + a[i] = 0; + } else { + a[i] -= (eq * label[i]); + eq = 0; + } + } + } } - compute_index(chosen,totdoc,working2dnum); - } - else { /* select working set according to steepest gradient */ - if(iteration % 101) { - already_chosen=0; - if((minl(learn_parm->svm_newvarsinqp, - learn_parm->svm_maxqpsize-choosenum)>=4) - && (kernel_parm->kernel_type != LINEAR)) { - /* select part of the working set from cache */ - already_chosen=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - (long)(minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp) - /2), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,1, - key,chosen); - choosenum+=already_chosen; - } - choosenum+=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp-already_chosen), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,0,key, - chosen); - } - else { /* once in a while, select a somewhat random working set - to get unlocked of infinite loops due to numerical - inaccuracies in the core qp-solver */ - choosenum+=select_next_qp_subproblem_rand( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,key, - chosen,iteration); + compute_index(chosen, totdoc, working2dnum); + } else { /* select working set according to steepest gradient */ + if (iteration % 101) { + already_chosen = 0; + if ((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize - choosenum) >= 4) 
&& + (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + already_chosen = select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + (long)(minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp) / + 2), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, 1, key, chosen); + choosenum += already_chosen; + } + choosenum += select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp - already_chosen), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, 0, key, chosen); + } else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum += select_next_qp_subproblem_rand( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, key, chosen, iteration); } } - if(verbosity>=2) { - printf(" %ld vectors chosen\n",choosenum); fflush(stdout); + if (verbosity >= 2) { + printf(" %ld vectors chosen\n", choosenum); + fflush(stdout); } - if(verbosity>=2) t1=get_runtime(); + if (verbosity >= 2) + t1 = get_runtime(); - if(kernel_cache) - cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, - choosenum,kernel_parm); - - if(verbosity>=2) t2=get_runtime(); - if(retrain != 2) { - optimize_svm(docs,label,unlabeled,inconsistent,0.0,chosen,active2dnum, - model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, - aicache,kernel_parm,&qp,&epsilon_crit_org); + if (kernel_cache) + cache_multiple_kernel_rows(kernel_cache, docs, working2dnum, choosenum, + kernel_parm); + + if (verbosity >= 2) + t2 = get_runtime(); + if (retrain != 2) { + optimize_svm(docs, label, unlabeled, inconsistent, 0.0, chosen, + active2dnum, model, totdoc, working2dnum, choosenum, a, lin, + c, learn_parm, aicache, kernel_parm, &qp, &epsilon_crit_org); } - if(verbosity>=2) t3=get_runtime(); - update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, - totwords,kernel_parm,kernel_cache,lin,aicache, - weights); + if (verbosity >= 2) + t3 = get_runtime(); + update_linear_component(docs, label, active2dnum, a, a_old, working2dnum, + totdoc, totwords, kernel_parm, kernel_cache, lin, + aicache, weights); - if(verbosity>=2) t4=get_runtime(); - supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, - learn_parm,working2dnum,active2dnum,model); + if (verbosity >= 2) + t4 = get_runtime(); + supvecnum = + calculate_svm_model(docs, label, unlabeled, lin, a, a_old, c, + learn_parm, working2dnum, active2dnum, model); - if(verbosity>=2) t5=get_runtime(); + if (verbosity >= 2) + t5 = get_runtime(); /* The following computation of the objective function works only */ /* relative to the active variables */ - if(verbosity>=3) { - criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, - active2dnum); - printf("Objective function (over active variables): %.16f\n",criterion); - fflush(stdout); + if (verbosity >= 3) { + criterion = compute_objective_function(a, lin, c, learn_parm->eps, label, + active2dnum); + printf("Objective function (over active variables): %.16f\n", criterion); + fflush(stdout); } - for(jj=0;(i=working2dnum[jj])>=0;jj++) { - a_old[i]=a[i]; + for (jj = 0; (i = working2dnum[jj]) >= 0; jj++) { + a_old[i] = a[i]; } - if(retrain 
== 2) { /* reset inconsistent unlabeled examples */ - for(i=0;(i=2) { - t6=get_runtime(); - timing_profile->time_select+=t1-t0; - timing_profile->time_kernel+=t2-t1; - timing_profile->time_opti+=t3-t2; - timing_profile->time_update+=t4-t3; - timing_profile->time_model+=t5-t4; - timing_profile->time_check+=t6-t5; + if (verbosity >= 2) { + t6 = get_runtime(); + timing_profile->time_select += t1 - t0; + timing_profile->time_kernel += t2 - t1; + timing_profile->time_opti += t3 - t2; + timing_profile->time_update += t4 - t3; + timing_profile->time_model += t5 - t4; + timing_profile->time_check += t6 - t5; } /* checking whether optimizer got stuck */ - if((*maxdiff) < bestmaxdiff) { - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + if ((*maxdiff) < bestmaxdiff) { + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; } - if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + if (iteration > (bestmaxdiffiter + learn_parm->maxiter)) { /* long time no progress? */ - terminate=1; - retrain=0; - if(verbosity>=1) - printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + terminate = 1; + retrain = 0; + if (verbosity >= 1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! " + "Terminating!\n"); } - noshrink=0; - if((!retrain) && (inactivenum>0) - && ((!learn_parm->skip_final_opt_check) - || (kernel_parm->kernel_type == LINEAR))) { - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - if(verbosity==1) { - printf("\n"); - } - printf(" Checking optimality of inactive variables..."); - fflush(stdout); + noshrink = 0; + if ((!retrain) && (inactivenum > 0) && + ((!learn_parm->skip_final_opt_check) || + (kernel_parm->kernel_type == LINEAR))) { + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + if (verbosity == 1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); } - t1=get_runtime(); - reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, - totwords,iteration,learn_parm,inconsistent, - docs,kernel_parm,kernel_cache,model,aicache, - weights,maxdiff); + t1 = get_runtime(); + reactivate_inactive_examples( + label, unlabeled, a, shrink_state, lin, c, totdoc, totwords, + iteration, learn_parm, inconsistent, docs, kernel_parm, kernel_cache, + model, aicache, weights, maxdiff); /* Update to new active variables. 
*/ - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=totdoc-activenum; + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; /* reset watchdog */ - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; /* termination criterion */ - noshrink=1; - retrain=0; - if((*maxdiff) > learn_parm->epsilon_crit) - retrain=1; - timing_profile->time_shrink+=get_runtime()-t1; - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - printf("done.\n"); fflush(stdout); - printf(" Number of inactive variables = %ld\n",inactivenum); - } + noshrink = 1; + retrain = 0; + if ((*maxdiff) > learn_parm->epsilon_crit) + retrain = 1; + timing_profile->time_shrink += get_runtime() - t1; + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + printf("done.\n"); + fflush(stdout); + printf(" Number of inactive variables = %ld\n", inactivenum); + } } - if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) - learn_parm->epsilon_crit=(*maxdiff); - if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { - learn_parm->epsilon_crit/=2.0; - retrain=1; - noshrink=1; + if ((!retrain) && (learn_parm->epsilon_crit > (*maxdiff))) + learn_parm->epsilon_crit = (*maxdiff); + if ((!retrain) && (learn_parm->epsilon_crit > epsilon_crit_org)) { + learn_parm->epsilon_crit /= 2.0; + retrain = 1; + noshrink = 1; } - if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; - - if(verbosity>=2) { - printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", - supvecnum,model->at_upper_bound,(*maxdiff)); + if (learn_parm->epsilon_crit < epsilon_crit_org) + learn_parm->epsilon_crit = epsilon_crit_org; + + if (verbosity >= 2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum, model->at_upper_bound, (*maxdiff)); fflush(stdout); } - if(verbosity>=3) { + if (verbosity >= 3) { printf("\n"); } - if((!retrain) && (transduction)) { - for(i=0;(iactive[i]=1; + if ((!retrain) && (transduction)) { + for (i = 0; (i < totdoc); i++) { + shrink_state->active[i] = 1; } - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=0; - if(verbosity==1) printf("done\n"); - retrain=incorporate_unlabeled_examples(model,label,inconsistent, - unlabeled,a,lin,totdoc, - selcrit,selexam,key, - transductcycle,kernel_parm, - learn_parm); - epsilon_crit_org=learn_parm->epsilon_crit; - if(kernel_parm->kernel_type == LINEAR) - learn_parm->epsilon_crit=1; + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = 0; + if (verbosity == 1) + printf("done\n"); + retrain = incorporate_unlabeled_examples( + model, label, inconsistent, unlabeled, a, lin, totdoc, selcrit, + selexam, key, transductcycle, kernel_parm, learn_parm); + epsilon_crit_org = learn_parm->epsilon_crit; + if (kernel_parm->kernel_type == LINEAR) + learn_parm->epsilon_crit = 1; transductcycle++; /* reset watchdog */ - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; - } - else if(((iteration % 10) == 0) && (!noshrink)) { - activenum=shrink_problem(docs,learn_parm,shrink_state,kernel_parm, - active2dnum,last_suboptimal_at,iteration,totdoc, - maxl((long)(activenum/10), - maxl((long)(totdoc/500),100)), - a,inconsistent); - inactivenum=totdoc-activenum; - if((kernel_cache) - && (supvecnum>kernel_cache->max_elems) - && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { - kernel_cache_shrink(kernel_cache,totdoc, - minl((kernel_cache->activenum-activenum), - (kernel_cache->activenum-supvecnum)), - shrink_state->active); + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; + } else if (((iteration % 10) == 0) && (!noshrink)) { + activenum = shrink_problem( + docs, learn_parm, shrink_state, kernel_parm, active2dnum, + last_suboptimal_at, iteration, totdoc, + maxl((long)(activenum / 10), maxl((long)(totdoc / 500), 100)), a, + inconsistent); + inactivenum = totdoc - activenum; + if ((kernel_cache) && (supvecnum > kernel_cache->max_elems) && + ((kernel_cache->activenum - activenum) > + maxl((long)(activenum / 10), 500))) { + kernel_cache_shrink(kernel_cache, totdoc, + minl((kernel_cache->activenum - activenum), + (kernel_cache->activenum - supvecnum)), + shrink_state->active); } } - if((!retrain) && learn_parm->remove_inconsistent) { - if(verbosity>=1) { - printf(" Moving training errors to inconsistent examples..."); - fflush(stdout); + if ((!retrain) && learn_parm->remove_inconsistent) { + if (verbosity >= 1) { + printf(" Moving training errors to inconsistent examples..."); + fflush(stdout); } - if(learn_parm->remove_inconsistent == 1) { - retrain=identify_inconsistent(a,label,unlabeled,totdoc,learn_parm, - &inconsistentnum,inconsistent); + if (learn_parm->remove_inconsistent == 1) { + retrain = identify_inconsistent(a, label, unlabeled, totdoc, learn_parm, + &inconsistentnum, inconsistent); + } else if (learn_parm->remove_inconsistent == 2) { + retrain = identify_misclassified(lin, label, unlabeled, totdoc, model, + &inconsistentnum, inconsistent); + } else if (learn_parm->remove_inconsistent == 3) { + retrain = + identify_one_misclassified(lin, label, unlabeled, totdoc, model, + &inconsistentnum, inconsistent); } - else if(learn_parm->remove_inconsistent == 2) { - 
retrain=identify_misclassified(lin,label,unlabeled,totdoc,
-                                       model,&inconsistentnum,inconsistent);
+      if (retrain) {
+        if (kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
+          learn_parm->epsilon_crit = 2.0;
+        }
       }
-      else if(learn_parm->remove_inconsistent == 3) {
-        retrain=identify_one_misclassified(lin,label,unlabeled,totdoc,
-                                           model,&inconsistentnum,inconsistent);
-      }
-      if(retrain) {
-        if(kernel_parm->kernel_type == LINEAR) { /* reinit shrinking */
-          learn_parm->epsilon_crit=2.0;
-        }
-      }
-      if(verbosity>=1) {
-        printf("done.\n");
-        if(retrain) {
-          printf("   Now %ld inconsistent examples.\n",inconsistentnum);
-        }
+      if (verbosity >= 1) {
+        printf("done.\n");
+        if (retrain) {
+          printf("   Now %ld inconsistent examples.\n", inconsistentnum);
+        }
       }
     }
   } /* end of loop */
@@ -1648,430 +1691,433 @@ long optimize_to_convergence(DOC **docs, long int *label, long int totdoc,
   free(qp.opt_xinit);
   free(qp.opt_low);
   free(qp.opt_up);
-  if(weights) free(weights);
+  if (weights)
+    free(weights);

-  learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */
-  model->maxdiff=(*maxdiff);
+  learn_parm->epsilon_crit = epsilon_crit_org; /* restore org */
+  model->maxdiff = (*maxdiff);

-  return(iteration);
+  return (iteration);
 }

-long optimize_to_convergence_sharedslack(DOC **docs, long int *label,
-                                         long int totdoc,
-                                         long int totwords, LEARN_PARM *learn_parm,
-                                         KERNEL_PARM *kernel_parm,
-                                         KERNEL_CACHE *kernel_cache,
-                                         SHRINK_STATE *shrink_state, MODEL *model,
-                                         double *a, double *lin, double *c,
-                                         TIMING *timing_profile, double *maxdiff)
-  /* docs: Training vectors (x-part) */
-  /* label: Training labels/value (y-part, zero if test example for
-     transduction) */
-  /* totdoc: Number of examples in docs/label */
-  /* totwords: Number of features (i.e. highest feature index) */
-  /* learn_parm: Learning paramenters */
-  /* kernel_parm: Kernel paramenters */
-  /* kernel_cache: Initialized/partly filled Cache, if using a kernel.
-     NULL if linear. */
-  /* shrink_state: State of active variables */
-  /* model: Returns learning result */
-  /* a: alphas */
-  /* lin: linear component of gradient */
-  /* c: right hand side of inequalities (margin) */
-  /* maxdiff: returns maximum violation of KT-conditions */
+long optimize_to_convergence_sharedslack(
+    DOC **docs, long int *label, long int totdoc, long int totwords,
+    LEARN_PARM *learn_parm, KERNEL_PARM *kernel_parm,
+    KERNEL_CACHE *kernel_cache, SHRINK_STATE *shrink_state, MODEL *model,
+    double *a, double *lin, double *c, TIMING *timing_profile, double *maxdiff)
+/* docs: Training vectors (x-part) */
+/* label: Training labels/value (y-part, zero if test example for
+   transduction) */
+/* totdoc: Number of examples in docs/label */
+/* totwords: Number of features (i.e. highest feature index) */
+/* learn_parm: Learning parameters */
+/* kernel_parm: Kernel parameters */
+/* kernel_cache: Initialized/partly filled Cache, if using a kernel.
+   NULL if linear. 
*/ +/* shrink_state: State of active variables */ +/* model: Returns learning result */ +/* a: alphas */ +/* lin: linear component of gradient */ +/* c: right hand side of inequalities (margin) */ +/* maxdiff: returns maximum violation of KT-conditions */ { - long *chosen,*key,i,j,jj,*last_suboptimal_at,noshrink,*unlabeled; - long *inconsistent,choosenum,already_chosen=0,iteration; - long misclassified,supvecnum=0,*active2dnum,inactivenum; - long *working2dnum,*selexam,*ignore; - long activenum,retrain,maxslackid,slackset,jointstep; - double criterion,eq_target; - double *a_old,*alphaslack; - double t0=0,t1=0,t2=0,t3=0,t4=0,t5=0,t6=0; /* timing */ - double epsilon_crit_org,maxsharedviol; + long *chosen, *key, i, j, jj, *last_suboptimal_at, noshrink, *unlabeled; + long *inconsistent, choosenum, already_chosen = 0, iteration; + long misclassified, supvecnum = 0, *active2dnum, inactivenum; + long *working2dnum, *selexam, *ignore; + long activenum, retrain, maxslackid, slackset, jointstep; + double criterion, eq_target; + double *a_old, *alphaslack; + double t0 = 0, t1 = 0, t2 = 0, t3 = 0, t4 = 0, t5 = 0, t6 = 0; /* timing */ + double epsilon_crit_org, maxsharedviol; double bestmaxdiff; - long bestmaxdiffiter,terminate; + long bestmaxdiffiter, terminate; - double *selcrit; /* buffer for sorting */ - CFLOAT *aicache; /* buffer to keep one row of hessian */ - double *weights; /* buffer for weight vector in linear case */ - QP qp; /* buffer for one quadratic program */ - double *slack; /* vector of slack variables for optimization with - shared slacks */ + double *selcrit; /* buffer for sorting */ + CFLOAT *aicache; /* buffer to keep one row of hessian */ + double *weights; /* buffer for weight vector in linear case */ + QP qp; /* buffer for one quadratic program */ + double *slack; /* vector of slack variables for optimization with + shared slacks */ - epsilon_crit_org=learn_parm->epsilon_crit; /* save org */ - if(kernel_parm->kernel_type == LINEAR) { - learn_parm->epsilon_crit=2.0; - /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ - } - learn_parm->epsilon_shrink=2; - (*maxdiff)=1; + epsilon_crit_org = learn_parm->epsilon_crit; /* save org */ + if (kernel_parm->kernel_type == LINEAR) { + learn_parm->epsilon_crit = 2.0; + /* kernel_cache=NULL; */ /* caching makes no sense for linear kernel */ + } + learn_parm->epsilon_shrink = 2; + (*maxdiff) = 1; - learn_parm->totwords=totwords; + learn_parm->totwords = totwords; - chosen = (long *)my_malloc(sizeof(long)*totdoc); - unlabeled = (long *)my_malloc(sizeof(long)*totdoc); - inconsistent = (long *)my_malloc(sizeof(long)*totdoc); - ignore = (long *)my_malloc(sizeof(long)*totdoc); - last_suboptimal_at = (long *)my_malloc(sizeof(long)*totdoc); - key = (long *)my_malloc(sizeof(long)*(totdoc+11)); - selcrit = (double *)my_malloc(sizeof(double)*totdoc); - selexam = (long *)my_malloc(sizeof(long)*totdoc); - a_old = (double *)my_malloc(sizeof(double)*totdoc); - aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT)*totdoc); - working2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - active2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); - qp.opt_ce = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); + chosen = (long *)my_malloc(sizeof(long) * totdoc); + unlabeled = (long *)my_malloc(sizeof(long) * totdoc); + inconsistent = (long *)my_malloc(sizeof(long) * totdoc); + ignore = (long *)my_malloc(sizeof(long) * totdoc); + last_suboptimal_at = (long *)my_malloc(sizeof(long) * totdoc); + key = (long *)my_malloc(sizeof(long) * 
(totdoc + 11)); + selcrit = (double *)my_malloc(sizeof(double) * totdoc); + selexam = (long *)my_malloc(sizeof(long) * totdoc); + a_old = (double *)my_malloc(sizeof(double) * totdoc); + aicache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * totdoc); + working2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + active2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + qp.opt_ce = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); qp.opt_ce0 = (double *)my_malloc(sizeof(double)); - qp.opt_g = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize - *learn_parm->svm_maxqpsize); - qp.opt_g0 = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_xinit = (double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_low=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - qp.opt_up=(double *)my_malloc(sizeof(double)*learn_parm->svm_maxqpsize); - if(kernel_parm->kernel_type == LINEAR) { - weights=create_nvector(totwords); - clear_nvector(weights,totwords); /* set weights to zero */ + qp.opt_g = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize * + learn_parm->svm_maxqpsize); + qp.opt_g0 = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_xinit = + (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_low = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + qp.opt_up = (double *)my_malloc(sizeof(double) * learn_parm->svm_maxqpsize); + if (kernel_parm->kernel_type == LINEAR) { + weights = create_nvector(totwords); + clear_nvector(weights, totwords); /* set weights to zero */ + } else + weights = NULL; + maxslackid = 0; + for (i = 0; i < totdoc; i++) { /* determine size of slack array */ + if (maxslackid < docs[i]->slackid) + maxslackid = docs[i]->slackid; } - else - weights=NULL; - maxslackid=0; - for(i=0;islackid) - maxslackid=docs[i]->slackid; - } - slack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); - alphaslack=(double *)my_malloc(sizeof(double)*(maxslackid+1)); - for(i=0;i<=maxslackid;i++) { /* init shared slacks */ - slack[i]=0; - alphaslack[i]=0; + slack = (double *)my_malloc(sizeof(double) * (maxslackid + 1)); + alphaslack = (double *)my_malloc(sizeof(double) * (maxslackid + 1)); + for (i = 0; i <= maxslackid; i++) { /* init shared slacks */ + slack[i] = 0; + alphaslack[i] = 0; } - choosenum=0; - retrain=1; - iteration=1; - bestmaxdiffiter=1; - bestmaxdiff=999999999; - terminate=0; + choosenum = 0; + retrain = 1; + iteration = 1; + bestmaxdiffiter = 1; + bestmaxdiff = 999999999; + terminate = 0; - if(kernel_cache) { - kernel_cache->time=iteration; /* for lru cache */ + if (kernel_cache) { + kernel_cache->time = iteration; /* for lru cache */ kernel_cache_reset_lru(kernel_cache); } - for(i=0;islackid]+=a[i]; - a_old[i]=a[i]; - last_suboptimal_at[i]=1; + for (i = 0; i < totdoc; i++) { /* various inits */ + chosen[i] = 0; + unlabeled[i] = 0; + inconsistent[i] = 0; + ignore[i] = 0; + alphaslack[docs[i]->slackid] += a[i]; + a_old[i] = a[i]; + last_suboptimal_at[i] = 1; } - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=totdoc-activenum; + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; clear_index(working2dnum); /* call to init slack and alphaslack */ - compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, - slack,alphaslack); + compute_shared_slacks(docs, label, a, lin, c, active2dnum, learn_parm, slack, + alphaslack); + + /* repeat this loop until we 
have convergence */ + for (; retrain && (!terminate); iteration++) { - /* repeat this loop until we have convergence */ - for(;retrain && (!terminate);iteration++) { - - if(kernel_cache) - kernel_cache->time=iteration; /* for lru cache */ - if(verbosity>=2) { - printf( - "Iteration %ld: ",iteration); fflush(stdout); - } - else if(verbosity==1) { - printf("."); fflush(stdout); + if (kernel_cache) + kernel_cache->time = iteration; /* for lru cache */ + if (verbosity >= 2) { + printf("Iteration %ld: ", iteration); + fflush(stdout); + } else if (verbosity == 1) { + printf("."); + fflush(stdout); } - if(verbosity>=2) t0=get_runtime(); - if(verbosity>=3) { - printf("\nSelecting working set... "); fflush(stdout); + if (verbosity >= 2) + t0 = get_runtime(); + if (verbosity >= 3) { + printf("\nSelecting working set... "); + fflush(stdout); } - if(learn_parm->svm_newvarsinqp>learn_parm->svm_maxqpsize) - learn_parm->svm_newvarsinqp=learn_parm->svm_maxqpsize; + if (learn_parm->svm_newvarsinqp > learn_parm->svm_maxqpsize) + learn_parm->svm_newvarsinqp = learn_parm->svm_maxqpsize; /* select working set according to steepest gradient */ - jointstep=0; - eq_target=0; - if(iteration % 101) { - slackset=select_next_qp_slackset(docs,label,a,lin,slack,alphaslack,c, - learn_parm,active2dnum,&maxsharedviol); - if((!(iteration % 100)) - || (!slackset) || (maxsharedviolepsilon_crit)){ - /* do a step with examples from different slack sets */ - if(verbosity >= 2) { - printf("(i-step)"); fflush(stdout); - } - i=0; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear old part of working set */ - if((chosen[j]>=(learn_parm->svm_maxqpsize/ - minl(learn_parm->svm_maxqpsize, - learn_parm->svm_newvarsinqp)))) { - chosen[j]=0; - choosenum--; - } - else { - chosen[j]++; - working2dnum[i++]=j; - } - } - working2dnum[i]=-1; - - already_chosen=0; - if((minl(learn_parm->svm_newvarsinqp, - learn_parm->svm_maxqpsize-choosenum)>=4) - && (kernel_parm->kernel_type != LINEAR)) { - /* select part of the working set from cache */ - already_chosen=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - (long)(minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp) - /2), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache, - (long)1,key,chosen); - choosenum+=already_chosen; - } - choosenum+=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp-already_chosen), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache, - (long)0,key,chosen); + jointstep = 0; + eq_target = 0; + if (iteration % 101) { + slackset = + select_next_qp_slackset(docs, label, a, lin, slack, alphaslack, c, + learn_parm, active2dnum, &maxsharedviol); + if ((!(iteration % 100)) || (!slackset) || + (maxsharedviol < learn_parm->epsilon_crit)) { + /* do a step with examples from different slack sets */ + if (verbosity >= 2) { + printf("(i-step)"); + fflush(stdout); + } + i = 0; + for (jj = 0; (j = working2dnum[jj]) >= 0; + jj++) { /* clear old part of working set */ + if ((chosen[j] >= (learn_parm->svm_maxqpsize / + minl(learn_parm->svm_maxqpsize, + learn_parm->svm_newvarsinqp)))) { + chosen[j] = 0; + choosenum--; + } else { + chosen[j]++; + working2dnum[i++] = j; + } + } + working2dnum[i] = -1; + + already_chosen = 0; + if ((minl(learn_parm->svm_newvarsinqp, + learn_parm->svm_maxqpsize - choosenum) >= 4) && + (kernel_parm->kernel_type != LINEAR)) { + /* select part of the working set from cache */ + 
already_chosen = select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + (long)(minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp) / + 2), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, (long)1, key, chosen); + choosenum += already_chosen; + } + choosenum += select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp - already_chosen), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, + selexam, kernel_cache, (long)0, key, chosen); + } else { /* do a step with all examples from same slack set */ + if (verbosity >= 2) { + printf("(j-step on %ld)", slackset); + fflush(stdout); + } + jointstep = 1; + for (jj = 0; (j = working2dnum[jj]) >= 0; + jj++) { /* clear working set */ + chosen[j] = 0; + } + working2dnum[0] = -1; + eq_target = alphaslack[slackset]; + for (j = 0; j < totdoc; j++) { /* mask all but slackset */ + /* for(jj=0;(j=active2dnum[jj])>=0;jj++) { */ + if (docs[j]->slackid != slackset) + ignore[j] = 1; + else { + ignore[j] = 0; + learn_parm->svm_cost[j] = learn_parm->svm_c; + /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ + } + } + learn_parm->biased_hyperplane = 1; + choosenum = select_next_qp_subproblem_grad( + label, unlabeled, a, lin, c, totdoc, learn_parm->svm_maxqpsize, + learn_parm, ignore, active2dnum, working2dnum, selcrit, selexam, + kernel_cache, (long)0, key, chosen); + learn_parm->biased_hyperplane = 0; } - else { /* do a step with all examples from same slack set */ - if(verbosity >= 2) { - printf("(j-step on %ld)",slackset); fflush(stdout); - } - jointstep=1; - for(jj=0;(j=working2dnum[jj])>=0;jj++) { /* clear working set */ - chosen[j]=0; - } - working2dnum[0]=-1; - eq_target=alphaslack[slackset]; - for(j=0;j=0;jj++) { */ - if(docs[j]->slackid != slackset) - ignore[j]=1; - else { - ignore[j]=0; - learn_parm->svm_cost[j]=learn_parm->svm_c; - /* printf("Inslackset(%ld,%ld)",j,shrink_state->active[j]); */ - } - } - learn_parm->biased_hyperplane=1; - choosenum=select_next_qp_subproblem_grad( - label,unlabeled,a,lin,c,totdoc, - learn_parm->svm_maxqpsize, - learn_parm,ignore,active2dnum, - working2dnum,selcrit,selexam,kernel_cache, - (long)0,key,chosen); - learn_parm->biased_hyperplane=0; + } else { /* once in a while, select a somewhat random working set + to get unlocked of infinite loops due to numerical + inaccuracies in the core qp-solver */ + choosenum += select_next_qp_subproblem_rand( + label, unlabeled, a, lin, c, totdoc, + minl(learn_parm->svm_maxqpsize - choosenum, + learn_parm->svm_newvarsinqp), + learn_parm, inconsistent, active2dnum, working2dnum, selcrit, selexam, + kernel_cache, key, chosen, iteration); + } + + if (verbosity >= 2) { + printf(" %ld vectors chosen\n", choosenum); + fflush(stdout); + } + + if (verbosity >= 2) + t1 = get_runtime(); + + if (kernel_cache) + cache_multiple_kernel_rows(kernel_cache, docs, working2dnum, choosenum, + kernel_parm); + + if (verbosity >= 2) + t2 = get_runtime(); + if (jointstep) + learn_parm->biased_hyperplane = 1; + optimize_svm(docs, label, unlabeled, ignore, eq_target, chosen, active2dnum, + model, totdoc, working2dnum, choosenum, a, lin, c, learn_parm, + aicache, kernel_parm, &qp, &epsilon_crit_org); + learn_parm->biased_hyperplane = 0; + + for (jj = 0; (i = working2dnum[jj]) >= 0; + jj++) /* recompute sums of alphas */ + alphaslack[docs[i]->slackid] += (a[i] - a_old[i]); + for (jj = 0; (i = working2dnum[jj]) >= 0; 
jj++) { /* reduce alpha to fulfill + constraints */ + if (alphaslack[docs[i]->slackid] > learn_parm->svm_c) { + if (a[i] < (alphaslack[docs[i]->slackid] - learn_parm->svm_c)) { + alphaslack[docs[i]->slackid] -= a[i]; + a[i] = 0; + } else { + a[i] -= (alphaslack[docs[i]->slackid] - learn_parm->svm_c); + alphaslack[docs[i]->slackid] = learn_parm->svm_c; + } } } - else { /* once in a while, select a somewhat random working set - to get unlocked of infinite loops due to numerical - inaccuracies in the core qp-solver */ - choosenum+=select_next_qp_subproblem_rand( - label,unlabeled,a,lin,c,totdoc, - minl(learn_parm->svm_maxqpsize-choosenum, - learn_parm->svm_newvarsinqp), - learn_parm,inconsistent,active2dnum, - working2dnum,selcrit,selexam,kernel_cache,key, - chosen,iteration); + for (jj = 0; (i = active2dnum[jj]) >= 0; jj++) + learn_parm->svm_cost[i] = + a[i] + (learn_parm->svm_c - alphaslack[docs[i]->slackid]); + model->at_upper_bound = 0; + for (jj = 0; jj <= maxslackid; jj++) { + if (alphaslack[jj] > (learn_parm->svm_c - learn_parm->epsilon_a)) + model->at_upper_bound++; } - if(verbosity>=2) { - printf(" %ld vectors chosen\n",choosenum); fflush(stdout); - } + if (verbosity >= 2) + t3 = get_runtime(); + update_linear_component(docs, label, active2dnum, a, a_old, working2dnum, + totdoc, totwords, kernel_parm, kernel_cache, lin, + aicache, weights); + compute_shared_slacks(docs, label, a, lin, c, active2dnum, learn_parm, + slack, alphaslack); - if(verbosity>=2) t1=get_runtime(); + if (verbosity >= 2) + t4 = get_runtime(); + supvecnum = + calculate_svm_model(docs, label, unlabeled, lin, a, a_old, c, + learn_parm, working2dnum, active2dnum, model); - if(kernel_cache) - cache_multiple_kernel_rows(kernel_cache,docs,working2dnum, - choosenum,kernel_parm); - - if(verbosity>=2) t2=get_runtime(); - if(jointstep) learn_parm->biased_hyperplane=1; - optimize_svm(docs,label,unlabeled,ignore,eq_target,chosen,active2dnum, - model,totdoc,working2dnum,choosenum,a,lin,c,learn_parm, - aicache,kernel_parm,&qp,&epsilon_crit_org); - learn_parm->biased_hyperplane=0; - - for(jj=0;(i=working2dnum[jj])>=0;jj++) /* recompute sums of alphas */ - alphaslack[docs[i]->slackid]+=(a[i]-a_old[i]); - for(jj=0;(i=working2dnum[jj])>=0;jj++) { /* reduce alpha to fulfill - constraints */ - if(alphaslack[docs[i]->slackid] > learn_parm->svm_c) { - if(a[i] < (alphaslack[docs[i]->slackid]-learn_parm->svm_c)) { - alphaslack[docs[i]->slackid]-=a[i]; - a[i]=0; - } - else { - a[i]-=(alphaslack[docs[i]->slackid]-learn_parm->svm_c); - alphaslack[docs[i]->slackid]=learn_parm->svm_c; - } - } - } - for(jj=0;(i=active2dnum[jj])>=0;jj++) - learn_parm->svm_cost[i]=a[i]+(learn_parm->svm_c - -alphaslack[docs[i]->slackid]); - model->at_upper_bound=0; - for(jj=0;jj<=maxslackid;jj++) { - if(alphaslack[jj]>(learn_parm->svm_c-learn_parm->epsilon_a)) - model->at_upper_bound++; - } - - if(verbosity>=2) t3=get_runtime(); - update_linear_component(docs,label,active2dnum,a,a_old,working2dnum,totdoc, - totwords,kernel_parm,kernel_cache,lin,aicache, - weights); - compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, - slack,alphaslack); - - if(verbosity>=2) t4=get_runtime(); - supvecnum=calculate_svm_model(docs,label,unlabeled,lin,a,a_old,c, - learn_parm,working2dnum,active2dnum,model); - - if(verbosity>=2) t5=get_runtime(); + if (verbosity >= 2) + t5 = get_runtime(); /* The following computation of the objective function works only */ /* relative to the active variables */ - if(verbosity>=3) { - 
criterion=compute_objective_function(a,lin,c,learn_parm->eps,label, - active2dnum); - printf("Objective function (over active variables): %.16f\n",criterion); - fflush(stdout); + if (verbosity >= 3) { + criterion = compute_objective_function(a, lin, c, learn_parm->eps, label, + active2dnum); + printf("Objective function (over active variables): %.16f\n", criterion); + fflush(stdout); } - for(jj=0;(i=working2dnum[jj])>=0;jj++) { - a_old[i]=a[i]; + for (jj = 0; (i = working2dnum[jj]) >= 0; jj++) { + a_old[i] = a[i]; } - retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, - slack,alphaslack,totdoc,learn_parm, - maxdiff,epsilon_crit_org,&misclassified, - active2dnum,last_suboptimal_at, - iteration,kernel_parm); + retrain = check_optimality_sharedslack( + docs, model, label, a, lin, c, slack, alphaslack, totdoc, learn_parm, + maxdiff, epsilon_crit_org, &misclassified, active2dnum, + last_suboptimal_at, iteration, kernel_parm); - if(verbosity>=2) { - t6=get_runtime(); - timing_profile->time_select+=t1-t0; - timing_profile->time_kernel+=t2-t1; - timing_profile->time_opti+=t3-t2; - timing_profile->time_update+=t4-t3; - timing_profile->time_model+=t5-t4; - timing_profile->time_check+=t6-t5; + if (verbosity >= 2) { + t6 = get_runtime(); + timing_profile->time_select += t1 - t0; + timing_profile->time_kernel += t2 - t1; + timing_profile->time_opti += t3 - t2; + timing_profile->time_update += t4 - t3; + timing_profile->time_model += t5 - t4; + timing_profile->time_check += t6 - t5; } /* checking whether optimizer got stuck */ - if((*maxdiff) < bestmaxdiff) { - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + if ((*maxdiff) < bestmaxdiff) { + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; } - if(iteration > (bestmaxdiffiter+learn_parm->maxiter)) { + if (iteration > (bestmaxdiffiter + learn_parm->maxiter)) { /* long time no progress? */ - terminate=1; - retrain=0; - if(verbosity>=1) - printf("\nWARNING: Relaxing KT-Conditions due to slow progress! Terminating!\n"); + terminate = 1; + retrain = 0; + if (verbosity >= 1) + printf("\nWARNING: Relaxing KT-Conditions due to slow progress! " + "Terminating!\n"); } - noshrink=0; - - if((!retrain) && (inactivenum>0) - && ((!learn_parm->skip_final_opt_check) - || (kernel_parm->kernel_type == LINEAR))) { - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - if(verbosity==1) { - printf("\n"); - } - printf(" Checking optimality of inactive variables..."); - fflush(stdout); + noshrink = 0; + + if ((!retrain) && (inactivenum > 0) && + ((!learn_parm->skip_final_opt_check) || + (kernel_parm->kernel_type == LINEAR))) { + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + if (verbosity == 1) { + printf("\n"); + } + printf(" Checking optimality of inactive variables..."); + fflush(stdout); } - t1=get_runtime(); - reactivate_inactive_examples(label,unlabeled,a,shrink_state,lin,c,totdoc, - totwords,iteration,learn_parm,inconsistent, - docs,kernel_parm,kernel_cache,model,aicache, - weights,maxdiff); + t1 = get_runtime(); + reactivate_inactive_examples( + label, unlabeled, a, shrink_state, lin, c, totdoc, totwords, + iteration, learn_parm, inconsistent, docs, kernel_parm, kernel_cache, + model, aicache, weights, maxdiff); /* Update to new active variables. 
*/ - activenum=compute_index(shrink_state->active,totdoc,active2dnum); - inactivenum=totdoc-activenum; - /* check optimality, since check in reactivate does not work for - sharedslacks */ - compute_shared_slacks(docs,label,a,lin,c,active2dnum,learn_parm, - slack,alphaslack); - retrain=check_optimality_sharedslack(docs,model,label,a,lin,c, - slack,alphaslack,totdoc,learn_parm, - maxdiff,epsilon_crit_org,&misclassified, - active2dnum,last_suboptimal_at, - iteration,kernel_parm); + activenum = compute_index(shrink_state->active, totdoc, active2dnum); + inactivenum = totdoc - activenum; + /* check optimality, since check in reactivate does not work for + sharedslacks */ + compute_shared_slacks(docs, label, a, lin, c, active2dnum, learn_parm, + slack, alphaslack); + retrain = check_optimality_sharedslack( + docs, model, label, a, lin, c, slack, alphaslack, totdoc, learn_parm, + maxdiff, epsilon_crit_org, &misclassified, active2dnum, + last_suboptimal_at, iteration, kernel_parm); /* reset watchdog */ - bestmaxdiff=(*maxdiff); - bestmaxdiffiter=iteration; + bestmaxdiff = (*maxdiff); + bestmaxdiffiter = iteration; /* termination criterion */ - noshrink=1; - retrain=0; - if((*maxdiff) > learn_parm->epsilon_crit) - retrain=1; - timing_profile->time_shrink+=get_runtime()-t1; - if(((verbosity>=1) && (kernel_parm->kernel_type != LINEAR)) - || (verbosity>=2)) { - printf("done.\n"); fflush(stdout); - printf(" Number of inactive variables = %ld\n",inactivenum); - } + noshrink = 1; + retrain = 0; + if ((*maxdiff) > learn_parm->epsilon_crit) + retrain = 1; + timing_profile->time_shrink += get_runtime() - t1; + if (((verbosity >= 1) && (kernel_parm->kernel_type != LINEAR)) || + (verbosity >= 2)) { + printf("done.\n"); + fflush(stdout); + printf(" Number of inactive variables = %ld\n", inactivenum); + } } - if((!retrain) && (learn_parm->epsilon_crit>(*maxdiff))) - learn_parm->epsilon_crit=(*maxdiff); - if((!retrain) && (learn_parm->epsilon_crit>epsilon_crit_org)) { - learn_parm->epsilon_crit/=2.0; - retrain=1; - noshrink=1; + if ((!retrain) && (learn_parm->epsilon_crit > (*maxdiff))) + learn_parm->epsilon_crit = (*maxdiff); + if ((!retrain) && (learn_parm->epsilon_crit > epsilon_crit_org)) { + learn_parm->epsilon_crit /= 2.0; + retrain = 1; + noshrink = 1; } - if(learn_parm->epsilon_critepsilon_crit=epsilon_crit_org; - - if(verbosity>=2) { - printf(" => (%ld SV (incl. %ld SV at u-bound), max violation=%.5f)\n", - supvecnum,model->at_upper_bound,(*maxdiff)); + if (learn_parm->epsilon_crit < epsilon_crit_org) + learn_parm->epsilon_crit = epsilon_crit_org; + + if (verbosity >= 2) { + printf(" => (%ld SV (incl. 
%ld SV at u-bound), max violation=%.5f)\n", + supvecnum, model->at_upper_bound, (*maxdiff)); fflush(stdout); } - if(verbosity>=3) { + if (verbosity >= 3) { printf("\n"); } - if(((iteration % 10) == 0) && (!noshrink)) { - activenum=shrink_problem(docs,learn_parm,shrink_state, - kernel_parm,active2dnum, - last_suboptimal_at,iteration,totdoc, - maxl((long)(activenum/10), - maxl((long)(totdoc/500),100)), - a,inconsistent); - inactivenum=totdoc-activenum; - if((kernel_cache) - && (supvecnum>kernel_cache->max_elems) - && ((kernel_cache->activenum-activenum)>maxl((long)(activenum/10),500))) { - kernel_cache_shrink(kernel_cache,totdoc, - minl((kernel_cache->activenum-activenum), - (kernel_cache->activenum-supvecnum)), - shrink_state->active); + if (((iteration % 10) == 0) && (!noshrink)) { + activenum = shrink_problem( + docs, learn_parm, shrink_state, kernel_parm, active2dnum, + last_suboptimal_at, iteration, totdoc, + maxl((long)(activenum / 10), maxl((long)(totdoc / 500), 100)), a, + inconsistent); + inactivenum = totdoc - activenum; + if ((kernel_cache) && (supvecnum > kernel_cache->max_elems) && + ((kernel_cache->activenum - activenum) > + maxl((long)(activenum / 10), 500))) { + kernel_cache_shrink(kernel_cache, totdoc, + minl((kernel_cache->activenum - activenum), + (kernel_cache->activenum - supvecnum)), + shrink_state->active); } } - } /* end of loop */ - - + } /* end of loop */ + free(alphaslack); free(slack); free(chosen); @@ -2093,496 +2139,501 @@ long optimize_to_convergence_sharedslack(DOC **docs, long int *label, free(qp.opt_xinit); free(qp.opt_low); free(qp.opt_up); - if(weights) free(weights); + if (weights) + free(weights); - learn_parm->epsilon_crit=epsilon_crit_org; /* restore org */ - model->maxdiff=(*maxdiff); + learn_parm->epsilon_crit = epsilon_crit_org; /* restore org */ + model->maxdiff = (*maxdiff); - return(iteration); -} - - -double compute_objective_function(double *a, double *lin, double *c, - double eps, long int *label, - long int *active2dnum) - /* Return value of objective function. */ - /* Works only relative to the active variables! */ + return (iteration); +} + +double compute_objective_function(double *a, double *lin, double *c, double eps, + long int *label, long int *active2dnum) +/* Return value of objective function. */ +/* Works only relative to the active variables! 
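   As a worked reading of the loop below: lin[i] caches
   sum_j a[j]*label[j]*K(x_i,x_j), so the accumulated value is

       W(a) = sum_i (eps - y_i*c_i)*a_i + 0.5*sum_i a_i*y_i*lin[i]

   with both sums running only over the indices listed in active2dnum;
   contributions of shrunken variables are deliberately omitted.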
*/ { - long i,ii; + long i, ii; double criterion; /* calculate value of objective function */ - criterion=0; - for(ii=0;active2dnum[ii]>=0;ii++) { - i=active2dnum[ii]; - criterion=criterion+(eps-(double)label[i]*c[i])*a[i]+0.5*a[i]*label[i]*lin[i]; - } - return(criterion); + criterion = 0; + for (ii = 0; active2dnum[ii] >= 0; ii++) { + i = active2dnum[ii]; + criterion = criterion + (eps - (double)label[i] * c[i]) * a[i] + + 0.5 * a[i] * label[i] * lin[i]; + } + return (criterion); } -void clear_index(long int *index) - /* initializes and empties index */ +void clear_index(long int *index) +/* initializes and empties index */ { - index[0]=-1; -} - -void add_to_index(long int *index, long int elem) - /* initializes and empties index */ + index[0] = -1; +} + +void add_to_index(long int *index, long int elem) +/* initializes and empties index */ { register long i; - for(i=0;index[i] != -1;i++); - index[i]=elem; - index[i+1]=-1; + for (i = 0; index[i] != -1; i++) + ; + index[i] = elem; + index[i + 1] = -1; } -long compute_index(long int *binfeature, long int range, long int *index) - /* create an inverted index of binfeature */ -{ - register long i,ii; +long compute_index(long int *binfeature, long int range, long int *index) +/* create an inverted index of binfeature */ +{ + register long i, ii; - ii=0; - for(i=0;i= 3) { + printf("Running optimizer..."); + fflush(stdout); + } + /* call the qp-subsolver */ + a_v = optimize_qp(qp, epsilon_crit_target, learn_parm->svm_maxqpsize, + &(model->b), /* in case the optimizer gives us */ + /* the threshold for free. otherwise */ + /* b is calculated in calculate_model. */ + learn_parm); + if (verbosity >= 3) { + printf("done\n"); + } + + for (i = 0; i < varnum; i++) { + a[working2dnum[i]] = a_v[i]; + /* + if(a_v[i]<=(0+learn_parm->epsilon_a)) { + a[working2dnum[i]]=0; + } + else + if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { + a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; + } + */ } - return(ii); } - -void optimize_svm(DOC **docs, long int *label, long int *unlabeled, - long int *exclude_from_eq_const, double eq_target, - long int *chosen, long int *active2dnum, MODEL *model, - long int totdoc, long int *working2dnum, long int varnum, - double *a, double *lin, double *c, LEARN_PARM *learn_parm, - CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp, - double *epsilon_crit_target) - /* Do optimization on the working set. */ -{ - long i; - double *a_v; - - compute_matrices_for_optimization(docs,label,unlabeled, - exclude_from_eq_const,eq_target,chosen, - active2dnum,working2dnum,model,a,lin,c, - varnum,totdoc,learn_parm,aicache, - kernel_parm,qp); - - if(verbosity>=3) { - printf("Running optimizer..."); fflush(stdout); - } - /* call the qp-subsolver */ - a_v=optimize_qp(qp,epsilon_crit_target, - learn_parm->svm_maxqpsize, - &(model->b), /* in case the optimizer gives us */ - /* the threshold for free. otherwise */ - /* b is calculated in calculate_model. 
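   (aside, inferred from calculate_svm_model() further down: if the
   solver does not hand back the threshold, b is later recovered from
   any unbounded SV with epsilon_a < a_i < C_i as
   b = y_i*eps - c_i + lin_i)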
*/ - learn_parm); - if(verbosity>=3) { - printf("done\n"); - } - - for(i=0;iepsilon_a)) { - a[working2dnum[i]]=0; - } - else if(a_v[i]>=(learn_parm->svm_cost[working2dnum[i]]-learn_parm->epsilon_a)) { - a[working2dnum[i]]=learn_parm->svm_cost[working2dnum[i]]; - } - */ - } -} - -void compute_matrices_for_optimization(DOC **docs, long int *label, - long int *unlabeled, long *exclude_from_eq_const, double eq_target, - long int *chosen, long int *active2dnum, - long int *key, MODEL *model, double *a, double *lin, double *c, - long int varnum, long int totdoc, LEARN_PARM *learn_parm, - CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) -{ - register long ki,kj,i,j; +void compute_matrices_for_optimization( + DOC **docs, long int *label, long int *unlabeled, + long *exclude_from_eq_const, double eq_target, long int *chosen, + long int *active2dnum, long int *key, MODEL *model, double *a, double *lin, + double *c, long int varnum, long int totdoc, LEARN_PARM *learn_parm, + CFLOAT *aicache, KERNEL_PARM *kernel_parm, QP *qp) { + register long ki, kj, i, j; register double kernel_temp; - if(verbosity>=3) { - fprintf(stdout,"Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, coef_lin %f, coef_const %f])...",kernel_parm->kernel_type,kernel_parm->poly_degree,kernel_parm->rbf_gamma,kernel_parm->coef_lin,kernel_parm->coef_const); + if (verbosity >= 3) { + fprintf(stdout, + "Computing qp-matrices (type %ld kernel [degree %ld, rbf_gamma %f, " + "coef_lin %f, coef_const %f])...", + kernel_parm->kernel_type, kernel_parm->poly_degree, + kernel_parm->rbf_gamma, kernel_parm->coef_lin, + kernel_parm->coef_const); fflush(stdout); } - qp->opt_n=varnum; - qp->opt_ce0[0]=-eq_target; /* compute the constant for equality constraint */ - for(j=1;jsv_num;j++) { /* start at 1 */ - if((!chosen[(model->supvec[j])->docnum]) - && (!exclude_from_eq_const[(model->supvec[j])->docnum])) { - qp->opt_ce0[0]+=model->alpha[j]; + qp->opt_n = varnum; + qp->opt_ce0[0] = + -eq_target; /* compute the constant for equality constraint */ + for (j = 1; j < model->sv_num; j++) { /* start at 1 */ + if ((!chosen[(model->supvec[j])->docnum]) && + (!exclude_from_eq_const[(model->supvec[j])->docnum])) { + qp->opt_ce0[0] += model->alpha[j]; } - } - if(learn_parm->biased_hyperplane) - qp->opt_m=1; - else - qp->opt_m=0; /* eq-constraint will be ignored */ + } + if (learn_parm->biased_hyperplane) + qp->opt_m = 1; + else + qp->opt_m = 0; /* eq-constraint will be ignored */ /* init linear part of objective function */ - for(i=0;iopt_g0[i]=lin[key[i]]; + for (i = 0; i < varnum; i++) { + qp->opt_g0[i] = lin[key[i]]; } - for(i=0;iopt_ce[i]=label[ki]; - qp->opt_low[i]=0; - qp->opt_up[i]=learn_parm->svm_cost[ki]; + qp->opt_ce[i] = label[ki]; + qp->opt_low[i] = 0; + qp->opt_up[i] = learn_parm->svm_cost[ki]; - kernel_temp=(double)kernel(kernel_parm,docs[ki],docs[ki]); + kernel_temp = (double)kernel(kernel_parm, docs[ki], docs[ki]); /* compute linear part of objective function */ - qp->opt_g0[i]-=(kernel_temp*a[ki]*(double)label[ki]); + qp->opt_g0[i] -= (kernel_temp * a[ki] * (double)label[ki]); /* compute quadratic part of objective function */ - qp->opt_g[varnum*i+i]=kernel_temp; - for(j=i+1;jopt_g[varnum * i + i] = kernel_temp; + for (j = i + 1; j < varnum; j++) { + kj = key[j]; + kernel_temp = (double)kernel(kernel_parm, docs[ki], docs[kj]); /* compute linear part of objective function */ - qp->opt_g0[i]-=(kernel_temp*a[kj]*(double)label[kj]); - qp->opt_g0[j]-=(kernel_temp*a[ki]*(double)label[ki]); + qp->opt_g0[i] -= (kernel_temp * a[kj] * 
(double)label[kj]);
+      qp->opt_g0[j] -= (kernel_temp * a[ki] * (double)label[ki]);
       /* compute quadratic part of objective function */
-      qp->opt_g[varnum*i+j]=(double)label[ki]*(double)label[kj]*kernel_temp;
-      qp->opt_g[varnum*j+i]=(double)label[ki]*(double)label[kj]*kernel_temp;
+      qp->opt_g[varnum * i + j] =
+          (double)label[ki] * (double)label[kj] * kernel_temp;
+      qp->opt_g[varnum * j + i] =
+          (double)label[ki] * (double)label[kj] * kernel_temp;
     }
-    if(verbosity>=3) {
-      if(i % 20 == 0) {
-        fprintf(stdout,"%ld..",i); fflush(stdout);
+    if (verbosity >= 3) {
+      if (i % 20 == 0) {
+        fprintf(stdout, "%ld..", i);
+        fflush(stdout);
       }
     }
   }

-  for(i=0;i<varnum;i++) {
-    qp->opt_xinit[i]=a[key[i]];
+  for (i = 0; i < varnum; i++) {
+    qp->opt_xinit[i] = a[key[i]];
     /* set linear part of objective function */
-    qp->opt_g0[i]=(learn_parm->eps-(double)label[key[i]]*c[key[i]])+qp->opt_g0[i]*(double)label[key[i]];
+    qp->opt_g0[i] = (learn_parm->eps - (double)label[key[i]] * c[key[i]]) +
+                    qp->opt_g0[i] * (double)label[key[i]];
   }

-  if(verbosity>=3) {
-    fprintf(stdout,"done\n");
+  if (verbosity >= 3) {
+    fprintf(stdout, "done\n");
   }
 }

-long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
-                         double *lin, double *a, double *a_old, double *c,
-                         LEARN_PARM *learn_parm, long int *working2dnum,
-                         long int *active2dnum, MODEL *model)
-     /* Compute decision function based on current values */
-     /* of alpha. */
+long calculate_svm_model(DOC **docs, long int *label, long int *unlabeled,
+                         double *lin, double *a, double *a_old, double *c,
+                         LEARN_PARM *learn_parm, long int *working2dnum,
+                         long int *active2dnum, MODEL *model)
+/* Compute decision function based on current values */
+/* of alpha. */
 {
-  long i,ii,pos,b_calculated=0,first_low,first_high;
-  double ex_c,b_temp,b_low,b_high;
+  long i, ii, pos, b_calculated = 0, first_low, first_high;
+  double ex_c, b_temp, b_low, b_high;

-  if(verbosity>=3) {
-    printf("Calculating model..."); fflush(stdout);
+  if (verbosity >= 3) {
+    printf("Calculating model...");
+    fflush(stdout);
   }

-  if(!learn_parm->biased_hyperplane) {
-    model->b=0;
-    b_calculated=1;
+  if (!learn_parm->biased_hyperplane) {
+    model->b = 0;
+    b_calculated = 1;
   }

-  for(ii=0;(i=working2dnum[ii])>=0;ii++) {
-    if((a_old[i]>0) && (a[i]==0)) { /* remove from model */
-      pos=model->index[i];
-      model->index[i]=-1;
+  for (ii = 0; (i = working2dnum[ii]) >= 0; ii++) {
+    if ((a_old[i] > 0) && (a[i] == 0)) { /* remove from model */
+      pos = model->index[i];
+      model->index[i] = -1;
       (model->sv_num)--;
-      model->supvec[pos]=model->supvec[model->sv_num];
-      model->alpha[pos]=model->alpha[model->sv_num];
-      model->index[(model->supvec[pos])->docnum]=pos;
-    }
-    else if((a_old[i]==0) && (a[i]>0)) { /* add to model */
-      model->supvec[model->sv_num]=docs[i];
-      model->alpha[model->sv_num]=a[i]*(double)label[i];
-      model->index[i]=model->sv_num;
+      model->supvec[pos] = model->supvec[model->sv_num];
+      model->alpha[pos] = model->alpha[model->sv_num];
+      model->index[(model->supvec[pos])->docnum] = pos;
+    } else if ((a_old[i] == 0) && (a[i] > 0)) { /* add to model */
+      model->supvec[model->sv_num] = docs[i];
+      model->alpha[model->sv_num] = a[i] * (double)label[i];
+      model->index[i] = model->sv_num;
       (model->sv_num)++;
-    }
-    else if(a_old[i]==a[i]) { /* nothing to do */
-    }
-    else { /* just update alpha */
-      model->alpha[model->index[i]]=a[i]*(double)label[i];
-    }
-
-    ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a;
-    if(!learn_parm->sharedslack) {
-      if((a_old[i]>=ex_c) && (a[i]<ex_c)) {
-        (model->at_upper_bound)--;
-      }
-      else if((a_old[i]<ex_c) && (a[i]>=ex_c)) {
-        (model->at_upper_bound)++;
+    } else if (a_old[i] 
== a[i]) { /* nothing to do */ + } else { /* just update alpha */ + model->alpha[model->index[i]] = a[i] * (double)label[i]; + } + + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if (!learn_parm->sharedslack) { + if ((a_old[i] >= ex_c) && (a[i] < ex_c)) { + (model->at_upper_bound)--; + } else if ((a_old[i] < ex_c) && (a[i] >= ex_c)) { + (model->at_upper_bound)++; } } - if((!b_calculated) - && (a[i]>learn_parm->epsilon_a) && (a[i]b=((double)label[i]*learn_parm->eps-c[i]+lin[i]); - /* model->b=(-(double)label[i]+lin[i]); */ - b_calculated=1; + if ((!b_calculated) && (a[i] > learn_parm->epsilon_a) && + (a[i] < ex_c)) { /* calculate b */ + model->b = ((double)label[i] * learn_parm->eps - c[i] + lin[i]); + /* model->b=(-(double)label[i]+lin[i]); */ + b_calculated = 1; } - } - + } + /* No alpha in the working set not at bounds, so b was not calculated in the usual way. The following handles this special case. */ - if(learn_parm->biased_hyperplane - && (!b_calculated) - && (model->sv_num-1 == model->at_upper_bound)) { - first_low=1; - first_high=1; - b_low=0; - b_high=0; - for(ii=0;(i=active2dnum[ii])>=0;ii++) { - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if(a[i]0) { - b_temp=-(learn_parm->eps-c[i]+lin[i]); - if((b_temp>b_low) || (first_low)) { - b_low=b_temp; - first_low=0; - } - } - else { - b_temp=-(-learn_parm->eps-c[i]+lin[i]); - if((b_tempeps-c[i]+lin[i]); - if((b_temp>b_low) || (first_low)) { - b_low=b_temp; - first_low=0; - } - } - else { - b_temp=-(learn_parm->eps-c[i]+lin[i]); - if((b_tempbiased_hyperplane && (!b_calculated) && + (model->sv_num - 1 == model->at_upper_bound)) { + first_low = 1; + first_high = 1; + b_low = 0; + b_high = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if (a[i] < ex_c) { + if (label[i] > 0) { + b_temp = -(learn_parm->eps - c[i] + lin[i]); + if ((b_temp > b_low) || (first_low)) { + b_low = b_temp; + first_low = 0; + } + } else { + b_temp = -(-learn_parm->eps - c[i] + lin[i]); + if ((b_temp < b_high) || (first_high)) { + b_high = b_temp; + first_high = 0; + } + } + } else { + if (label[i] < 0) { + b_temp = -(-learn_parm->eps - c[i] + lin[i]); + if ((b_temp > b_low) || (first_low)) { + b_low = b_temp; + first_low = 0; + } + } else { + b_temp = -(learn_parm->eps - c[i] + lin[i]); + if ((b_temp < b_high) || (first_high)) { + b_high = b_temp; + first_high = 0; + } + } } } - if(first_high) { - model->b=-b_low; - } - else if(first_low) { - model->b=-b_high; - } - else { - model->b=-(b_high+b_low)/2.0; /* select b as the middle of range */ + if (first_high) { + model->b = -b_low; + } else if (first_low) { + model->b = -b_high; + } else { + model->b = -(b_high + b_low) / 2.0; /* select b as the middle of range */ /* printf("\nb_low=%f, b_high=%f,b=%f\n",b_low,b_high,model->b); */ } } - if(verbosity>=3) { - printf("done\n"); fflush(stdout); + if (verbosity >= 3) { + printf("done\n"); + fflush(stdout); } - return(model->sv_num-1); /* have to substract one, since element 0 is empty*/ + return (model->sv_num - + 1); /* have to substract one, since element 0 is empty*/ } -long check_optimality(MODEL *model, long int *label, long int *unlabeled, - double *a, double *lin, double *c, long int totdoc, - LEARN_PARM *learn_parm, double *maxdiff, - double epsilon_crit_org, long int *misclassified, - long int *inconsistent, long int *active2dnum, - long int *last_suboptimal_at, - long int iteration, KERNEL_PARM *kernel_parm) - /* Check KT-conditions */ +long check_optimality(MODEL *model, 
long int *label, long int *unlabeled, + double *a, double *lin, double *c, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, + double epsilon_crit_org, long int *misclassified, + long int *inconsistent, long int *active2dnum, + long int *last_suboptimal_at, long int iteration, + KERNEL_PARM *kernel_parm) +/* Check KT-conditions */ { - long i,ii,retrain; - double dist,ex_c,target; + long i, ii, retrain; + double dist, ex_c, target; - if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ - learn_parm->epsilon_shrink=-learn_parm->epsilon_crit+epsilon_crit_org; + if (kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink = -learn_parm->epsilon_crit + epsilon_crit_org; + } else { /* be conservative */ + learn_parm->epsilon_shrink = + learn_parm->epsilon_shrink * 0.7 + (*maxdiff) * 0.3; } - else { /* be conservative */ - learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; - } - retrain=0; - (*maxdiff)=0; - (*misclassified)=0; - for(ii=0;(i=active2dnum[ii])>=0;ii++) { - if((!inconsistent[i]) && label[i]) { - dist=(lin[i]-model->b)*(double)label[i];/* 'distance' from - hyperplane*/ - target=-(learn_parm->eps-(double)label[i]*c[i]); - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if(dist <= 0) { - (*misclassified)++; /* does not work due to deactivation of var */ + retrain = 0; + (*maxdiff) = 0; + (*misclassified) = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { + if ((!inconsistent[i]) && label[i]) { + dist = (lin[i] - model->b) * (double)label[i]; /* 'distance' from + hyperplane*/ + target = -(learn_parm->eps - (double)label[i] * c[i]); + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if (dist <= 0) { + (*misclassified)++; /* does not work due to deactivation of var */ } - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>(*maxdiff)) /* largest violation */ - (*maxdiff)=dist-target; - } - else if((a[i](*maxdiff)) /* largest violation */ - (*maxdiff)=target-dist; + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > (*maxdiff)) /* largest violation */ + (*maxdiff) = dist - target; + } else if ((a[i] < ex_c) && (dist < target)) { + if ((target - dist) > (*maxdiff)) /* largest violation */ + (*maxdiff) = target - dist; } /* Count how long a variable was at lower/upper bound (and optimal).*/ /* Variables, which were at the bound and optimal for a long */ /* time are unlikely to become support vectors. In case our */ /* cache is filled up, those variables are excluded to save */ - /* kernel evaluations. 
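       In short, the rule implemented below: last_suboptimal_at[i] is
       stamped with the current iteration whenever variable i is free
       (epsilon_a < a_i < C_i) or sits at a bound while still violating
       its KT condition by more than epsilon_shrink; only variables whose
       stamp is older than svm_iter_to_shrink iterations later become
       shrink candidates.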
(See chapter 'Shrinking').*/ - if((a[i]>(learn_parm->epsilon_a)) - && (a[i] (learn_parm->epsilon_a)) && (a[i] < ex_c)) { + last_suboptimal_at[i] = iteration; /* not at bound */ + } else if ((a[i] <= (learn_parm->epsilon_a)) && + (dist < (target + learn_parm->epsilon_shrink))) { + last_suboptimal_at[i] = iteration; /* not likely optimal */ + } else if ((a[i] >= ex_c) && + (dist > (target - learn_parm->epsilon_shrink))) { + last_suboptimal_at[i] = iteration; /* not likely optimal */ } - else if((a[i]<=(learn_parm->epsilon_a)) - && (dist < (target+learn_parm->epsilon_shrink))) { - last_suboptimal_at[i]=iteration; /* not likely optimal */ - } - else if((a[i]>=ex_c) - && (dist > (target-learn_parm->epsilon_shrink))) { - last_suboptimal_at[i]=iteration; /* not likely optimal */ - } - } + } } /* termination criterion */ - if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { - retrain=1; + if ((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) { + retrain = 1; } - return(retrain); + return (retrain); } -long check_optimality_sharedslack(DOC **docs, MODEL *model, long int *label, - double *a, double *lin, double *c, double *slack, - double *alphaslack, - long int totdoc, - LEARN_PARM *learn_parm, double *maxdiff, - double epsilon_crit_org, long int *misclassified, - long int *active2dnum, - long int *last_suboptimal_at, - long int iteration, KERNEL_PARM *kernel_parm) - /* Check KT-conditions */ +long check_optimality_sharedslack( + DOC **docs, MODEL *model, long int *label, double *a, double *lin, + double *c, double *slack, double *alphaslack, long int totdoc, + LEARN_PARM *learn_parm, double *maxdiff, double epsilon_crit_org, + long int *misclassified, long int *active2dnum, + long int *last_suboptimal_at, long int iteration, KERNEL_PARM *kernel_parm) +/* Check KT-conditions */ { - long i,ii,retrain; - double dist,dist_noslack,ex_c=0,target; + long i, ii, retrain; + double dist, dist_noslack, ex_c = 0, target; - if(kernel_parm->kernel_type == LINEAR) { /* be optimistic */ - learn_parm->epsilon_shrink=-learn_parm->epsilon_crit/2.0; - } - else { /* be conservative */ - learn_parm->epsilon_shrink=learn_parm->epsilon_shrink*0.7+(*maxdiff)*0.3; + if (kernel_parm->kernel_type == LINEAR) { /* be optimistic */ + learn_parm->epsilon_shrink = -learn_parm->epsilon_crit / 2.0; + } else { /* be conservative */ + learn_parm->epsilon_shrink = + learn_parm->epsilon_shrink * 0.7 + (*maxdiff) * 0.3; } - retrain=0; - (*maxdiff)=0; - (*misclassified)=0; - for(ii=0;(i=active2dnum[ii])>=0;ii++) { + retrain = 0; + (*maxdiff) = 0; + (*misclassified) = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { /* 'distance' from hyperplane*/ - dist_noslack=(lin[i]-model->b)*(double)label[i]; - dist=dist_noslack+slack[docs[i]->slackid]; - target=-(learn_parm->eps-(double)label[i]*c[i]); - ex_c=learn_parm->svm_c-learn_parm->epsilon_a; - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>(*maxdiff)) { /* largest violation */ - (*maxdiff)=dist-target; - if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]); - if(verbosity>=5) printf(" (single %f)\n",(*maxdiff)); + dist_noslack = (lin[i] - model->b) * (double)label[i]; + dist = dist_noslack + slack[docs[i]->slackid]; + target = -(learn_parm->eps - (double)label[i] * c[i]); + ex_c = learn_parm->svm_c - learn_parm->epsilon_a; + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > (*maxdiff)) { /* 
largest violation */
+        (*maxdiff) = dist - target;
+        if (verbosity >= 5)
+          printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, "
+                 "alphaslack=%f\n",
+                 docs[i]->slackid, dist, target, slack[docs[i]->slackid], a[i],
+                 alphaslack[docs[i]->slackid]);
+        if (verbosity >= 5)
+          printf(" (single %f)\n", (*maxdiff));
       }
     }
-    if((alphaslack[docs[i]->slackid]<ex_c) && (slack[docs[i]->slackid]>0)) {
-      if((slack[docs[i]->slackid])>(*maxdiff)) { /* largest violation */
-        (*maxdiff)=slack[docs[i]->slackid];
-        if(verbosity>=5) printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, alphaslack=%f\n",docs[i]->slackid,dist,target,slack[docs[i]->slackid],a[i],alphaslack[docs[i]->slackid]);
-        if(verbosity>=5) printf(" (joint %f)\n",(*maxdiff));
+    if ((alphaslack[docs[i]->slackid] < ex_c) &&
+        (slack[docs[i]->slackid] > 0)) {
+      if ((slack[docs[i]->slackid]) > (*maxdiff)) { /* largest violation */
+        (*maxdiff) = slack[docs[i]->slackid];
+        if (verbosity >= 5)
+          printf("sid %ld: dist=%.2f, target=%.2f, slack=%.2f, a=%f, "
+                 "alphaslack=%f\n",
+                 docs[i]->slackid, dist, target, slack[docs[i]->slackid], a[i],
+                 alphaslack[docs[i]->slackid]);
+        if (verbosity >= 5)
+          printf(" (joint %f)\n", (*maxdiff));
       }
     }
     /* Count how long a variable was at lower/upper bound (and optimal).*/
     /* Variables, which were at the bound and optimal for a long */
     /* time are unlikely to become support vectors. In case our */
     /* cache is filled up, those variables are excluded to save */
-    /* kernel evaluations. (See chapter 'Shrinking').*/
-    if((a[i]<=learn_parm->epsilon_a) && (dist < (target+learn_parm->epsilon_shrink))) {
-      last_suboptimal_at[i]=iteration; /* not likely optimal */
+    /* kernel evaluations. (See chapter 'Shrinking').*/
+    if ((a[i] <= learn_parm->epsilon_a) &&
+        (dist < (target + learn_parm->epsilon_shrink))) {
+      last_suboptimal_at[i] = iteration; /* not likely optimal */
+    } else if ((alphaslack[docs[i]->slackid] < ex_c) &&
+               (a[i] > learn_parm->epsilon_a) &&
+               (fabs(dist_noslack - target) > -learn_parm->epsilon_shrink)) {
+      last_suboptimal_at[i] = iteration; /* not at lower bound */
+    } else if ((alphaslack[docs[i]->slackid] >= ex_c) &&
+               (a[i] > learn_parm->epsilon_a) &&
+               (fabs(target - dist) > -learn_parm->epsilon_shrink)) {
+      last_suboptimal_at[i] = iteration; /* not likely optimal */
     }
-    else if((alphaslack[docs[i]->slackid]<ex_c) && (a[i]>learn_parm->epsilon_a) && (fabs(dist_noslack - target) > -learn_parm->epsilon_shrink)) {
-      last_suboptimal_at[i]=iteration; /* not at lower bound */
-    }
-    else if((alphaslack[docs[i]->slackid]>=ex_c) && (a[i]>learn_parm->epsilon_a) && (fabs(target-dist) > -learn_parm->epsilon_shrink)) {
-      last_suboptimal_at[i]=iteration; /* not likely optimal */
-    }
-  }
+  }

   /* termination criterion */
-  if((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
-    retrain=1;
+  if ((!retrain) && ((*maxdiff) > learn_parm->epsilon_crit)) {
+    retrain = 1;
   }
-  return(retrain);
+  return (retrain);
 }

-void compute_shared_slacks(DOC **docs, long int *label,
-                           double *a, double *lin,
-                           double *c, long int *active2dnum,
-                           LEARN_PARM *learn_parm,
-                           double *slack, double *alphaslack)
-     /* compute the value of shared slacks and the joint alphas */
+void compute_shared_slacks(DOC **docs, long int *label, double *a, double *lin,
+                           double *c, long int *active2dnum,
+                           LEARN_PARM *learn_parm, double *slack,
+                           double *alphaslack)
+/* compute the value of shared slacks and the joint alphas */
 {
-  long jj,i;
-  double dist,target;
+  long jj, i;
+  double dist, target;

-  for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* clear slack variables */
-    
slack[docs[i]->slackid]=0.0; + for (jj = 0; (i = active2dnum[jj]) >= 0; jj++) { /* clear slack variables */ + slack[docs[i]->slackid] = 0.0; /* alphaslack[docs[i]->slackid]=0.0; */ } - for(jj=0;(i=active2dnum[jj])>=0;jj++) { /* recompute slack variables */ - dist=(lin[i])*(double)label[i]; - target=-(learn_parm->eps-(double)label[i]*c[i]); - if((target-dist) > slack[docs[i]->slackid]) - slack[docs[i]->slackid]=target-dist; + for (jj = 0; (i = active2dnum[jj]) >= 0; + jj++) { /* recompute slack variables */ + dist = (lin[i]) * (double)label[i]; + target = -(learn_parm->eps - (double)label[i] * c[i]); + if ((target - dist) > slack[docs[i]->slackid]) + slack[docs[i]->slackid] = target - dist; /* alphaslack[docs[i]->slackid]+=a[i]; */ } -} - - -long identify_inconsistent(double *a, long int *label, - long int *unlabeled, long int totdoc, - LEARN_PARM *learn_parm, - long int *inconsistentnum, long int *inconsistent) -{ - long i,retrain; +} + +long identify_inconsistent(double *a, long int *label, long int *unlabeled, + long int totdoc, LEARN_PARM *learn_parm, + long int *inconsistentnum, long int *inconsistent) { + long i, retrain; /* Throw out examples with multipliers at upper bound. This */ /* corresponds to the -i 1 option. */ /* ATTENTION: this is just a heuristic for finding a close */ /* to minimum number of examples to exclude to */ /* make the problem separable with desired margin */ - retrain=0; - for(i=0;i=(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { - (*inconsistentnum)++; - inconsistent[i]=1; /* never choose again */ - retrain=2; /* start over */ - if(verbosity>=3) { - printf("inconsistent(%ld)..",i); fflush(stdout); - } + retrain = 0; + for (i = 0; i < totdoc; i++) { + if ((!inconsistent[i]) && (!unlabeled[i]) && + (a[i] >= (learn_parm->svm_cost[i] - learn_parm->epsilon_a))) { + (*inconsistentnum)++; + inconsistent[i] = 1; /* never choose again */ + retrain = 2; /* start over */ + if (verbosity >= 3) { + printf("inconsistent(%ld)..", i); + fflush(stdout); + } } } - return(retrain); + return (retrain); } -long identify_misclassified(double *lin, long int *label, - long int *unlabeled, long int totdoc, - MODEL *model, long int *inconsistentnum, - long int *inconsistent) -{ - long i,retrain; +long identify_misclassified(double *lin, long int *label, long int *unlabeled, + long int totdoc, MODEL *model, + long int *inconsistentnum, long int *inconsistent) { + long i, retrain; double dist; /* Throw out misclassified examples. 
This */ @@ -2590,387 +2641,378 @@ long identify_misclassified(double *lin, long int *label, /* ATTENTION: this is just a heuristic for finding a close */ /* to minimum number of examples to exclude to */ /* make the problem separable with desired margin */ - retrain=0; - for(i=0;ib)*(double)label[i]; /* 'distance' from hyperplane*/ - if((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { - (*inconsistentnum)++; - inconsistent[i]=1; /* never choose again */ - retrain=2; /* start over */ - if(verbosity>=3) { - printf("inconsistent(%ld)..",i); fflush(stdout); - } + retrain = 0; + for (i = 0; i < totdoc; i++) { + dist = + (lin[i] - model->b) * (double)label[i]; /* 'distance' from hyperplane*/ + if ((!inconsistent[i]) && (!unlabeled[i]) && (dist <= 0)) { + (*inconsistentnum)++; + inconsistent[i] = 1; /* never choose again */ + retrain = 2; /* start over */ + if (verbosity >= 3) { + printf("inconsistent(%ld)..", i); + fflush(stdout); + } } } - return(retrain); + return (retrain); } -long identify_one_misclassified(double *lin, long int *label, - long int *unlabeled, - long int totdoc, MODEL *model, - long int *inconsistentnum, - long int *inconsistent) -{ - long i,retrain,maxex=-1; - double dist,maxdist=0; +long identify_one_misclassified(double *lin, long int *label, + long int *unlabeled, long int totdoc, + MODEL *model, long int *inconsistentnum, + long int *inconsistent) { + long i, retrain, maxex = -1; + double dist, maxdist = 0; /* Throw out the 'most misclassified' example. This */ /* corresponds to the -i 3 option. */ /* ATTENTION: this is just a heuristic for finding a close */ /* to minimum number of examples to exclude to */ /* make the problem separable with desired margin */ - retrain=0; - for(i=0;ib)*(double)label[i];/* 'distance' from hyperplane*/ - if(distb) * + (double)label[i]; /* 'distance' from hyperplane*/ + if (dist < maxdist) { + maxdist = dist; + maxex = i; } } } - if(maxex>=0) { + if (maxex >= 0) { (*inconsistentnum)++; - inconsistent[maxex]=1; /* never choose again */ - retrain=2; /* start over */ - if(verbosity>=3) { - printf("inconsistent(%ld)..",i); fflush(stdout); + inconsistent[maxex] = 1; /* never choose again */ + retrain = 2; /* start over */ + if (verbosity >= 3) { + printf("inconsistent(%ld)..", i); + fflush(stdout); } } - return(retrain); + return (retrain); } -void update_linear_component(DOC **docs, long int *label, - long int *active2dnum, double *a, - double *a_old, long int *working2dnum, - long int totdoc, long int totwords, - KERNEL_PARM *kernel_parm, - KERNEL_CACHE *kernel_cache, - double *lin, CFLOAT *aicache, double *weights) - /* keep track of the linear component */ - /* lin of the gradient etc. by updating */ - /* based on the change of the variables */ - /* in the current working set */ - /* WARNING: Assumes that array of weights is initialized to all zero - values for linear kernel! */ +void update_linear_component(DOC **docs, long int *label, long int *active2dnum, + double *a, double *a_old, long int *working2dnum, + long int totdoc, long int totwords, + KERNEL_PARM *kernel_parm, + KERNEL_CACHE *kernel_cache, double *lin, + CFLOAT *aicache, double *weights) +/* keep track of the linear component */ +/* lin of the gradient etc. by updating */ +/* based on the change of the variables */ +/* in the current working set */ +/* WARNING: Assumes that array of weights is initialized to all zero + values for linear kernel! 
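   A compact reading of the fast path below (all names as used in this
   function): the working-set change is first folded into the dense
   weight vector,

       w += (a[i] - a_old[i]) * label[i] * x_i    over the working set,
       lin[j] += f->factor * (w . x_j)            over the active set,

   after which w is zeroed again, but only on the features the working
   set actually touched; that sparse reset is what makes this cheaper
   than clearing a totwords-sized vector on every iteration.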
*/ { - register long i,ii,j,jj; + register long i, ii, j, jj; register double tec; SVECTOR *f; - if(kernel_parm->kernel_type==0) { /* special linear case */ + if (kernel_parm->kernel_type == 0) { /* special linear case */ /* clear_vector_n(weights,totwords); */ - for(ii=0;(i=working2dnum[ii])>=0;ii++) { - if(a[i] != a_old[i]) { - for(f=docs[i]->fvec;f;f=f->next) - add_vector_ns(weights,f, - f->factor*((a[i]-a_old[i])*(double)label[i])); + for (ii = 0; (i = working2dnum[ii]) >= 0; ii++) { + if (a[i] != a_old[i]) { + for (f = docs[i]->fvec; f; f = f->next) + add_vector_ns(weights, f, + f->factor * ((a[i] - a_old[i]) * (double)label[i])); } } - for(jj=0;(j=active2dnum[jj])>=0;jj++) { - for(f=docs[j]->fvec;f;f=f->next) - lin[j]+=f->factor*sprod_ns(weights,f); + for (jj = 0; (j = active2dnum[jj]) >= 0; jj++) { + for (f = docs[j]->fvec; f; f = f->next) + lin[j] += f->factor * sprod_ns(weights, f); } - for(ii=0;(i=working2dnum[ii])>=0;ii++) { - if(a[i] != a_old[i]) { - for(f=docs[i]->fvec;f;f=f->next) - mult_vector_ns(weights,f,0.0); /* Set weights back to zero. */ - } /* This is faster than init */ - } /* weights to zero in each iter. */ - } - else { /* general case */ - for(jj=0;(i=working2dnum[jj])>=0;jj++) { - if(a[i] != a_old[i]) { - get_kernel_row(kernel_cache,docs,i,totdoc,active2dnum,aicache, - kernel_parm); - for(ii=0;(j=active2dnum[ii])>=0;ii++) { - tec=aicache[j]; - lin[j]+=(((a[i]*tec)-(a_old[i]*tec))*(double)label[i]); - } + for (ii = 0; (i = working2dnum[ii]) >= 0; ii++) { + if (a[i] != a_old[i]) { + for (f = docs[i]->fvec; f; f = f->next) + mult_vector_ns(weights, f, 0.0); /* Set weights back to zero. */ + } /* This is faster than init */ + } /* weights to zero in each iter. */ + } else { /* general case */ + for (jj = 0; (i = working2dnum[jj]) >= 0; jj++) { + if (a[i] != a_old[i]) { + get_kernel_row(kernel_cache, docs, i, totdoc, active2dnum, aicache, + kernel_parm); + for (ii = 0; (j = active2dnum[ii]) >= 0; ii++) { + tec = aicache[j]; + lin[j] += (((a[i] * tec) - (a_old[i] * tec)) * (double)label[i]); + } } } } -} - - -long incorporate_unlabeled_examples(MODEL *model, long int *label, - long int *inconsistent, - long int *unlabeled, - double *a, double *lin, - long int totdoc, double *selcrit, - long int *select, long int *key, - long int transductcycle, - KERNEL_PARM *kernel_parm, - LEARN_PARM *learn_parm) -{ - long i,j,k,j1,j2,j3,j4,unsupaddnum1=0,unsupaddnum2=0; - long pos,neg,upos,uneg,orgpos,orgneg,nolabel,newpos,newneg,allunlab; - double dist,model_length,posratio,negratio; - long check_every=2; +} + +long incorporate_unlabeled_examples(MODEL *model, long int *label, + long int *inconsistent, long int *unlabeled, + double *a, double *lin, long int totdoc, + double *selcrit, long int *select, + long int *key, long int transductcycle, + KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm) { + long i, j, k, j1, j2, j3, j4, unsupaddnum1 = 0, unsupaddnum2 = 0; + long pos, neg, upos, uneg, orgpos, orgneg, nolabel, newpos, newneg, allunlab; + double dist, model_length, posratio, negratio; + long check_every = 2; double loss; - static double switchsens=0.0,switchsensorg=0.0; - double umin,umax,sumalpha; - long imin=0,imax=0; - static long switchnum=0; + static double switchsens = 0.0, switchsensorg = 0.0; + double umin, umax, sumalpha; + long imin = 0, imax = 0; + static long switchnum = 0; - switchsens/=1.2; + switchsens /= 1.2; /* assumes that lin[] is up to date -> no inactive vars */ - orgpos=0; - orgneg=0; - newpos=0; - newneg=0; - nolabel=0; - allunlab=0; - for(i=0;i 0) { - 
orgpos++; + orgpos = 0; + orgneg = 0; + newpos = 0; + newneg = 0; + nolabel = 0; + allunlab = 0; + for (i = 0; i < totdoc; i++) { + if (!unlabeled[i]) { + if (label[i] > 0) { + orgpos++; + } else { + orgneg++; } - else { - orgneg++; - } - } - else { + } else { allunlab++; - if(unlabeled[i]) { - if(label[i] > 0) { - newpos++; - } - else if(label[i] < 0) { - newneg++; - } + if (unlabeled[i]) { + if (label[i] > 0) { + newpos++; + } else if (label[i] < 0) { + newneg++; + } } } - if(label[i]==0) { + if (label[i] == 0) { nolabel++; } } - if(learn_parm->transduction_posratio >= 0) { - posratio=learn_parm->transduction_posratio; - } - else { - posratio=(double)orgpos/(double)(orgpos+orgneg); /* use ratio of pos/neg */ - } /* in training data */ - negratio=1.0-posratio; + if (learn_parm->transduction_posratio >= 0) { + posratio = learn_parm->transduction_posratio; + } else { + posratio = + (double)orgpos / (double)(orgpos + orgneg); /* use ratio of pos/neg */ + } /* in training data */ + negratio = 1.0 - posratio; - learn_parm->svm_costratio=1.0; /* global */ - if(posratio>0) { - learn_parm->svm_costratio_unlab=negratio/posratio; - } - else { - learn_parm->svm_costratio_unlab=1.0; - } - - pos=0; - neg=0; - upos=0; - uneg=0; - for(i=0;ib); /* 'distance' from hyperplane*/ - if(dist>0) { + learn_parm->svm_costratio = 1.0; /* global */ + if (posratio > 0) { + learn_parm->svm_costratio_unlab = negratio / posratio; + } else { + learn_parm->svm_costratio_unlab = 1.0; + } + + pos = 0; + neg = 0; + upos = 0; + uneg = 0; + for (i = 0; i < totdoc; i++) { + dist = (lin[i] - model->b); /* 'distance' from hyperplane*/ + if (dist > 0) { pos++; - } - else { + } else { neg++; } - if(unlabeled[i]) { - if(dist>0) { - upos++; - } - else { - uneg++; + if (unlabeled[i]) { + if (dist > 0) { + upos++; + } else { + uneg++; } } - if((!unlabeled[i]) && (a[i]>(learn_parm->svm_cost[i]-learn_parm->epsilon_a))) { - /* printf("Ubounded %ld (class %ld, unlabeled %ld)\n",i,label[i],unlabeled[i]); */ + if ((!unlabeled[i]) && + (a[i] > (learn_parm->svm_cost[i] - learn_parm->epsilon_a))) { + /* printf("Ubounded %ld (class %ld, unlabeled + * %ld)\n",i,label[i],unlabeled[i]); */ } } - if(verbosity>=2) { - printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n",pos,orgpos,orgneg); - printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n",pos,newpos,newneg); - printf("pos ratio = %f (%f).\n",(double)(upos)/(double)(allunlab),posratio); + if (verbosity >= 2) { + printf("POS=%ld, ORGPOS=%ld, ORGNEG=%ld\n", pos, orgpos, orgneg); + printf("POS=%ld, NEWPOS=%ld, NEWNEG=%ld\n", pos, newpos, newneg); + printf("pos ratio = %f (%f).\n", (double)(upos) / (double)(allunlab), + posratio); fflush(stdout); } - if(transductcycle == 0) { - j1=0; - j2=0; - j4=0; - for(i=0;ib); /* 'distance' from hyperplane*/ - if((label[i]==0) && (unlabeled[i])) { - selcrit[j4]=dist; - key[j4]=i; - j4++; + if (transductcycle == 0) { + j1 = 0; + j2 = 0; + j4 = 0; + for (i = 0; i < totdoc; i++) { + dist = (lin[i] - model->b); /* 'distance' from hyperplane*/ + if ((label[i] == 0) && (unlabeled[i])) { + selcrit[j4] = dist; + key[j4] = i; + j4++; } } - unsupaddnum1=0; - unsupaddnum2=0; - select_top_n(selcrit,j4,select,(long)(allunlab*posratio+0.5)); - for(k=0;(k<(long)(allunlab*posratio+0.5));k++) { - i=key[select[k]]; - label[i]=1; - unsupaddnum1++; + unsupaddnum1 = 0; + unsupaddnum2 = 0; + select_top_n(selcrit, j4, select, (long)(allunlab * posratio + 0.5)); + for (k = 0; (k < (long)(allunlab * posratio + 0.5)); k++) { + i = key[select[k]]; + label[i] = 1; + unsupaddnum1++; j1++; } - 
for(i=0;isvm_cost[i]=learn_parm->svm_c* - learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; - } - else if(label[i] == -1) { - learn_parm->svm_cost[i]=learn_parm->svm_c* - learn_parm->svm_unlabbound; - } + for (i = 0; i < totdoc; i++) { /* set upper bounds on vars */ + if (unlabeled[i]) { + if (label[i] == 1) { + learn_parm->svm_cost[i] = learn_parm->svm_c * + learn_parm->svm_costratio_unlab * + learn_parm->svm_unlabbound; + } else if (label[i] == -1) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_unlabbound; + } } } - if(verbosity>=1) { - /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", - learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, - learn_parm->svm_unlabbound); */ - printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", - unsupaddnum1,unsupaddnum2); + if (verbosity >= 1) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("Classifying unlabeled data as %ld POS / %ld NEG.\n", unsupaddnum1, + unsupaddnum2); fflush(stdout); } - if(verbosity >= 1) + if (verbosity >= 1) printf("Retraining."); - if(verbosity >= 2) printf("\n"); - return((long)3); + if (verbosity >= 2) + printf("\n"); + return ((long)3); } - if((transductcycle % check_every) == 0) { - if(verbosity >= 1) + if ((transductcycle % check_every) == 0) { + if (verbosity >= 1) printf("Retraining."); - if(verbosity >= 2) printf("\n"); - j1=0; - j2=0; - unsupaddnum1=0; - unsupaddnum2=0; - for(i=0;i= 2) + printf("\n"); + j1 = 0; + j2 = 0; + unsupaddnum1 = 0; + unsupaddnum2 = 0; + for (i = 0; i < totdoc; i++) { + if ((unlabeled[i] == 2)) { + unlabeled[i] = 1; + label[i] = 1; + j1++; + unsupaddnum1++; + } else if ((unlabeled[i] == 3)) { + unlabeled[i] = 1; + label[i] = -1; + j2++; + unsupaddnum2++; } } - for(i=0;isvm_cost[i]=learn_parm->svm_c* - learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; - } - else if(label[i] == -1) { - learn_parm->svm_cost[i]=learn_parm->svm_c* - learn_parm->svm_unlabbound; - } + for (i = 0; i < totdoc; i++) { /* set upper bounds on vars */ + if (unlabeled[i]) { + if (label[i] == 1) { + learn_parm->svm_cost[i] = learn_parm->svm_c * + learn_parm->svm_costratio_unlab * + learn_parm->svm_unlabbound; + } else if (label[i] == -1) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_unlabbound; + } } } - if(verbosity>=2) { - /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", - learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, - learn_parm->svm_unlabbound); */ - printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", - upos,unsupaddnum1,unsupaddnum2); + if (verbosity >= 2) { + /* printf("costratio %f, costratio_unlab %f, unlabbound %f\n", + learn_parm->svm_costratio,learn_parm->svm_costratio_unlab, + learn_parm->svm_unlabbound); */ + printf("%ld positive -> Added %ld POS / %ld NEG unlabeled examples.\n", + upos, unsupaddnum1, unsupaddnum2); fflush(stdout); } - if(learn_parm->svm_unlabbound == 1) { - learn_parm->epsilon_crit=0.001; /* do the last run right */ - } - else { - learn_parm->epsilon_crit=0.01; /* otherwise, no need to be so picky */ + if (learn_parm->svm_unlabbound == 1) { + learn_parm->epsilon_crit = 0.001; /* do the last run right */ + } else { + learn_parm->epsilon_crit = 0.01; /* otherwise, no need to be so picky */ } - return((long)3); - } - else if(((transductcycle % check_every) < check_every)) { - model_length=0; - sumalpha=0; - loss=0; - for(i=0;ib); /* 
'distance' from hyperplane*/ - if((label[i]*dist)<(1.0-learn_parm->epsilon_crit)) { - loss+=(1.0-(label[i]*dist))*learn_parm->svm_cost[i]; + return ((long)3); + } else if (((transductcycle % check_every) < check_every)) { + model_length = 0; + sumalpha = 0; + loss = 0; + for (i = 0; i < totdoc; i++) { + model_length += a[i] * label[i] * lin[i]; + sumalpha += a[i]; + dist = (lin[i] - model->b); /* 'distance' from hyperplane*/ + if ((label[i] * dist) < (1.0 - learn_parm->epsilon_crit)) { + loss += (1.0 - (label[i] * dist)) * learn_parm->svm_cost[i]; } } - model_length=sqrt(model_length); - if(verbosity>=2) { - printf("Model-length = %f (%f), loss = %f, objective = %f\n", - model_length,sumalpha,loss,loss+0.5*model_length*model_length); + model_length = sqrt(model_length); + if (verbosity >= 2) { + printf("Model-length = %f (%f), loss = %f, objective = %f\n", + model_length, sumalpha, loss, + loss + 0.5 * model_length * model_length); fflush(stdout); } - j1=0; - j2=0; - j3=0; - j4=0; - unsupaddnum1=0; - unsupaddnum2=0; - umin=99999; - umax=-99999; - j4=1; - while(j4) { - umin=99999; - umax=-99999; - for(i=0;(ib); - if((label[i]>0) && (unlabeled[i]) && (!inconsistent[i]) - && (distumax)) { - umax=dist; - imax=i; - } + j1 = 0; + j2 = 0; + j3 = 0; + j4 = 0; + unsupaddnum1 = 0; + unsupaddnum2 = 0; + umin = 99999; + umax = -99999; + j4 = 1; + while (j4) { + umin = 99999; + umax = -99999; + for (i = 0; (i < totdoc); i++) { + dist = (lin[i] - model->b); + if ((label[i] > 0) && (unlabeled[i]) && (!inconsistent[i]) && + (dist < umin)) { + umin = dist; + imin = i; + } + if ((label[i] < 0) && (unlabeled[i]) && (!inconsistent[i]) && + (dist > umax)) { + umax = dist; + imax = i; + } } - if((umin < (umax+switchsens-1E-4))) { - j1++; - j2++; - unsupaddnum1++; - unlabeled[imin]=3; - inconsistent[imin]=1; - unsupaddnum2++; - unlabeled[imax]=2; - inconsistent[imax]=1; - } - else - j4=0; - j4=0; + if ((umin < (umax + switchsens - 1E-4))) { + j1++; + j2++; + unsupaddnum1++; + unlabeled[imin] = 3; + inconsistent[imin] = 1; + unsupaddnum2++; + unlabeled[imax] = 2; + inconsistent[imax] = 1; + } else + j4 = 0; + j4 = 0; } - for(j=0;(j0) { - unlabeled[j]=2; - } - else if(label[j]<0) { - unlabeled[j]=3; - } - /* inconsistent[j]=1; */ - j3++; + for (j = 0; (j < totdoc); j++) { + if (unlabeled[j] && (!inconsistent[j])) { + if (label[j] > 0) { + unlabeled[j] = 2; + } else if (label[j] < 0) { + unlabeled[j] = 3; + } + /* inconsistent[j]=1; */ + j3++; } } - switchnum+=unsupaddnum1+unsupaddnum2; + switchnum += unsupaddnum1 + unsupaddnum2; /* stop and print out current margin printf("switchnum %ld %ld\n",switchnum,kernel_parm->poly_degree); @@ -2979,478 +3021,446 @@ long incorporate_unlabeled_examples(MODEL *model, long int *label, } */ - if((!unsupaddnum1) && (!unsupaddnum2)) { - if((learn_parm->svm_unlabbound>=1) && ((newpos+newneg) == allunlab)) { - for(j=0;(jpredfile,model,lin,a,unlabeled,label, - totdoc,learn_parm); - if(verbosity>=1) - printf("Number of switches: %ld\n",switchnum); - return((long)0); + if ((!unsupaddnum1) && (!unsupaddnum2)) { + if ((learn_parm->svm_unlabbound >= 1) && + ((newpos + newneg) == allunlab)) { + for (j = 0; (j < totdoc); j++) { + inconsistent[j] = 0; + if (unlabeled[j]) + unlabeled[j] = 1; + } + write_prediction(learn_parm->predfile, model, lin, a, unlabeled, label, + totdoc, learn_parm); + if (verbosity >= 1) + printf("Number of switches: %ld\n", switchnum); + return ((long)0); } - switchsens=switchsensorg; - learn_parm->svm_unlabbound*=1.5; - if(learn_parm->svm_unlabbound>1) { - 
learn_parm->svm_unlabbound=1; + switchsens = switchsensorg; + learn_parm->svm_unlabbound *= 1.5; + if (learn_parm->svm_unlabbound > 1) { + learn_parm->svm_unlabbound = 1; } - model->at_upper_bound=0; /* since upper bound increased */ - if(verbosity>=1) - printf("Increasing influence of unlabeled examples to %f%% .", - learn_parm->svm_unlabbound*100.0); - } - else if(verbosity>=1) { - printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled examples.", - upos,unsupaddnum1,unsupaddnum2); + model->at_upper_bound = 0; /* since upper bound increased */ + if (verbosity >= 1) + printf("Increasing influence of unlabeled examples to %f%% .", + learn_parm->svm_unlabbound * 100.0); + } else if (verbosity >= 1) { + printf("%ld positive -> Switching labels of %ld POS / %ld NEG unlabeled " + "examples.", + upos, unsupaddnum1, unsupaddnum2); fflush(stdout); } - if(verbosity >= 2) printf("\n"); - - learn_parm->epsilon_crit=0.5; /* don't need to be so picky */ + if (verbosity >= 2) + printf("\n"); + + learn_parm->epsilon_crit = 0.5; /* don't need to be so picky */ - for(i=0;isvm_cost[i]=learn_parm->svm_c* - learn_parm->svm_costratio_unlab*learn_parm->svm_unlabbound; - } - else if(label[i] == -1) { - learn_parm->svm_cost[i]=learn_parm->svm_c* - learn_parm->svm_unlabbound; - } + for (i = 0; i < totdoc; i++) { /* set upper bounds on vars */ + if (unlabeled[i]) { + if (label[i] == 1) { + learn_parm->svm_cost[i] = learn_parm->svm_c * + learn_parm->svm_costratio_unlab * + learn_parm->svm_unlabbound; + } else if (label[i] == -1) { + learn_parm->svm_cost[i] = + learn_parm->svm_c * learn_parm->svm_unlabbound; + } } } - return((long)2); + return ((long)2); } - return((long)0); + return ((long)0); } /*************************** Working set selection ***************************/ -long select_next_qp_subproblem_grad(long int *label, - long int *unlabeled, - double *a, double *lin, - double *c, long int totdoc, - long int qp_size, - LEARN_PARM *learn_parm, - long int *inconsistent, - long int *active2dnum, - long int *working2dnum, - double *selcrit, - long int *select, - KERNEL_CACHE *kernel_cache, - long int cache_only, - long int *key, long int *chosen) - /* Use the feasible direction approach to select the next - qp-subproblem (see chapter 'Selecting a good working set'). If - 'cache_only' is true, then the variables are selected only among - those for which the kernel evaluations are cached. */ +long select_next_qp_subproblem_grad( + long int *label, long int *unlabeled, double *a, double *lin, double *c, + long int totdoc, long int qp_size, LEARN_PARM *learn_parm, + long int *inconsistent, long int *active2dnum, long int *working2dnum, + double *selcrit, long int *select, KERNEL_CACHE *kernel_cache, + long int cache_only, long int *key, long int *chosen) +/* Use the feasible direction approach to select the next + qp-subproblem (see chapter 'Selecting a good working set'). If + 'cache_only' is true, then the variables are selected only among + those for which the kernel evaluations are cached. 
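   Concretely, the score computed below is the dual gradient seen along a
   feasible direction,

       selcrit[j] = y_j*(eps - y_j*c_j + y_j*lin[j]),

   evaluated first over variables that may still decrease (s = -y_j) and
   then, with flipped sign, over variables that may still increase;
   select_top_n() keeps the qp_size/2 most violating indices of each
   group, so the subproblem mixes both movement directions.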
*/ { - long choosenum,i,j,k,activedoc,inum,valid; + long choosenum, i, j, k, activedoc, inum, valid; double s; - for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ - choosenum=0; - activedoc=0; - for(i=0;(j=active2dnum[i])>=0;i++) { - s=-label[j]; - if(kernel_cache && cache_only) - valid=(kernel_cache->index[j]>=0); + for (inum = 0; working2dnum[inum] >= 0; inum++) + ; /* find end of index */ + choosenum = 0; + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = -label[j]; + if (kernel_cache && cache_only) + valid = (kernel_cache->index[j] >= 0); else - valid=1; - if(valid - && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!chosen[j]) - && (label[j]) - && (!inconsistent[j])) - { - selcrit[activedoc]=(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); - /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); */ - key[activedoc]=j; + valid = 1; + if (valid && (!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!chosen[j]) && (label[j]) && (!inconsistent[j])) { + selcrit[activedoc] = + (double)label[j] * (learn_parm->eps - (double)label[j] * c[j] + + (double)label[j] * lin[j]); + /* selcrit[activedoc]=(double)label[j]*(-1.0+(double)label[j]*lin[j]); + */ + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (kbiased_hyperplane || (selcrit[select[k]] > 0)) { */ - i=key[select[k]]; - chosen[i]=1; - working2dnum[inum+choosenum]=i; - choosenum+=1; - if(kernel_cache) - kernel_cache_touch(kernel_cache,i); /* make sure it does not get - kicked out of cache */ - /* } */ + select_top_n(selcrit, activedoc, select, (long)(qp_size / 2)); + for (k = 0; + (choosenum < (qp_size / 2)) && (k < (qp_size / 2)) && (k < activedoc); + k++) { + /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ + i = key[select[k]]; + chosen[i] = 1; + working2dnum[inum + choosenum] = i; + choosenum += 1; + if (kernel_cache) + kernel_cache_touch(kernel_cache, i); /* make sure it does not get + kicked out of cache */ + /* } */ } - activedoc=0; - for(i=0;(j=active2dnum[i])>=0;i++) { - s=label[j]; - if(kernel_cache && cache_only) - valid=(kernel_cache->index[j]>=0); + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = label[j]; + if (kernel_cache && cache_only) + valid = (kernel_cache->index[j] >= 0); else - valid=1; - if(valid - && (!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!chosen[j]) - && (label[j]) - && (!inconsistent[j])) - { - selcrit[activedoc]=-(double)label[j]*(learn_parm->eps-(double)label[j]*c[j]+(double)label[j]*lin[j]); - /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); */ - key[activedoc]=j; + valid = 1; + if (valid && (!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!chosen[j]) && (label[j]) && (!inconsistent[j])) { + selcrit[activedoc] = + -(double)label[j] * (learn_parm->eps - (double)label[j] * c[j] + + (double)label[j] * lin[j]); + /* selcrit[activedoc]=-(double)(label[j]*(-1.0+(double)label[j]*lin[j])); + */ + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - 
for(k=0;(choosenumbiased_hyperplane || (selcrit[select[k]] > 0)) { */ - i=key[select[k]]; - chosen[i]=1; - working2dnum[inum+choosenum]=i; - choosenum+=1; - if(kernel_cache) - kernel_cache_touch(kernel_cache,i); /* make sure it does not get - kicked out of cache */ - /* } */ - } - working2dnum[inum+choosenum]=-1; /* complete index */ - return(choosenum); + select_top_n(selcrit, activedoc, select, (long)(qp_size / 2)); + for (k = 0; (choosenum < qp_size) && (k < (qp_size / 2)) && (k < activedoc); + k++) { + /* if(learn_parm->biased_hyperplane || (selcrit[select[k]] > 0)) { */ + i = key[select[k]]; + chosen[i] = 1; + working2dnum[inum + choosenum] = i; + choosenum += 1; + if (kernel_cache) + kernel_cache_touch(kernel_cache, i); /* make sure it does not get + kicked out of cache */ + /* } */ + } + working2dnum[inum + choosenum] = -1; /* complete index */ + return (choosenum); } -long select_next_qp_subproblem_rand(long int *label, - long int *unlabeled, - double *a, double *lin, - double *c, long int totdoc, - long int qp_size, - LEARN_PARM *learn_parm, - long int *inconsistent, - long int *active2dnum, - long int *working2dnum, - double *selcrit, - long int *select, - KERNEL_CACHE *kernel_cache, - long int *key, - long int *chosen, - long int iteration) +long select_next_qp_subproblem_rand( + long int *label, long int *unlabeled, double *a, double *lin, double *c, + long int totdoc, long int qp_size, LEARN_PARM *learn_parm, + long int *inconsistent, long int *active2dnum, long int *working2dnum, + double *selcrit, long int *select, KERNEL_CACHE *kernel_cache, + long int *key, long int *chosen, long int iteration) /* Use the feasible direction approach to select the next qp-subproblem (see section 'Selecting a good working set'). Chooses a feasible direction at (pseudo) random to help jump over numerical problem. 
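   The randomness here is deterministic: every feasible variable is
   scored with selcrit[j] = (j + iteration) % totdoc and the top
   qp_size/2 per sign are taken, so successive calls simply rotate
   through the active set without needing an RNG.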
*/ { - long choosenum,i,j,k,activedoc,inum; + long choosenum, i, j, k, activedoc, inum; double s; - for(inum=0;working2dnum[inum]>=0;inum++); /* find end of index */ - choosenum=0; - activedoc=0; - for(i=0;(j=active2dnum[i])>=0;i++) { - s=-label[j]; - if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!inconsistent[j]) - && (label[j]) - && (!chosen[j])) { - selcrit[activedoc]=(j+iteration) % totdoc; - key[activedoc]=j; + for (inum = 0; working2dnum[inum] >= 0; inum++) + ; /* find end of index */ + choosenum = 0; + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = -label[j]; + if ((!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!inconsistent[j]) && (label[j]) && (!chosen[j])) { + selcrit[activedoc] = (j + iteration) % totdoc; + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - for(k=0;(choosenum<(qp_size/2)) && (k<(qp_size/2)) && (k=0;i++) { - s=label[j]; - if((!((a[j]<=(0+learn_parm->epsilon_a)) && (s<0))) - && (!((a[j]>=(learn_parm->svm_cost[j]-learn_parm->epsilon_a)) - && (s>0))) - && (!inconsistent[j]) - && (label[j]) - && (!chosen[j])) { - selcrit[activedoc]=(j+iteration) % totdoc; - key[activedoc]=j; + activedoc = 0; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + s = label[j]; + if ((!((a[j] <= (0 + learn_parm->epsilon_a)) && (s < 0))) && + (!((a[j] >= (learn_parm->svm_cost[j] - learn_parm->epsilon_a)) && + (s > 0))) && + (!inconsistent[j]) && (label[j]) && (!chosen[j])) { + selcrit[activedoc] = (j + iteration) % totdoc; + key[activedoc] = j; activedoc++; } } - select_top_n(selcrit,activedoc,select,(long)(qp_size/2)); - for(k=0;(choosenum=0;ii++) { - ex_c=learn_parm->svm_c-learn_parm->epsilon_a; - if(alphaslack[docs[i]->slackid] >= ex_c) { - dist=(lin[i])*(double)label[i]+slack[docs[i]->slackid]; /* distance */ - target=-(learn_parm->eps-(double)label[i]*c[i]); /* rhs of constraint */ - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>maxdiff) { /* largest violation */ - maxdiff=dist-target; - maxdiffid=docs[i]->slackid; - } + maxdiff = 0; + maxdiffid = 0; + for (ii = 0; (i = active2dnum[ii]) >= 0; ii++) { + ex_c = learn_parm->svm_c - learn_parm->epsilon_a; + if (alphaslack[docs[i]->slackid] >= ex_c) { + dist = + (lin[i]) * (double)label[i] + slack[docs[i]->slackid]; /* distance */ + target = + -(learn_parm->eps - (double)label[i] * c[i]); /* rhs of constraint */ + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > maxdiff) { /* largest violation */ + maxdiff = dist - target; + maxdiffid = docs[i]->slackid; + } } } } - (*maxviol)=maxdiff; - return(maxdiffid); -} + (*maxviol) = maxdiff; + return (maxdiffid); +} + +void select_top_n(double *selcrit, long int range, long int *select, + long int n) { + register long i, j; - -void select_top_n(double *selcrit, long int range, long int *select, - long int n) -{ - register long i,j; - - for(i=0;(i=0;j--) { - if((j>0) && (selcrit[select[j-1]]= 0; j--) { + if ((j > 0) && (selcrit[select[j - 1]] < selcrit[i])) { + select[j] = select[j - 1]; + } else { + select[j] = i; + j = -1; } } } - if(n>0) { - for(i=n;iselcrit[select[n-1]]) { - for(j=n-1;j>=0;j--) { - if((j>0) && (selcrit[select[j-1]] 0) { + for (i = n; i < range; i++) { + if (selcrit[i] > selcrit[select[n - 1]]) { + for (j = n - 1; j >= 0; j--) { + if ((j > 0) && (selcrit[select[j - 1]] < 
selcrit[i])) { + select[j] = select[j - 1]; + } else { + select[j] = i; + j = -1; + } + } } } } -} - - +} + /******************************** Shrinking *********************************/ -void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc, - long int maxhistory) -{ +void init_shrink_state(SHRINK_STATE *shrink_state, long int totdoc, + long int maxhistory) { long i; - shrink_state->deactnum=0; - shrink_state->active = (long *)my_malloc(sizeof(long)*totdoc); - shrink_state->inactive_since = (long *)my_malloc(sizeof(long)*totdoc); - shrink_state->a_history = (double **)my_malloc(sizeof(double *)*maxhistory); - shrink_state->maxhistory=maxhistory; - shrink_state->last_lin = (double *)my_malloc(sizeof(double)*totdoc); - shrink_state->last_a = (double *)my_malloc(sizeof(double)*totdoc); + shrink_state->deactnum = 0; + shrink_state->active = (long *)my_malloc(sizeof(long) * totdoc); + shrink_state->inactive_since = (long *)my_malloc(sizeof(long) * totdoc); + shrink_state->a_history = (double **)my_malloc(sizeof(double *) * maxhistory); + shrink_state->maxhistory = maxhistory; + shrink_state->last_lin = (double *)my_malloc(sizeof(double) * totdoc); + shrink_state->last_a = (double *)my_malloc(sizeof(double) * totdoc); - for(i=0;iactive[i]=1; - shrink_state->inactive_since[i]=0; - shrink_state->last_a[i]=0; - shrink_state->last_lin[i]=0; + for (i = 0; i < totdoc; i++) { + shrink_state->active[i] = 1; + shrink_state->inactive_since[i] = 0; + shrink_state->last_a[i] = 0; + shrink_state->last_lin[i] = 0; } } -void shrink_state_cleanup(SHRINK_STATE *shrink_state) -{ +void shrink_state_cleanup(SHRINK_STATE *shrink_state) { free(shrink_state->active); free(shrink_state->inactive_since); - if(shrink_state->deactnum > 0) - free(shrink_state->a_history[shrink_state->deactnum-1]); + if (shrink_state->deactnum > 0) + free(shrink_state->a_history[shrink_state->deactnum - 1]); free(shrink_state->a_history); free(shrink_state->last_a); free(shrink_state->last_lin); } -long shrink_problem(DOC **docs, - LEARN_PARM *learn_parm, - SHRINK_STATE *shrink_state, - KERNEL_PARM *kernel_parm, - long int *active2dnum, - long int *last_suboptimal_at, - long int iteration, - long int totdoc, - long int minshrink, - double *a, - long int *inconsistent) - /* Shrink some variables away. Do the shrinking only if at least - minshrink variables can be removed. */ +long shrink_problem(DOC **docs, LEARN_PARM *learn_parm, + SHRINK_STATE *shrink_state, KERNEL_PARM *kernel_parm, + long int *active2dnum, long int *last_suboptimal_at, + long int iteration, long int totdoc, long int minshrink, + double *a, long int *inconsistent) +/* Shrink some variables away. Do the shrinking only if at least + minshrink variables can be removed. 
*/ { - long i,ii,change,activenum,lastiter; - double *a_old; - - activenum=0; - change=0; - for(ii=0;active2dnum[ii]>=0;ii++) { - i=active2dnum[ii]; + long i, ii, change, activenum, lastiter; + double *a_old; + + activenum = 0; + change = 0; + for (ii = 0; active2dnum[ii] >= 0; ii++) { + i = active2dnum[ii]; activenum++; - if(0 && learn_parm->sharedslack) - lastiter=last_suboptimal_at[docs[i]->slackid]; + if (0 && learn_parm->sharedslack) + lastiter = last_suboptimal_at[docs[i]->slackid]; else - lastiter=last_suboptimal_at[i]; - if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) - || (inconsistent[i])) { + lastiter = last_suboptimal_at[i]; + if (((iteration - lastiter) > learn_parm->svm_iter_to_shrink) || + (inconsistent[i])) { change++; } } - if((change>=minshrink) /* shrink only if sufficiently many candidates */ - && (shrink_state->deactnummaxhistory)) { /* and enough memory */ + if ((change >= minshrink) /* shrink only if sufficiently many candidates */ + && (shrink_state->deactnum < + shrink_state->maxhistory)) { /* and enough memory */ /* Shrink problem by removing those variables which are */ /* optimal at a bound for a minimum number of iterations */ - if(verbosity>=2) { - printf(" Shrinking..."); fflush(stdout); + if (verbosity >= 2) { + printf(" Shrinking..."); + fflush(stdout); } - if(kernel_parm->kernel_type != LINEAR) { /* non-linear case save alphas */ - a_old=(double *)my_malloc(sizeof(double)*totdoc); - shrink_state->a_history[shrink_state->deactnum]=a_old; - for(i=0;ikernel_type != LINEAR) { /* non-linear case save alphas */ + a_old = (double *)my_malloc(sizeof(double) * totdoc); + shrink_state->a_history[shrink_state->deactnum] = a_old; + for (i = 0; i < totdoc; i++) { + a_old[i] = a[i]; } } - for(ii=0;active2dnum[ii]>=0;ii++) { - i=active2dnum[ii]; - if(0 && learn_parm->sharedslack) - lastiter=last_suboptimal_at[docs[i]->slackid]; - else - lastiter=last_suboptimal_at[i]; - if(((iteration-lastiter) > learn_parm->svm_iter_to_shrink) - || (inconsistent[i])) { - shrink_state->active[i]=0; - shrink_state->inactive_since[i]=shrink_state->deactnum; + for (ii = 0; active2dnum[ii] >= 0; ii++) { + i = active2dnum[ii]; + if (0 && learn_parm->sharedslack) + lastiter = last_suboptimal_at[docs[i]->slackid]; + else + lastiter = last_suboptimal_at[i]; + if (((iteration - lastiter) > learn_parm->svm_iter_to_shrink) || + (inconsistent[i])) { + shrink_state->active[i] = 0; + shrink_state->inactive_since[i] = shrink_state->deactnum; } } - activenum=compute_index(shrink_state->active,totdoc,active2dnum); + activenum = compute_index(shrink_state->active, totdoc, active2dnum); shrink_state->deactnum++; - if(kernel_parm->kernel_type == LINEAR) { - shrink_state->deactnum=0; + if (kernel_parm->kernel_type == LINEAR) { + shrink_state->deactnum = 0; } - if(verbosity>=2) { - printf("done.\n"); fflush(stdout); - printf(" Number of inactive variables = %ld\n",totdoc-activenum); + if (verbosity >= 2) { + printf("done.\n"); + fflush(stdout); + printf(" Number of inactive variables = %ld\n", totdoc - activenum); } } - return(activenum); -} - - -void reactivate_inactive_examples(long int *label, - long int *unlabeled, - double *a, - SHRINK_STATE *shrink_state, - double *lin, - double *c, - long int totdoc, - long int totwords, - long int iteration, - LEARN_PARM *learn_parm, - long int *inconsistent, - DOC **docs, - KERNEL_PARM *kernel_parm, - KERNEL_CACHE *kernel_cache, - MODEL *model, - CFLOAT *aicache, - double *weights, - double *maxdiff) - /* Make all variables active again which had been 
removed by - shrinking. */ - /* Computes lin for those variables from scratch. */ - /* WARNING: Assumes that array of weights is initialized to all zero - values for linear kernel! */ + return (activenum); +} + +void reactivate_inactive_examples( + long int *label, long int *unlabeled, double *a, SHRINK_STATE *shrink_state, + double *lin, double *c, long int totdoc, long int totwords, + long int iteration, LEARN_PARM *learn_parm, long int *inconsistent, + DOC **docs, KERNEL_PARM *kernel_parm, KERNEL_CACHE *kernel_cache, + MODEL *model, CFLOAT *aicache, double *weights, double *maxdiff) +/* Make all variables active again which had been removed by + shrinking. */ +/* Computes lin for those variables from scratch. */ +/* WARNING: Assumes that array of weights is initialized to all zero + values for linear kernel! */ { - register long i,j,ii,jj,t,*changed2dnum,*inactive2dnum; - long *changed,*inactive; - register double kernel_val,*a_old,dist; - double ex_c,target; + register long i, j, ii, jj, t, *changed2dnum, *inactive2dnum; + long *changed, *inactive; + register double kernel_val, *a_old, dist; + double ex_c, target; SVECTOR *f; - if(kernel_parm->kernel_type == LINEAR) { /* special linear case */ + if (kernel_parm->kernel_type == LINEAR) { /* special linear case */ /* clear_vector_n(weights,totwords); set weights to zero */ - a_old=shrink_state->last_a; - for(i=0;ifvec;f;f=f->next) - add_vector_ns(weights,f, - f->factor*((a[i]-a_old[i])*(double)label[i])); - a_old[i]=a[i]; + a_old = shrink_state->last_a; + for (i = 0; i < totdoc; i++) { + if (a[i] != a_old[i]) { + for (f = docs[i]->fvec; f; f = f->next) + add_vector_ns(weights, f, + f->factor * ((a[i] - a_old[i]) * (double)label[i])); + a_old[i] = a[i]; } } - for(i=0;iactive[i]) { - for(f=docs[i]->fvec;f;f=f->next) - lin[i]=shrink_state->last_lin[i]+f->factor*sprod_ns(weights,f); + for (i = 0; i < totdoc; i++) { + if (!shrink_state->active[i]) { + for (f = docs[i]->fvec; f; f = f->next) + lin[i] = shrink_state->last_lin[i] + f->factor * sprod_ns(weights, f); } - shrink_state->last_lin[i]=lin[i]; + shrink_state->last_lin[i] = lin[i]; } - for(i=0;ifvec;f;f=f->next) - mult_vector_ns(weights,f,0.0); /* set weights back to zero */ + for (i = 0; i < totdoc; i++) { + for (f = docs[i]->fvec; f; f = f->next) + mult_vector_ns(weights, f, 0.0); /* set weights back to zero */ } - } - else { - changed=(long *)my_malloc(sizeof(long)*totdoc); - changed2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); - inactive=(long *)my_malloc(sizeof(long)*totdoc); - inactive2dnum=(long *)my_malloc(sizeof(long)*(totdoc+11)); - for(t=shrink_state->deactnum-1;(t>=0) && shrink_state->a_history[t];t--) { - if(verbosity>=2) { - printf("%ld..",t); fflush(stdout); + } else { + changed = (long *)my_malloc(sizeof(long) * totdoc); + changed2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + inactive = (long *)my_malloc(sizeof(long) * totdoc); + inactive2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + for (t = shrink_state->deactnum - 1; (t >= 0) && shrink_state->a_history[t]; + t--) { + if (verbosity >= 2) { + printf("%ld..", t); + fflush(stdout); } - a_old=shrink_state->a_history[t]; - for(i=0;iactive[i]) - && (shrink_state->inactive_since[i] == t)); - changed[i]= (a[i] != a_old[i]); + a_old = shrink_state->a_history[t]; + for (i = 0; i < totdoc; i++) { + inactive[i] = ((!shrink_state->active[i]) && + (shrink_state->inactive_since[i] == t)); + changed[i] = (a[i] != a_old[i]); } - compute_index(inactive,totdoc,inactive2dnum); - 
compute_index(changed,totdoc,changed2dnum); - - for(ii=0;(i=changed2dnum[ii])>=0;ii++) { - get_kernel_row(kernel_cache,docs,i,totdoc,inactive2dnum,aicache, - kernel_parm); - for(jj=0;(j=inactive2dnum[jj])>=0;jj++) { - kernel_val=aicache[j]; - lin[j]+=(((a[i]*kernel_val)-(a_old[i]*kernel_val))*(double)label[i]); - } + compute_index(inactive, totdoc, inactive2dnum); + compute_index(changed, totdoc, changed2dnum); + + for (ii = 0; (i = changed2dnum[ii]) >= 0; ii++) { + get_kernel_row(kernel_cache, docs, i, totdoc, inactive2dnum, aicache, + kernel_parm); + for (jj = 0; (j = inactive2dnum[jj]) >= 0; jj++) { + kernel_val = aicache[j]; + lin[j] += (((a[i] * kernel_val) - (a_old[i] * kernel_val)) * + (double)label[i]); + } } } free(changed); @@ -3458,255 +3468,249 @@ void reactivate_inactive_examples(long int *label, free(inactive); free(inactive2dnum); } - (*maxdiff)=0; - for(i=0;iinactive_since[i]=shrink_state->deactnum-1; - if(!inconsistent[i]) { - dist=(lin[i]-model->b)*(double)label[i]; - target=-(learn_parm->eps-(double)label[i]*c[i]); - ex_c=learn_parm->svm_cost[i]-learn_parm->epsilon_a; - if((a[i]>learn_parm->epsilon_a) && (dist > target)) { - if((dist-target)>(*maxdiff)) /* largest violation */ - (*maxdiff)=dist-target; + (*maxdiff) = 0; + for (i = 0; i < totdoc; i++) { + shrink_state->inactive_since[i] = shrink_state->deactnum - 1; + if (!inconsistent[i]) { + dist = (lin[i] - model->b) * (double)label[i]; + target = -(learn_parm->eps - (double)label[i] * c[i]); + ex_c = learn_parm->svm_cost[i] - learn_parm->epsilon_a; + if ((a[i] > learn_parm->epsilon_a) && (dist > target)) { + if ((dist - target) > (*maxdiff)) /* largest violation */ + (*maxdiff) = dist - target; + } else if ((a[i] < ex_c) && (dist < target)) { + if ((target - dist) > (*maxdiff)) /* largest violation */ + (*maxdiff) = target - dist; } - else if((a[i](*maxdiff)) /* largest violation */ - (*maxdiff)=target-dist; - } - if((a[i]>(0+learn_parm->epsilon_a)) - && (a[i]active[i]=1; /* not at bound */ - } - else if((a[i]<=(0+learn_parm->epsilon_a)) && (dist < (target+learn_parm->epsilon_shrink))) { - shrink_state->active[i]=1; - } - else if((a[i]>=ex_c) - && (dist > (target-learn_parm->epsilon_shrink))) { - shrink_state->active[i]=1; - } - else if(learn_parm->sharedslack) { /* make all active when sharedslack */ - shrink_state->active[i]=1; + if ((a[i] > (0 + learn_parm->epsilon_a)) && (a[i] < ex_c)) { + shrink_state->active[i] = 1; /* not at bound */ + } else if ((a[i] <= (0 + learn_parm->epsilon_a)) && + (dist < (target + learn_parm->epsilon_shrink))) { + shrink_state->active[i] = 1; + } else if ((a[i] >= ex_c) && + (dist > (target - learn_parm->epsilon_shrink))) { + shrink_state->active[i] = 1; + } else if (learn_parm + ->sharedslack) { /* make all active when sharedslack */ + shrink_state->active[i] = 1; } } } - if(kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ - for(i=0;ia_history[shrink_state->deactnum-1])[i]=a[i]; + if (kernel_parm->kernel_type != LINEAR) { /* update history for non-linear */ + for (i = 0; i < totdoc; i++) { + (shrink_state->a_history[shrink_state->deactnum - 1])[i] = a[i]; } - for(t=shrink_state->deactnum-2;(t>=0) && shrink_state->a_history[t];t--) { + for (t = shrink_state->deactnum - 2; (t >= 0) && shrink_state->a_history[t]; + t--) { free(shrink_state->a_history[t]); - shrink_state->a_history[t]=0; + shrink_state->a_history[t] = 0; } } } /****************************** Cache handling *******************************/ -void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC 
**docs, - long int docnum, long int totdoc, - long int *active2dnum, CFLOAT *buffer, - KERNEL_PARM *kernel_parm) - /* Get's a row of the matrix of kernel values This matrix has the - same form as the Hessian, just that the elements are not - multiplied by */ - /* y_i * y_j * a_i * a_j */ - /* Takes the values from the cache if available. */ +void get_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, long int docnum, + long int totdoc, long int *active2dnum, CFLOAT *buffer, + KERNEL_PARM *kernel_parm) +/* Get's a row of the matrix of kernel values This matrix has the + same form as the Hessian, just that the elements are not + multiplied by */ +/* y_i * y_j * a_i * a_j */ +/* Takes the values from the cache if available. */ { - register long i,j,start; + register long i, j, start; DOC *ex; - ex=docs[docnum]; + ex = docs[docnum]; - if(kernel_cache && (kernel_cache->index[docnum] != -1)) {/* row is cached? */ - kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time;/* lru */ - start=kernel_cache->activenum*kernel_cache->index[docnum]; - for(i=0;(j=active2dnum[i])>=0;i++) { - if(kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ - buffer[j]=kernel_cache->buffer[start+kernel_cache->totdoc2active[j]]; - } - else { - buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + if (kernel_cache && + (kernel_cache->index[docnum] != -1)) { /* row is cached? */ + kernel_cache->lru[kernel_cache->index[docnum]] = + kernel_cache->time; /* lru */ + start = kernel_cache->activenum * kernel_cache->index[docnum]; + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + if (kernel_cache->totdoc2active[j] >= 0) { /* column is cached? */ + buffer[j] = + kernel_cache->buffer[start + kernel_cache->totdoc2active[j]]; + } else { + buffer[j] = (CFLOAT)kernel(kernel_parm, ex, docs[j]); } } - } - else { - for(i=0;(j=active2dnum[i])>=0;i++) { - buffer[j]=(CFLOAT)kernel(kernel_parm,ex,docs[j]); + } else { + for (i = 0; (j = active2dnum[i]) >= 0; i++) { + buffer[j] = (CFLOAT)kernel(kernel_parm, ex, docs[j]); } } -} - - -void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, - long int m, KERNEL_PARM *kernel_parm) - /* Fills cache for the row m */ +} + +void cache_kernel_row(KERNEL_CACHE *kernel_cache, DOC **docs, long int m, + KERNEL_PARM *kernel_parm) +/* Fills cache for the row m */ { register DOC *ex; - register long j,k,l; + register long j, k, l; register CFLOAT *cache; - if(!kernel_cache_check(kernel_cache,m)) { /* not cached yet*/ - cache = kernel_cache_clean_and_malloc(kernel_cache,m); - if(cache) { - l=kernel_cache->totdoc2active[m]; - ex=docs[m]; - for(j=0;jactivenum;j++) { /* fill cache */ - k=kernel_cache->active2totdoc[j]; - if((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { - cache[j]=kernel_cache->buffer[kernel_cache->activenum - *kernel_cache->index[k]+l]; - } - else { - cache[j]=kernel(kernel_parm,ex,docs[k]); - } + if (!kernel_cache_check(kernel_cache, m)) { /* not cached yet*/ + cache = kernel_cache_clean_and_malloc(kernel_cache, m); + if (cache) { + l = kernel_cache->totdoc2active[m]; + ex = docs[m]; + for (j = 0; j < kernel_cache->activenum; j++) { /* fill cache */ + k = kernel_cache->active2totdoc[j]; + if ((kernel_cache->index[k] != -1) && (l != -1) && (k != m)) { + cache[j] = + kernel_cache + ->buffer[kernel_cache->activenum * kernel_cache->index[k] + + l]; + } else { + cache[j] = kernel(kernel_parm, ex, docs[k]); + } } - } - else { + } else { perror("Error: Kernel cache full! 
=> increase cache size"); } } -} - - -void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, - long int *key, long int varnum, - KERNEL_PARM *kernel_parm) - /* Fills cache for the rows in key */ +} + +void cache_multiple_kernel_rows(KERNEL_CACHE *kernel_cache, DOC **docs, + long int *key, long int varnum, + KERNEL_PARM *kernel_parm) +/* Fills cache for the rows in key */ { register long i; - for(i=0;i=2) { - printf(" Reorganizing cache..."); fflush(stdout); + if (verbosity >= 2) { + printf(" Reorganizing cache..."); + fflush(stdout); } - keep=(long *)my_malloc(sizeof(long)*totdoc); - for(j=0;jactivenum) && (scountactive2totdoc[jj]; - if(!after[j]) { + scount = 0; + for (jj = 0; (jj < kernel_cache->activenum) && (scount < numshrink); jj++) { + j = kernel_cache->active2totdoc[jj]; + if (!after[j]) { scount++; - keep[j]=0; + keep[j] = 0; } } - for(i=0;imax_elems;i++) { - for(jj=0;jjactivenum;jj++) { - j=kernel_cache->active2totdoc[jj]; - if(!keep[j]) { - from++; - } - else { - kernel_cache->buffer[to]=kernel_cache->buffer[from]; - to++; - from++; + for (i = 0; i < kernel_cache->max_elems; i++) { + for (jj = 0; jj < kernel_cache->activenum; jj++) { + j = kernel_cache->active2totdoc[jj]; + if (!keep[j]) { + from++; + } else { + kernel_cache->buffer[to] = kernel_cache->buffer[from]; + to++; + from++; } } } - kernel_cache->activenum=0; - for(j=0;jtotdoc2active[j] != -1)) { - kernel_cache->active2totdoc[kernel_cache->activenum]=j; - kernel_cache->totdoc2active[j]=kernel_cache->activenum; + kernel_cache->activenum = 0; + for (j = 0; j < totdoc; j++) { + if ((keep[j]) && (kernel_cache->totdoc2active[j] != -1)) { + kernel_cache->active2totdoc[kernel_cache->activenum] = j; + kernel_cache->totdoc2active[j] = kernel_cache->activenum; kernel_cache->activenum++; - } - else { - kernel_cache->totdoc2active[j]=-1; + } else { + kernel_cache->totdoc2active[j] = -1; } } - kernel_cache->max_elems=(long)(kernel_cache->buffsize/kernel_cache->activenum); - if(kernel_cache->max_elems>totdoc) { - kernel_cache->max_elems=totdoc; + kernel_cache->max_elems = + (long)(kernel_cache->buffsize / kernel_cache->activenum); + if (kernel_cache->max_elems > totdoc) { + kernel_cache->max_elems = totdoc; } free(keep); - if(verbosity>=2) { - printf("done.\n"); fflush(stdout); - printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); + if (verbosity >= 2) { + printf("done.\n"); + fflush(stdout); + printf(" Cache-size in rows = %ld\n", kernel_cache->max_elems); } } -KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) -{ +KERNEL_CACHE *kernel_cache_init(long int totdoc, long int buffsize) { long i; KERNEL_CACHE *kernel_cache; - kernel_cache=(KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); - kernel_cache->index = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->occu = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->lru = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->invindex = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long)*totdoc); - kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024*1024); + kernel_cache = (KERNEL_CACHE *)my_malloc(sizeof(KERNEL_CACHE)); + kernel_cache->index = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->occu = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->lru = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->invindex = (long *)my_malloc(sizeof(long) * totdoc); + 
kernel_cache->active2totdoc = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->totdoc2active = (long *)my_malloc(sizeof(long) * totdoc); + kernel_cache->buffer = (CFLOAT *)my_malloc((size_t)(buffsize)*1024 * 1024); - kernel_cache->buffsize=(long)(buffsize/sizeof(CFLOAT)*1024*1024); + kernel_cache->buffsize = (long)(buffsize / sizeof(CFLOAT) * 1024 * 1024); - kernel_cache->max_elems=(long)(kernel_cache->buffsize/totdoc); - if(kernel_cache->max_elems>totdoc) { - kernel_cache->max_elems=totdoc; + kernel_cache->max_elems = (long)(kernel_cache->buffsize / totdoc); + if (kernel_cache->max_elems > totdoc) { + kernel_cache->max_elems = totdoc; } - if(verbosity>=2) { - printf(" Cache-size in rows = %ld\n",kernel_cache->max_elems); - printf(" Kernel evals so far: %ld\n",kernel_cache_statistic); + if (verbosity >= 2) { + printf(" Cache-size in rows = %ld\n", kernel_cache->max_elems); + printf(" Kernel evals so far: %ld\n", kernel_cache_statistic); } - kernel_cache->elems=0; /* initialize cache */ - for(i=0;iindex[i]=-1; - kernel_cache->lru[i]=0; + kernel_cache->elems = 0; /* initialize cache */ + for (i = 0; i < totdoc; i++) { + kernel_cache->index[i] = -1; + kernel_cache->lru[i] = 0; } - for(i=0;ioccu[i]=0; - kernel_cache->invindex[i]=-1; + for (i = 0; i < totdoc; i++) { + kernel_cache->occu[i] = 0; + kernel_cache->invindex[i] = -1; } - kernel_cache->activenum=totdoc;; - for(i=0;iactive2totdoc[i]=i; - kernel_cache->totdoc2active[i]=i; + kernel_cache->activenum = totdoc; + ; + for (i = 0; i < totdoc; i++) { + kernel_cache->active2totdoc[i] = i; + kernel_cache->totdoc2active[i] = i; } - kernel_cache->time=0; - - return(kernel_cache); -} - -void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) -{ - long maxlru=0,k; - - for(k=0;kmax_elems;k++) { - if(maxlru < kernel_cache->lru[k]) - maxlru=kernel_cache->lru[k]; + kernel_cache->time = 0; + + return (kernel_cache); +} + +void kernel_cache_reset_lru(KERNEL_CACHE *kernel_cache) { + long maxlru = 0, k; + + for (k = 0; k < kernel_cache->max_elems; k++) { + if (maxlru < kernel_cache->lru[k]) + maxlru = kernel_cache->lru[k]; } - for(k=0;kmax_elems;k++) { - kernel_cache->lru[k]-=maxlru; + for (k = 0; k < kernel_cache->max_elems; k++) { + kernel_cache->lru[k] -= maxlru; } } -void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) -{ +void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) { free(kernel_cache->index); free(kernel_cache->occu); free(kernel_cache->lru); @@ -3717,500 +3721,503 @@ void kernel_cache_cleanup(KERNEL_CACHE *kernel_cache) free(kernel_cache); } -long kernel_cache_malloc(KERNEL_CACHE *kernel_cache) -{ +long kernel_cache_malloc(KERNEL_CACHE *kernel_cache) { long i; - if(kernel_cache_space_available(kernel_cache)) { - for(i=0;imax_elems;i++) { - if(!kernel_cache->occu[i]) { - kernel_cache->occu[i]=1; - kernel_cache->elems++; - return(i); + if (kernel_cache_space_available(kernel_cache)) { + for (i = 0; i < kernel_cache->max_elems; i++) { + if (!kernel_cache->occu[i]) { + kernel_cache->occu[i] = 1; + kernel_cache->elems++; + return (i); } } } - return(-1); + return (-1); } -void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i) -{ - kernel_cache->occu[i]=0; +void kernel_cache_free(KERNEL_CACHE *kernel_cache, long int i) { + kernel_cache->occu[i] = 0; kernel_cache->elems--; } -long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache) - /* remove least recently used cache element */ -{ - register long k,least_elem=-1,least_time; +long kernel_cache_free_lru(KERNEL_CACHE *kernel_cache) +/* remove least recently used cache element */ +{ 
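+  /* evict the occupied slot (invindex[k] != -1) carrying the smallest
+     lru stamp; kernel_cache->time only grows, so the minimum marks the
+     least recently used row */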
+ register long k, least_elem = -1, least_time; - least_time=kernel_cache->time+1; - for(k=0;kmax_elems;k++) { - if(kernel_cache->invindex[k] != -1) { - if(kernel_cache->lru[k]lru[k]; - least_elem=k; + least_time = kernel_cache->time + 1; + for (k = 0; k < kernel_cache->max_elems; k++) { + if (kernel_cache->invindex[k] != -1) { + if (kernel_cache->lru[k] < least_time) { + least_time = kernel_cache->lru[k]; + least_elem = k; } } } - if(least_elem != -1) { - kernel_cache_free(kernel_cache,least_elem); - kernel_cache->index[kernel_cache->invindex[least_elem]]=-1; - kernel_cache->invindex[least_elem]=-1; - return(1); + if (least_elem != -1) { + kernel_cache_free(kernel_cache, least_elem); + kernel_cache->index[kernel_cache->invindex[least_elem]] = -1; + kernel_cache->invindex[least_elem] = -1; + return (1); } - return(0); -} - - -CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache, - long int docnum) - /* Get a free cache entry. In case cache is full, the lru element - is removed. */ + return (0); +} + +CFLOAT *kernel_cache_clean_and_malloc(KERNEL_CACHE *kernel_cache, + long int docnum) +/* Get a free cache entry. In case cache is full, the lru element + is removed. */ { long result; - if((result = kernel_cache_malloc(kernel_cache)) == -1) { - if(kernel_cache_free_lru(kernel_cache)) { + if ((result = kernel_cache_malloc(kernel_cache)) == -1) { + if (kernel_cache_free_lru(kernel_cache)) { result = kernel_cache_malloc(kernel_cache); } } - kernel_cache->index[docnum]=result; - if(result == -1) { - return(0); + kernel_cache->index[docnum] = result; + if (result == -1) { + return (0); } - kernel_cache->invindex[result]=docnum; - kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ - return((CFLOAT *)((long)kernel_cache->buffer - +(kernel_cache->activenum*sizeof(CFLOAT)* - kernel_cache->index[docnum]))); + kernel_cache->invindex[result] = docnum; + kernel_cache->lru[kernel_cache->index[docnum]] = kernel_cache->time; /* lru */ + return ((CFLOAT *)((long)kernel_cache->buffer + + (kernel_cache->activenum * sizeof(CFLOAT) * + kernel_cache->index[docnum]))); } -long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum) - /* Update lru time to avoid removal from cache. */ +long kernel_cache_touch(KERNEL_CACHE *kernel_cache, long int docnum) +/* Update lru time to avoid removal from cache. */ { - if(kernel_cache && kernel_cache->index[docnum] != -1) { - kernel_cache->lru[kernel_cache->index[docnum]]=kernel_cache->time; /* lru */ - return(1); + if (kernel_cache && kernel_cache->index[docnum] != -1) { + kernel_cache->lru[kernel_cache->index[docnum]] = + kernel_cache->time; /* lru */ + return (1); } - return(0); -} - -long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum) - /* Is that row cached? */ + return (0); +} + +long kernel_cache_check(KERNEL_CACHE *kernel_cache, long int docnum) +/* Is that row cached? */ { - return(kernel_cache->index[docnum] != -1); -} - -long kernel_cache_space_available(KERNEL_CACHE *kernel_cache) - /* Is there room for one more row? */ + return (kernel_cache->index[docnum] != -1); +} + +long kernel_cache_space_available(KERNEL_CACHE *kernel_cache) +/* Is there room for one more row? 
*/ { - return(kernel_cache->elems < kernel_cache->max_elems); -} - + return (kernel_cache->elems < kernel_cache->max_elems); +} + /************************** Compute estimates ******************************/ -void compute_xa_estimates(MODEL *model, long int *label, - long int *unlabeled, long int totdoc, - DOC **docs, double *lin, double *a, - KERNEL_PARM *kernel_parm, - LEARN_PARM *learn_parm, double *error, - double *recall, double *precision) - /* Computes xa-estimate of error rate, recall, and precision. See - T. Joachims, Estimating the Generalization Performance of an SVM - Efficiently, IMCL, 2000. */ +void compute_xa_estimates(MODEL *model, long int *label, long int *unlabeled, + long int totdoc, DOC **docs, double *lin, double *a, + KERNEL_PARM *kernel_parm, LEARN_PARM *learn_parm, + double *error, double *recall, double *precision) +/* Computes xa-estimate of error rate, recall, and precision. See + T. Joachims, Estimating the Generalization Performance of an SVM + Efficiently, IMCL, 2000. */ { - long i,looerror,looposerror,loonegerror; - long totex,totposex; - double xi,r_delta,r_delta_sq,sim=0; - long *sv2dnum=NULL,*sv=NULL,svnum; + long i, looerror, looposerror, loonegerror; + long totex, totposex; + double xi, r_delta, r_delta_sq, sim = 0; + long *sv2dnum = NULL, *sv = NULL, svnum; - r_delta=estimate_r_delta(docs,totdoc,kernel_parm); - r_delta_sq=r_delta*r_delta; + r_delta = estimate_r_delta(docs, totdoc, kernel_parm); + r_delta_sq = r_delta * r_delta; - looerror=0; - looposerror=0; - loonegerror=0; - totex=0; - totposex=0; - svnum=0; + looerror = 0; + looposerror = 0; + loonegerror = 0; + totex = 0; + totposex = 0; + svnum = 0; - if(learn_parm->xa_depth > 0) { - sv = (long *)my_malloc(sizeof(long)*(totdoc+11)); - for(i=0;isv_num;i++) - if(a[model->supvec[i]->docnum] - < (learn_parm->svm_cost[model->supvec[i]->docnum] - -learn_parm->epsilon_a)) { - sv[model->supvec[i]->docnum]=1; - svnum++; + if (learn_parm->xa_depth > 0) { + sv = (long *)my_malloc(sizeof(long) * (totdoc + 11)); + for (i = 0; i < totdoc; i++) + sv[i] = 0; + for (i = 1; i < model->sv_num; i++) + if (a[model->supvec[i]->docnum] < + (learn_parm->svm_cost[model->supvec[i]->docnum] - + learn_parm->epsilon_a)) { + sv[model->supvec[i]->docnum] = 1; + svnum++; } - sv2dnum = (long *)my_malloc(sizeof(long)*(totdoc+11)); + sv2dnum = (long *)my_malloc(sizeof(long) * (totdoc + 11)); clear_index(sv2dnum); - compute_index(sv,totdoc,sv2dnum); + compute_index(sv, totdoc, sv2dnum); } - for(i=0;ib)*(double)label[i]); - if(xi<0) xi=0; - if(label[i]>0) { - totposex++; + } else { + xi = 1.0 - ((lin[i] - model->b) * (double)label[i]); + if (xi < 0) + xi = 0; + if (label[i] > 0) { + totposex++; } - if((learn_parm->rho*a[i]*r_delta_sq+xi) >= 1.0) { - if(learn_parm->xa_depth > 0) { /* makes assumptions */ - sim=distribute_alpha_t_greedily(sv2dnum,svnum,docs,a,i,label, - kernel_parm,learn_parm, - (double)((1.0-xi-a[i]*r_delta_sq)/(2.0*a[i]))); - } - if((learn_parm->xa_depth == 0) || - ((a[i]*kernel(kernel_parm,docs[i],docs[i])+a[i]*2.0*sim+xi) >= 1.0)) { - looerror++; - if(label[i]>0) { - looposerror++; - } - else { - loonegerror++; - } - } + if ((learn_parm->rho * a[i] * r_delta_sq + xi) >= 1.0) { + if (learn_parm->xa_depth > 0) { /* makes assumptions */ + sim = distribute_alpha_t_greedily( + sv2dnum, svnum, docs, a, i, label, kernel_parm, learn_parm, + (double)((1.0 - xi - a[i] * r_delta_sq) / (2.0 * a[i]))); + } + if ((learn_parm->xa_depth == 0) || + ((a[i] * kernel(kernel_parm, docs[i], docs[i]) + a[i] * 2.0 * sim + + xi) >= 1.0)) { 
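+        /* xi-alpha criterion satisfied: count this example as a
+           predicted leave-one-out error */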
+ looerror++; + if (label[i] > 0) { + looposerror++; + } else { + loonegerror++; + } + } } totex++; } } - (*error)=((double)looerror/(double)totex)*100.0; - (*recall)=(1.0-(double)looposerror/(double)totposex)*100.0; - (*precision)=(((double)totposex-(double)looposerror) - /((double)totposex-(double)looposerror+(double)loonegerror))*100.0; + (*error) = ((double)looerror / (double)totex) * 100.0; + (*recall) = (1.0 - (double)looposerror / (double)totposex) * 100.0; + (*precision) = + (((double)totposex - (double)looposerror) / + ((double)totposex - (double)looposerror + (double)loonegerror)) * + 100.0; free(sv); free(sv2dnum); -} - - -double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, - DOC **docs, double *a, - long int docnum, - long int *label, - KERNEL_PARM *kernel_parm, - LEARN_PARM *learn_parm, double thresh) - /* Experimental Code improving plain XiAlpha Estimates by - computing a better bound using a greedy optimzation strategy. */ +} + +double distribute_alpha_t_greedily(long int *sv2dnum, long int svnum, + DOC **docs, double *a, long int docnum, + long int *label, KERNEL_PARM *kernel_parm, + LEARN_PARM *learn_parm, double thresh) +/* Experimental Code improving plain XiAlpha Estimates by + computing a better bound using a greedy optimzation strategy. */ { - long best_depth=0; - long i,j,k,d,skip,allskip; - double best,best_val[101],val,init_val_sq,init_val_lin; + long best_depth = 0; + long i, j, k, d, skip, allskip; + double best, best_val[101], val, init_val_sq, init_val_lin; long best_ex[101]; - CFLOAT *cache,*trow; + CFLOAT *cache, *trow; - cache=(CFLOAT *)my_malloc(sizeof(CFLOAT)*learn_parm->xa_depth*svnum); - trow = (CFLOAT *)my_malloc(sizeof(CFLOAT)*svnum); + cache = (CFLOAT *)my_malloc(sizeof(CFLOAT) * learn_parm->xa_depth * svnum); + trow = (CFLOAT *)my_malloc(sizeof(CFLOAT) * svnum); - for(k=0;kxa_depth;d++) { - allskip=1; - if(d>=1) { - init_val_sq+=cache[best_ex[d-1]+svnum*(d-1)]; - for(k=0;kxa_depth; d++) { + allskip = 1; + if (d >= 1) { + init_val_sq += cache[best_ex[d - 1] + svnum * (d - 1)]; + for (k = 0; k < d - 1; k++) { + init_val_sq += 2.0 * cache[best_ex[k] + svnum * (d - 1)]; } - init_val_lin+=trow[best_ex[d-1]]; + init_val_lin += trow[best_ex[d - 1]]; } - for(i=0;ixa_depth; + if (allskip || (best < thresh)) { + d = learn_parm->xa_depth; } - } - + } + free(cache); free(trow); /* printf("Distribute[%ld](%ld)=%f, ",docnum,best_depth,best); */ - return(best); -} - - -void estimate_transduction_quality(MODEL *model, long int *label, - long int *unlabeled, - long int totdoc, DOC **docs, double *lin) - /* Loo-bound based on observation that loo-errors must have an - equal distribution in both training and test examples, given - that the test examples are classified correctly. Compare - chapter "Constraints on the Transductive Hyperplane" in my - Dissertation. */ + return (best); +} + +void estimate_transduction_quality(MODEL *model, long int *label, + long int *unlabeled, long int totdoc, + DOC **docs, double *lin) +/* Loo-bound based on observation that loo-errors must have an + equal distribution in both training and test examples, given + that the test examples are classified correctly. Compare + chapter "Constraints on the Transductive Hyperplane" in my + Dissertation. 
*/ { - long i,j,l=0,ulab=0,lab=0,labpos=0,labneg=0,ulabpos=0,ulabneg=0,totulab=0; - double totlab=0,totlabpos=0,totlabneg=0,labsum=0,ulabsum=0; - double r_delta,r_delta_sq,xi,xisum=0,asum=0; + long i, j, l = 0, ulab = 0, lab = 0, labpos = 0, labneg = 0, ulabpos = 0, + ulabneg = 0, totulab = 0; + double totlab = 0, totlabpos = 0, totlabneg = 0, labsum = 0, ulabsum = 0; + double r_delta, r_delta_sq, xi, xisum = 0, asum = 0; - r_delta=estimate_r_delta(docs,totdoc,&(model->kernel_parm)); - r_delta_sq=r_delta*r_delta; + r_delta = estimate_r_delta(docs, totdoc, &(model->kernel_parm)); + r_delta_sq = r_delta * r_delta; - for(j=0;j 0) - totlabpos++; - else - totlabneg++; + if (label[j] > 0) + totlabpos++; + else + totlabneg++; } } - for(j=1;jsv_num;j++) { - i=model->supvec[j]->docnum; - xi=1.0-((lin[i]-model->b)*(double)label[i]); - if(xi<0) xi=0; + for (j = 1; j < model->sv_num; j++) { + i = model->supvec[j]->docnum; + xi = 1.0 - ((lin[i] - model->b) * (double)label[i]); + if (xi < 0) + xi = 0; - xisum+=xi; - asum+=fabs(model->alpha[j]); - if(unlabeled[i]) { - ulabsum+=(fabs(model->alpha[j])*r_delta_sq+xi); + xisum += xi; + asum += fabs(model->alpha[j]); + if (unlabeled[i]) { + ulabsum += (fabs(model->alpha[j]) * r_delta_sq + xi); + } else { + labsum += (fabs(model->alpha[j]) * r_delta_sq + xi); } - else { - labsum+=(fabs(model->alpha[j])*r_delta_sq+xi); - } - if((fabs(model->alpha[j])*r_delta_sq+xi) >= 1) { + if ((fabs(model->alpha[j]) * r_delta_sq + xi) >= 1) { l++; - if(unlabeled[model->supvec[j]->docnum]) { - ulab++; - if(model->alpha[j] > 0) - ulabpos++; - else - ulabneg++; - } - else { - lab++; - if(model->alpha[j] > 0) - labpos++; - else - labneg++; + if (unlabeled[model->supvec[j]->docnum]) { + ulab++; + if (model->alpha[j] > 0) + ulabpos++; + else + ulabneg++; + } else { + lab++; + if (model->alpha[j] > 0) + labpos++; + else + labneg++; } } } - printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n",(double)labpos/(double)totlab*100.0,(double)labneg/(double)totlab*100.0,(double)totlabpos/(double)(totlab)*100.0); - printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n",(double)ulabpos/(double)totulab*100.0,(double)ulabneg/(double)totulab*100.0); - printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)lab/(double)totlab*100.0,(double)ulab/(double)totulab*100.0,(double)l/(double)(totdoc)*100.0); - printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n",(double)labsum/(double)totlab*100.0,(double)ulabsum/(double)totulab*100.0,(double)(labsum+ulabsum)/(double)(totdoc)*100.0); - printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n",r_delta_sq,xisum,asum); + printf("xacrit>=1: labeledpos=%.5f labeledneg=%.5f default=%.5f\n", + (double)labpos / (double)totlab * 100.0, + (double)labneg / (double)totlab * 100.0, + (double)totlabpos / (double)(totlab)*100.0); + printf("xacrit>=1: unlabelpos=%.5f unlabelneg=%.5f\n", + (double)ulabpos / (double)totulab * 100.0, + (double)ulabneg / (double)totulab * 100.0); + printf("xacrit>=1: labeled=%.5f unlabled=%.5f all=%.5f\n", + (double)lab / (double)totlab * 100.0, + (double)ulab / (double)totulab * 100.0, + (double)l / (double)(totdoc)*100.0); + printf("xacritsum: labeled=%.5f unlabled=%.5f all=%.5f\n", + (double)labsum / (double)totlab * 100.0, + (double)ulabsum / (double)totulab * 100.0, + (double)(labsum + ulabsum) / (double)(totdoc)*100.0); + printf("r_delta_sq=%.5f xisum=%.5f asum=%.5f\n", r_delta_sq, xisum, asum); } -double estimate_margin_vcdim(MODEL *model, double w, double R) - /* optional: length of model vector in feature space */ 
- /* optional: radius of ball containing the data */ +double estimate_margin_vcdim(MODEL *model, double w, double R) +/* optional: length of model vector in feature space */ +/* optional: radius of ball containing the data */ { double h; /* follows chapter 5.6.4 in [Vapnik/95] */ - if(w<0) { - w=model_length_s(model); + if (w < 0) { + w = model_length_s(model); } - if(R<0) { - R=estimate_sphere(model); + if (R < 0) { + R = estimate_sphere(model); } - h = w*w * R*R +1; - return(h); + h = w * w * R * R + 1; + return (h); } -double estimate_sphere(MODEL *model) - /* Approximates the radius of the ball containing */ - /* the support vectors by bounding it with the */ -{ /* length of the longest support vector. This is */ - register long j; /* pretty good for text categorization, since all */ - double xlen,maxxlen=0; /* documents have feature vectors of length 1. It */ - DOC *nulldoc; /* assumes that the center of the ball is at the */ - WORD nullword; /* origin of the space. */ - KERNEL_PARM *kernel_parm=&(model->kernel_parm); +double estimate_sphere(MODEL *model) +/* Approximates the radius of the ball containing */ +/* the support vectors by bounding it with the */ +{ /* length of the longest support vector. This is */ + register long j; /* pretty good for text categorization, since all */ + double xlen, maxxlen = 0; /* documents have feature vectors of length 1. It */ + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. */ + KERNEL_PARM *kernel_parm = &(model->kernel_parm); - nullword.wnum=0; - nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); - - for(j=1;jsv_num;j++) { - xlen=sqrt(kernel(kernel_parm,model->supvec[j],model->supvec[j]) - -2*kernel(kernel_parm,model->supvec[j],nulldoc) - +kernel(kernel_parm,nulldoc,nulldoc)); - if(xlen>maxxlen) { - maxxlen=xlen; + nullword.wnum = 0; + nulldoc = create_example(-2, 0, 0, 0.0, create_svector(&nullword, "", 1.0)); + + for (j = 1; j < model->sv_num; j++) { + xlen = sqrt(kernel(kernel_parm, model->supvec[j], model->supvec[j]) - + 2 * kernel(kernel_parm, model->supvec[j], nulldoc) + + kernel(kernel_parm, nulldoc, nulldoc)); + if (xlen > maxxlen) { + maxxlen = xlen; } } - free_example(nulldoc,1); - return(maxxlen); + free_example(nulldoc, 1); + return (maxxlen); } -double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm) -{ +double estimate_r_delta(DOC **docs, long int totdoc, KERNEL_PARM *kernel_parm) { long i; - double maxxlen,xlen; - DOC *nulldoc; /* assumes that the center of the ball is at the */ - WORD nullword; /* origin of the space. */ + double maxxlen, xlen; + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. 
*/ - nullword.wnum=0; - nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); - - maxxlen=0; - for(i=0;imaxxlen) { - maxxlen=xlen; + nullword.wnum = 0; + nulldoc = create_example(-2, 0, 0, 0.0, create_svector(&nullword, "", 1.0)); + + maxxlen = 0; + for (i = 0; i < totdoc; i++) { + xlen = sqrt(kernel(kernel_parm, docs[i], docs[i]) - + 2 * kernel(kernel_parm, docs[i], nulldoc) + + kernel(kernel_parm, nulldoc, nulldoc)); + if (xlen > maxxlen) { + maxxlen = xlen; } } - free_example(nulldoc,1); - return(maxxlen); + free_example(nulldoc, 1); + return (maxxlen); } -double estimate_r_delta_average(DOC **docs, long int totdoc, - KERNEL_PARM *kernel_parm) -{ +double estimate_r_delta_average(DOC **docs, long int totdoc, + KERNEL_PARM *kernel_parm) { long i; double avgxlen; - DOC *nulldoc; /* assumes that the center of the ball is at the */ - WORD nullword; /* origin of the space. */ + DOC *nulldoc; /* assumes that the center of the ball is at the */ + WORD nullword; /* origin of the space. */ - nullword.wnum=0; - nulldoc=create_example(-2,0,0,0.0,create_svector(&nullword,"",1.0)); - - avgxlen=0; - for(i=0;imaxxlen) { - maxxlen=xlen; + maxxlen = 0; + for (i = 0; i < totdoc; i++) { + xlen = sqrt(kernel(kernel_parm, docs[i], docs[i])); + if (xlen > maxxlen) { + maxxlen = xlen; } } - return(maxxlen); + return (maxxlen); } /****************************** IO-handling **********************************/ -void write_prediction(char *predfile, MODEL *model, double *lin, - double *a, long int *unlabeled, - long int *label, long int totdoc, - LEARN_PARM *learn_parm) -{ +void write_prediction(char *predfile, MODEL *model, double *lin, double *a, + long int *unlabeled, long int *label, long int totdoc, + LEARN_PARM *learn_parm) { FILE *predfl; long i; - double dist,a_max; + double dist, a_max; - if(verbosity>=1) { - printf("Writing prediction file..."); fflush(stdout); + if (verbosity >= 1) { + printf("Writing prediction file..."); + fflush(stdout); } - if ((predfl = fopen (predfile, "w")) == NULL) - { perror (predfile); exit (1); } - a_max=learn_parm->epsilon_a; - for(i=0;ia_max)) { - a_max=a[i]; + if ((predfl = fopen(predfile, "w")) == NULL) { + perror(predfile); + exit(1); + } + a_max = learn_parm->epsilon_a; + for (i = 0; i < totdoc; i++) { + if ((unlabeled[i]) && (a[i] > a_max)) { + a_max = a[i]; } } - for(i=0;i(learn_parm->epsilon_a))) { - dist=(double)label[i]*(1.0-learn_parm->epsilon_crit-a[i]/(a_max*2.0)); + for (i = 0; i < totdoc; i++) { + if (unlabeled[i]) { + if ((a[i] > (learn_parm->epsilon_a))) { + dist = (double)label[i] * + (1.0 - learn_parm->epsilon_crit - a[i] / (a_max * 2.0)); + } else { + dist = (lin[i] - model->b); } - else { - dist=(lin[i]-model->b); - } - if(dist>0) { - fprintf(predfl,"%.8g:+1 %.8g:-1\n",dist,-dist); - } - else { - fprintf(predfl,"%.8g:-1 %.8g:+1\n",-dist,dist); + if (dist > 0) { + fprintf(predfl, "%.8g:+1 %.8g:-1\n", dist, -dist); + } else { + fprintf(predfl, "%.8g:-1 %.8g:+1\n", -dist, dist); } } } fclose(predfl); - if(verbosity>=1) { + if (verbosity >= 1) { printf("done\n"); } } -void write_alphas(char *alphafile, double *a, - long int *label, long int totdoc) -{ +void write_alphas(char *alphafile, double *a, long int *label, + long int totdoc) { FILE *alphafl; long i; - if(verbosity>=1) { - printf("Writing alpha file..."); fflush(stdout); + if (verbosity >= 1) { + printf("Writing alpha file..."); + fflush(stdout); } - if ((alphafl = fopen (alphafile, "w")) == NULL) - { perror (alphafile); exit (1); } - for(i=0;i=1) { + if (verbosity >= 1) { 
printf("done\n"); } -} - +} diff --git a/src/classifier/svm/svm_light/svm_learn.h b/src/classifier/svm/svm_light/svm_learn.h index 9dc57b4..6e01304 100644 --- a/src/classifier/svm/svm_light/svm_learn.h +++ b/src/classifier/svm/svm_light/svm_learn.h @@ -19,9 +19,12 @@ #ifndef SVM_LEARN #define SVM_LEARN +#include "svm_common.h" + #ifdef __cplusplus extern "C" { #endif + void svm_learn_classification(DOC **, double *, long, long, LEARN_PARM *, KERNEL_PARM *, KERNEL_CACHE *, MODEL *, double *); void svm_learn_regression(DOC **, double *, long, long, LEARN_PARM *, diff --git a/src/hand/pose3d/estimator.cpp b/src/hand/pose3d/estimator.cpp index ceb7589..c33cb1f 100644 --- a/src/hand/pose3d/estimator.cpp +++ b/src/hand/pose3d/estimator.cpp @@ -53,6 +53,8 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, int ret = static_cast(d)->Detect( rgbdata, img_width, img_height, objs); if (ret != 0) { + objects->length = 0; + objects->items = NULL; return ret; } const size_t total_objs = objs.size(); @@ -61,6 +63,7 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, objects->items = NULL; return 0; } + objects->items = (PalmObject *)malloc(total_objs * sizeof(PalmObject)); for (size_t i = 0; i < total_objs; ++i) { objects->items[i].score = objs[i].score; @@ -75,7 +78,7 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, (Point2fVector *)malloc(sizeof(Point2fVector)); objects->items[i].landmarks->length = 7; objects->items[i].landmarks->points = - (Point2f *)malloc(4 * sizeof(Point2f)); + (Point2f *)malloc(7 * sizeof(Point2f)); for (size_t j = 0; j < 7; ++j) { objects->items[i].landmarks->points[j] = objs[i].landmarks[j]; } @@ -85,18 +88,26 @@ int mediapipe_hand_detect(IHandPose3DEstimator d, const unsigned char *rgbdata, objects->items[i].skeleton3d = NULL; continue; } + const size_t total_skeleton3d = objs[i].skeleton3d.size(); + if (total_skeleton3d == 0) { + objects->items[i].skeleton = NULL; + objects->items[i].skeleton3d = NULL; + continue; + } objects->items[i].skeleton = (Point2fVector *)malloc(sizeof(Point2fVector)); objects->items[i].skeleton->length = total_skeleton; objects->items[i].skeleton->points = (Point2f *)malloc(total_skeleton * sizeof(Point2f)); objects->items[i].skeleton3d = (Point3dVector *)malloc(sizeof(Point3dVector)); - objects->items[i].skeleton3d->length = total_skeleton; + objects->items[i].skeleton3d->length = total_skeleton3d; objects->items[i].skeleton3d->points = - (Point3d *)malloc(total_skeleton * sizeof(Point3d)); + (Point3d *)malloc(total_skeleton3d * sizeof(Point3d)); for (size_t j = 0; j < total_skeleton; ++j) { objects->items[i].skeleton->points[j].x = objs[i].skeleton[j].x; objects->items[i].skeleton->points[j].y = objs[i].skeleton[j].y; + } + for (size_t j = 0; j < total_skeleton3d; ++j) { objects->items[i].skeleton3d->points[j].x = objs[i].skeleton3d[j].x; objects->items[i].skeleton3d->points[j].y = objs[i].skeleton3d[j].y; objects->items[i].skeleton3d->points[j].z = objs[i].skeleton3d[j].z; diff --git a/src/hand/pose3d/mediapipe/mediapipe.cpp b/src/hand/pose3d/mediapipe/mediapipe.cpp index 603f51a..6f5da7a 100644 --- a/src/hand/pose3d/mediapipe/mediapipe.cpp +++ b/src/hand/pose3d/mediapipe/mediapipe.cpp @@ -440,6 +440,7 @@ int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width, decode_bounds(region_list, prob_threshold, target_size, target_size, scores, bboxes, anchors); non_max_suppression(region_list, region_nms_list, nms_threshold); + objects.clear(); 
pack_detect_result(detect_results, region_nms_list, target_size, objects); @@ -490,10 +491,9 @@ int MediapipeHand::Detect(const unsigned char *rgbdata, int img_width, ncnn::Mat trans_image = ncnn::Mat::from_pixels(trans_mat, ncnn::Mat::PIXEL_RGB, 224, 224); + free(trans_mat); float score = GetLandmarks(trans_image, tm, objects[i].skeleton, objects[i].skeleton3d); - - free(trans_mat); } return 0; }
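
The heart of this patch is the allocation fix in mediapipe_hand_detect: PalmObject.landmarks records length = 7, but its points buffer was malloc'ed for only 4 Point2f, so copying all seven palm landmarks wrote past the end of the allocation. Below is a minimal C sketch of that bug class and of the usual guard, sizing the allocation from the same value that sets the length. The struct shapes mirror Point2fVector in estimator.cpp; the constant and helper names are hypothetical and not part of the patch.

#include <stdlib.h>

typedef struct {
  float x, y;
} Point2f;

typedef struct {
  int length;
  Point2f *points;
} Point2fVector;

enum { PALM_LANDMARK_COUNT = 7 }; /* hypothetical name for the fixed count */

/* Before the fix: length says 7 but the buffer holds 4 elements, so a
   copy loop over all landmarks writes points[4..6] out of bounds. */
static Point2fVector *landmarks_alloc_buggy(void) {
  Point2fVector *v = (Point2fVector *)malloc(sizeof(Point2fVector));
  if (v == NULL)
    return NULL;
  v->length = PALM_LANDMARK_COUNT;
  v->points = (Point2f *)malloc(4 * sizeof(Point2f)); /* wrong count */
  return v;
}

/* After the fix: derive the allocation size from the same value stored
   in length, so the two cannot drift apart. */
static Point2fVector *landmarks_alloc_fixed(void) {
  Point2fVector *v = (Point2fVector *)malloc(sizeof(Point2fVector));
  if (v == NULL)
    return NULL;
  v->length = PALM_LANDMARK_COUNT;
  v->points = (Point2f *)malloc(v->length * sizeof(*v->points));
  if (v->points == NULL) {
    free(v);
    return NULL;
  }
  return v;
}

The same hunk applies this pattern to skeleton3d by sizing its buffer from total_skeleton3d rather than total_skeleton, and it zeroes objects->length and objects->items on the error path, so a caller that frees by length never walks uninitialized memory.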