Here are the contents of svm-fns.php, which implements the training, prediction, and assessment functions.
// === svm-fns.php ===
/**
* Defines functions to train and use Support Vector Machine
*/
/**
* linear_kernel() returns the linear kernel (the dot product) between x1 and x2
* NOTE that the incoming vectors x1 and x2 were originally both column vectors
* of dimension (vocab size) x 1 in the Octave source
*/
function linear_kernel($x1, $x2) {
// Ensure that x1 and x2 are column vectors
// while this conversion may be necessary for a broad use of
// this function, it is unnecessary in the svmTrain context
// and probably hampers performance a tiny bit
// x1 = x1(:); x2 = x2(:);
// Compute the kernel: the dot product of two vectors yields a scalar
return $x1->dot($x2);
}
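/*
 * A minimal usage sketch (hypothetical data, not from the original):
 *
 * $a = Np\vector::ar([1.0, 0.0, 2.0]);
 * $b = Np\vector::ar([3.0, 1.0, 0.5]);
 * echo linear_kernel($a, $b); // 1*3 + 0*1 + 2*0.5 = 4
 */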
/**
* gaussian_kernel() returns the radial basis function (RBF) kernel between
* x1 and x2 for the given sigma
* orig octave: sim = gaussianKernel(x1, x2)
*/
function gaussian_kernel(Np\vector $x1, Np\vector $x2, float $sigma) {
// NOTE the incoming vectors x1 and x2 are column vectors
// of dimension (vocab_size) x 1
// orig octave:
//sim = exp(-sum((x1 - x2) .^ 2) / (2 * sigma^2));
// NOTE sim will be a 1x1 result (a scalar value)
return exp(-$x1->subtract($x2)->square()->sum() / (2 * $sigma*$sigma));
}
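/*
 * A minimal usage sketch (hypothetical data, not from the original).
 * With sigma = 1, sim = exp(-||a - b||^2 / 2):
 *
 * $a = Np\vector::ar([1.0, 2.0]);
 * $b = Np\vector::ar([0.0, 2.0]);
 * echo gaussian_kernel($a, $b, 1.0); // exp(-1/2) ~= 0.6065
 */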
function get_gaussian_predict_k(Np\matrix $x, array $model) {
// orig octave code in svmPredict
// Vectorized RBF Kernel
// This is equivalent to computing the kernel on every pair of examples
//X1 = sum(X.^2, 2);
//X2 = sum(model.X.^2, 2)';
//K = bsxfun(@plus, X1, bsxfun(@plus, X2, - 2 * X * model.X'));
//K = model.kernelFunction(1, 0) .^ K;
//K = bsxfun(@times, model.y', K);
//K = bsxfun(@times, model.alphas', K);
//p = sum(K, 2);
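// The vectorized form relies on the identity
//   ||u - v||^2 = ||u||^2 + ||v||^2 - 2 * (u . v)
// so K(i,j) starts life as the squared distance between example i of X and
// example j of model.X, built from the row sums of squares plus the
// -2 * X * model.X' cross term below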
$x1 = $x->square()->sumRows();
//echo "x1 ", $x1, "\n";
// we don't need to transpose this because ghostjat/np doesn't distinguish col vs row vectors
$x2 = $model['x']->square()->sumRows();
//echo "x2 ", $x2, "\n";
// need to build K.
$K = $x->dot($model['x']->transpose())->multiply(-2);
// do the inner bsxfun(plus...)
// ghostjat has no means to add a ROW vector to a matrix soooo we fake it
$kshape = $K->getShape();
$km = $kshape->m;
$kn = $kshape->n;
$x2size = $x2->getSize();
// $kn should match the size of $x2
// sanity check
if ($x2size !== $kn) {
throw new \Exception("x2 size ($x2size) does not match kn ($kn)");
}
// i are columns, j are rows
for($i=0; $i<$x2size; $i++) {
$x2val = $x2->data[$i];
for($j=0; $j<$km; $j++) {
// add the ith x2 value to the ith column of the jth row
$K->data[($j * $kn) + $i] += $x2val;
}
}
// do the outer bsxfun(plus...)
// ghostjat has no means to add a COLUMN vector soooo we fake it
$x1size = $x1->getSize();
// $km should match the dimensions of $x1
// sanity check
if ($x1size !== $km) {
throw new \Exception("x1 size ($x1size) does not match km ($km)");
}
// i are rows, j are columns
for($i=0; $i<$x1size; $i++) {
$x1val = $x1->data[$i];
for($j=0; $j<$kn; $j++) {
// add the ith x1 value to the jth column of the ith row
//$offset = ($i * $kn) + $j;
$K->data[($i * $kn) + $j] += $x1val;
}
}
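// gaussian_kernel(1, 0, sigma) = exp(-1 / (2 * sigma^2)), so raising it to
// the power K(i,j) yields exp(-K(i,j) / (2 * sigma^2)), i.e. the RBF kernel
// of the squared distances accumulated above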
$kf = gaussian_kernel(Np\vector::ar([1]), Np\vector::ar([0]), $model['sigma']);
//echo "kf ", $kf, "\n";
$K = $K->map(fn($v) => (pow($kf, $v)));
$mysize = $model['y']->getSize();
// $kn should match the size of $model['y']
// sanity check
if ($mysize !== $kn) {
throw new \Exception("model.y size ($mysize) does not match kn ($kn)");
}
// i are columns, j are rows
for($i=0; $i<$mysize; $i++) {
$yval = $model['y']->data[$i];
for($j=0; $j<$km; $j++) {
// multiply the ith y value by the ith column of the jth row
$K->data[($j * $kn) + $i] *= $yval;
}
}
$alphasize = $model['alphas']->getSize();
// $kn should match the size of $model['alphas']
// sanity check
if ($alphasize !== $kn) {
throw new \Exception("model.alphas size ($alphasize) does not match kn ($kn)");
}
// i are columns, j are rows
for($i=0; $i<$alphasize; $i++) {
$aval = $model['alphas']->data[$i];
for($j=0; $j<$km; $j++) {
// multiply the ith alpha value by the ith column of the jth row
$K->data[($j * $kn) + $i] *= $aval;
}
}
return $K;
}
/**
* trains an SVM classifier and returns trained model. X is the matrix of
* training examples. Each row is a training example, and the jth column
* holds the jth feature. Y is a column matrix containing 1 for positive
* examples and 0 for negative examples. C is the standard SVM regularization
* parameter. tol is a tolerance value used for determining equality of
* floating point numbers. max_passes controls the number of iterations
* over the dataset (without changes to alpha) before the algorithm quits.
*
* Note: This is a simplified version of the SMO algorithm for training
* SVMs. In practice, if you want to train an SVM classifier, we
* recommend using an optimized package such as:
* LIBSVM (http://www.csie.ntu.edu.tw/~cjlin/libsvm/)
* SVMLight (http://svmlight.joachims.org/)
*/
function svm_train($x_matrix, $y_vector, $C, $kernel_fn, $sigma=null, $tol=0.001, $max_passes=5) {
echo "train with C=$C";
if (!is_null($sigma)) {
echo ", sigma=$sigma";
}
echo " and kernel_fn=$kernel_fn\n";
$shape = $x_matrix->getShape();
echo "x matrix m={$shape->m}, n={$shape->n}\n";
$size = $y_vector->getSize();
echo "y vector size ", $size, "\n";
$m = $shape->m;
// map the 0s in y to -1; note this appears to be faster than vector->map() stuff
// BIG FAT WARNING we have to make a copy of the $y_vector object because
// changing it here apparently propagates those changes back up to the calling scope
$yvec = Np\vector::ones($y_vector->getSize());
for($i=0; $i<$size; $i++) {
if ($y_vector->data[$i] == 0) {
$yvec->data[$i] = -1;
}
}
// Pre-compute the Kernel Matrix since our dataset is small
// (in practice, optimized SVM packages that handle large datasets
// gracefully will _not_ do this)
echo "CALCULATING KERNEL\n";
// We have implemented optimized, vectorized versions of the kernels here so
// that the SVM training will run faster.
if ($kernel_fn === 'linear_kernel') {
// Vectorized computation for the Linear Kernel
// This is equivalent to computing the kernel on every pair of examples
$K = $x_matrix->dot($x_matrix->transpose());
} elseif ($kernel_fn === 'gaussian_kernel') {
if (is_null($sigma)) {
throw new Exception('You must provide a sigma value for gaussian kernel training');
}
// Vectorized RBF Kernel
// This is equivalent to computing the kernel on every pair of examples
// orig octave:
// X2 = sum(X.^2, 2);
// K = bsxfun(@plus, X2, bsxfun(@plus, X2', - 2 * (X * X')));
// K = kernelFunction(1, 0) .^ K;
// a vector of size n, orig X2 was a column vector
$x2 = $x_matrix->square()->sumRows();
// need to build K. matrix->sum($x2) appears to broadcast $x2 across the
// result, which handles the inner bsxfun(@plus, ...) for us
$K = $x_matrix->dot($x_matrix->transpose())->multiply(-2)->sum($x2);
// ghostjat has no means to add a column vector soooo we fake it
$kshape = $K->getShape();
$km = $kshape->m;
$kn = $kshape->n;
$x2size = $x2->getSize();
// $km should match the dimensions of $x2
// sanity check
if ($x2size !== $km) {
throw new \Exception("x2 size ($x2size) does not match km ($km)");
}
for($i=0; $i<$x2size; $i++) {
$x2val = $x2->data[$i];
for($j=0; $j<$kn; $j++) {
// add the ith x2 value to every column of the ith row
//$offset = ($i * $kn) + $j;
$K->data[($i * $kn) + $j] += $x2val;
}
}
// free memory
unset($x2);
$kf = gaussian_kernel(Np\vector::ar([1]), Np\vector::ar([0]), $sigma);
$K = $K->map(fn($v) => (pow($kf, $v)));
} else {
// Pre-compute the Kernel Matrix
// The following can be slow due to the lack of vectorization
echo "NON-VECTORIZED, SLOW\n";
$K = Np\matrix::zeros($m, $m);
for ($i=0; $i<$m; $i++) {
if ($i > 0 && ($i % 10 == 0)) {
echo "\tloop $i\n";
}
for ($j=$i; $j<$m; $j++) { // start at $i; the symmetric write below fills the rest
// original matlab/octave code
//K(i,j) = kernelFunction(X(i,:)', X(j,:)');
//K(j,i) = K(i,j); %the matrix is symmetric
// FIXME define a set() fn for matrix class rather than awkwardly calculating offset
$kernel_val = $kernel_fn($x_matrix->rowAsVector($i), $x_matrix->rowAsVector($j));
// location of $i, $j
$offset1 = ($i * $K->col) + $j;
$K->data[$offset1] = $kernel_val;
// K matrix is symmetric, location of $j, $i
$offset2 = ($j * $K->col) + $i;
$K->data[$offset2] = $kernel_val;
} // j loop
} // i loop
} // if linear/gaussian/slow
echo "KERNEL CALC COMPLETE\n";
// Variables
$alphas = Np\vector::zeros($m);
$b = 0;
$E = Np\vector::zeros($m);
$passes = 0;
$eta = 0;
$L = 0;
$H = 0;
// Train
echo "\nTraining...";
$dots = 11;
while ($passes < $max_passes) {
$num_changed_alphas = 0;
for ($i=0; $i<$m; $i++) {
// comments from original coursera class octave source:
// Calculate Ei = f(x(i)) - y(i) using (2).
// this line commented out in coursera source
// E(i) = b + sum (X(i, :) * (repmat(alphas.*Y,1,n).*X)') - Y(i);
// we want to calculate this octave expression from coursera source
//E(i) = b + sum (alphas.*Y.*K(:,i)) - Y(i);
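// i.e. the SVM decision function f(x_i) = sum_j(alphas_j * y_j * K(j,i)) + b;
// E(i) is then the error f(x_i) - y_i between the prediction and the label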
// considerable trial and error yielded this for the sum, returns a scalar/float
//$sum = $alphas->multiply($yvec)->multiply($K->rowAsVector($i))->sum();
$E->data[$i] = $b + $alphas->multiply($yvec)->multiply($K->rowAsVector($i))->sum() - $yvec->data[$i];
// orig octave if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0)),
if (
($yvec->data[$i] * $E->data[$i] < -$tol && $alphas->data[$i] < $C)
|| ($yvec->data[$i] * $E->data[$i] > $tol && $alphas->data[$i] > 0)
) {
// In practice, there are many heuristics one can use to select
// the i and j. In this simplified code, we select them randomly.
do {
$j = mt_rand(0, ($m-1));
} while ($j === $i);
// TESTING
//$j = ($i + 1) % $m;
//echo "j: $j\n";
// Calculate Ej = f(x(j)) - y(j) using (2).
// orig octave calc: E(j) = b + sum (alphas.*Y.*K(:,j)) - Y(j);
$E->data[$j] = $b + $alphas->multiply($yvec)->multiply($K->rowAsVector($j))->sum() - $yvec->data[$j];
// Save old alphas
$alpha_i_old = $alphas->data[$i];
$alpha_j_old = $alphas->data[$j];
// Compute L and H by (10) or (11).
$ai = $alphas->data[$i]; // grab these to prevent costly lookups any more than necessary
$aj = $alphas->data[$j];
if ($yvec->data[$i] == $yvec->data[$j]) {
$L = max(0, $aj + $ai - $C);
$H = min($C, $aj + $ai);
} else {
$L = max(0, $aj - $ai);
$H = min($C, $C + $aj - $ai);
}
if ($L == $H) {
// continue to next i.
continue;
}
// Compute eta by (14).
$eta = 2 * $K->at($i,$j) - $K->at($i,$i) - $K->at($j,$j);
if ($eta >= 0) {
// continue to next i.
continue;
}
// Compute and clip new value for alpha j using (12) and (15).
// orig octave: alphas(j) = alphas(j) - (Y(j) * (E(i) - E(j))) / eta;
// to avoid costly lookups, lets use the $aj var we just set above
$aj = $aj - ($yvec->data[$j] * ($E->data[$i] - $E->data[$j])) / $eta;
// Clip
//alphas(j) = min (H, alphas(j));
//alphas(j) = max (L, alphas(j));
$aj = min($H, $aj);
$aj = max($L, $aj);
// make sure we put the new $aj value back into $alphas
$alphas->data[$j] = $aj;
// Check if change in alpha is significant
if (abs($aj - $alpha_j_old) < $tol) {
// change too small; restore the old value and continue to next i
$alphas->data[$j] = $alpha_j_old;
continue;
}
// Determine value for alpha i using (16).
// alphas(i) = alphas(i) + Y(i)*Y(j)*(alpha_j_old - alphas(j));
$ai = $ai + $yvec->data[$i] * $yvec->data[$j] * ($alpha_j_old - $aj);
// be sure to put new $ai back in $alphas
$alphas->data[$i] = $ai;
// Compute b1 and b2 using (17) and (18) respectively.
//b1 = b - E(i) ...
//- Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ...
//- Y(j) * (alphas(j) - alpha_j_old) * K(i,j)';
$b1 = $b - $E->data[$i]
- $yvec->data[$i] * ($ai - $alpha_i_old) * $K->at($i, $j)
- $yvec->data[$j] * ($aj - $alpha_j_old) * $K->at($i, $j);
//b2 = b - E(j) ...
//- Y(i) * (alphas(i) - alpha_i_old) * K(i,j)' ...
//- Y(j) * (alphas(j) - alpha_j_old) * K(j,j)';
$b2 = $b - $E->data[$j]
- $yvec->data[$i] * ($ai - $alpha_i_old) * $K->at($i, $j)
- $yvec->data[$j] * ($aj - $alpha_j_old) * $K->at($j, $j);
// Compute b by (19).
if (0 < $ai && $ai < $C) {
$b = $b1;
} elseif (0 < $aj && $aj < $C) {
$b = $b2;
} else {
$b = ($b1+$b2)/2;
}
$num_changed_alphas = $num_changed_alphas + 1;
} // if ((Y(i)*E(i) < -tol && alphas(i) < C) || (Y(i)*E(i) > tol && alphas(i) > 0))
} // for loop
if ($num_changed_alphas == 0) {
$passes++;
} else {
$passes = 0;
}
echo '.';
$dots++;
if ($dots > 78) {
$dots = 0;
echo "\n";
}
} // while passes < max_passes
echo "\nMAX_PASSES ($max_passes) REACHED, training done\n";
// NOTE: alphas is an m x 1 column vector containing some floats, many
// near-zero values, and a few floats a tiny bit less than zero.
// octave builds an idx vector marking which alphas are > 0; while that is
// convenient & readable in octave, it's gratuitous in PHP, so we filter
// directly in the loop below
// $b is the intercept calculated by our training, a float, e.g. 0.9990
// generate these with a loop, which is actually faster/simpler than vector::map()
// X subset matrix of orig feature vectors who end up with alpha > 0
// size typically 500 x n (where m x n is size of orig training set X)
$ret_x = [];
// subset (column) vector indicating original classification for our
// new subset model.X. size same as model.X, e.g. 500 x 1
$ret_y = [];
// subset (column) vector same size as our model.X, e.g., 500 x 1 containing
// float alpha values calculated by our training
$ret_alphas = [];
// only include x/y/alphas with value greater than zero
for($i=0; $i<$m; $i++) {
$alpha = $alphas->data[$i];
if ($alpha > 0) {
// sadly ghostjat/np offers no efficient methods to construct new matrix from vectors
// so we have to convert to native PHP arrays
// TODO this would probably be faster if we looped directly in $x_matrix->data
$ret_x[] = $x_matrix->rowAsVector($i)->asArray();
$ret_y[] = $yvec->data[$i];
$ret_alphas[] = $alpha;
}
}
$ret_x = Np\matrix::ar($ret_x);
$ret_y = Np\vector::ar($ret_y);
$ret_alphas = Np\vector::ar($ret_alphas);
// column vector containing our weights for each feature, size
// is n x 1 (where m x n is size of orig training set X)
// the orig octave:
// model.w = ((alphas.*Y)'*X)';
// getting the correct output required much trial and error, and ended up
// needing this odd-looking sumRows() at the end
$ret_w = $alphas->multiply($yvec)->multiply($x_matrix->transpose())->sumRows();
// Return the model
return [
'kernel_fn' => $kernel_fn, // string specifying kernel function
'b' => $b, // float intercept
'x' => $ret_x, // matrix
'y' => $ret_y, // vector
'alphas' => $ret_alphas, // vector
'w' => $ret_w, // vector
'sigma' => $sigma,
'c' => $C
];
} // svm_train()
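/*
 * A minimal training sketch (hypothetical data, not from the original;
 * uses the same Np\matrix::ar / Np\vector::ar builders used elsewhere in
 * this file):
 *
 * $x = Np\matrix::ar([[2.0, 1.0], [1.8, 1.2], [0.2, 0.4], [0.1, 0.3]]);
 * $y = Np\vector::ar([1, 1, 0, 0]);
 * $model = svm_train($x, $y, 1.0, 'linear_kernel');
 * // or, with the RBF kernel (sigma is required):
 * $model = svm_train($x, $y, 1.0, 'gaussian_kernel', 0.5);
 */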
/**
* returns a vector of predictions using an SVM trained by svm_train()
* @param $model an associative array svm model returned from svm_train()
* @param $x either an m x n matrix or a vector of size n
* @return a vector of m predictions
*/
function svm_predict($model, $x) {
if ($x instanceof Np\matrix) {
// matrix is acceptable
} elseif ($x instanceof Np\vector) {
// FIXME work up a variant of this fn to predict for a single vector
die("svm_predict() does not yet support a single vector\n");
} else {
throw new Exception(gettype($x) . ' is not a valid type for $x');
}
$shape = $x->getShape();
$m = $shape->m;
$features = $shape->n;
if ($model['kernel_fn'] == 'linear_kernel') {
// We can use the weights and bias directly if working with the
// linear kernel
// original octave:
// p = X * model.w + model.b;
// WARNING: this seems to return the right result, but the order of
// operands is reversed and a sumRows() is needed; real kludgy
$p = $model['w']->multiply($x)->sumRows()->add($model['b']);
} elseif ($model['kernel_fn'] == 'gaussian_kernel') {
$K = get_gaussian_predict_k($x, $model);
//p = sum(K, 2);
$p = $K->sumRows();
} else {
// Other kernel fn -- THIS WILL PROB BE SLOW
$shape = $model['x']->getShape();
$model_x_m = $shape->m;
$p = Np\vector::zeros($m);
// original octave:
//prediction = prediction + ...
//model.alphas(j) * model.y(j) * ...
//model.kernelFunction(X(i,:)', model.X(j,:)');
for($i=0; $i<$m; $i++) {
$prediction = 0;
for($j=0; $j<$model_x_m; $j++) {
$prediction += $model['alphas']->data[$j] * $model['y']->data[$j]
* $model['kernel_fn']($x->rowAsVector($i), $model['x']->rowAsVector($j));
} // for j
$p->data[$i] = $prediction + $model['b'];
} // for i
} // if kernel_fn is linear/gaussian/other
// change calculated ranges to zero or one
return $p->map(fn($v) => ($v >= 0 ? 1 : 0));
} // svm_predict()
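/*
 * A minimal prediction sketch (hypothetical data, continuing the
 * svm_train() example above):
 *
 * $p = svm_predict($model, Np\matrix::ar([[1.9, 1.1], [0.2, 0.2]]));
 * // $p is a vector of 0/1 predictions, one per input row
 */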
/**
* Runs the specified model on the $x and $y provided and
* returns details about the time and accuracy
*/
function svm_assess(array $model, Np\matrix $x, Np\vector $y) {
$start = microtime(TRUE);
$retval = [];
$shape = $x->getShape();
$retval['x_samples'] = $shape->m;
$retval['x_features'] = $shape->n;
$y_size = $y->getSize();
$retval['y_samples'] = $y_size;
$p = svm_predict($model, $x);
$p_size = $p->getSize();
$retval['p_size'] = $p_size;
// sanity check
if ($p_size !== $y_size) {
throw new Exception("p size $p_size does not match y size $y_size");
}
// calculate what percentage of the time our model's prediction
// matches y. $p is full of predictions, $y is full of answers
$correct = 0;
$true_positives = 0;
$true_negatives = 0;
$false_positives = 0;
$false_negatives = 0;
for($i=0; $i<$p_size; $i++){
// if prediction matches training set value, it's CORRECT; also tally
// true & false positives/negatives so we can compute precision/recall
$pval = $p->data[$i];
if ($pval == $y->data[$i]) {
$correct++;
if ($pval == 1) {
$true_positives++;
} else {
$true_negatives++;
}
} else {
if ($pval == 1) {
$false_positives++;
} else {
$false_negatives++;
}
}
}
// precision = TP / (TP + FP); recall = TP / (TP + FN)
// guard against division by zero when the model predicts all negatives
// or the test set has no positives
$predicted_positives = $true_positives + $false_positives;
$actual_positives = $true_positives + $false_negatives;
$precision = $predicted_positives ? $true_positives / $predicted_positives : 0;
$recall = $actual_positives ? $true_positives / $actual_positives : 0;
$retval['correct_predictions'] = $correct;
$retval['true_positives'] = $true_positives;
$retval['true_negatives'] = $true_negatives;
$retval['false_positives'] = $false_positives;
$retval['false_negatives'] = $false_negatives;
$retval['precision'] = $precision;
$retval['recall'] = $recall;
// harmonic mean of precision and recall; guard against 0/0
$retval['f_score'] = ($precision + $recall) > 0
? (2 * $precision * $recall) / ($precision + $recall)
: 0;
$accuracy = ($correct/$p_size);
$retval['correct_decimal'] = $accuracy;
$retval['correct_percent'] = $accuracy * 100;
$retval['elapsed_time'] = microtime(TRUE) - $start;
return $retval;
}
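/*
 * A minimal assessment sketch (hypothetical data, continuing the examples
 * above); the returned array includes accuracy, precision, recall and f_score:
 *
 * $stats = svm_assess($model, $x, $y);
 * printf("accuracy %.2f%%, F1 %.3f\n", $stats['correct_percent'], $stats['f_score']);
 */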
/**
* returns optimal C by training numerous SVM classifiers with varying
* values of C and returning the one that performs best
*
*/
function svm_linear_optimal_c($xtrain, $ytrain, $xval, $yval) {
// evenly spaced (exponentially) generated from powers of 1.4
$cvals = [0.034571613033608,0.048400258247051,0.067760361545871,0.09486450616422,0.13281030862991,0.18593443208187,0.26030820491462,0.36443148688047,0.51020408163265,0.71428571428571,1,1.4,1.96,2.744,3.8416,5.37824,7.529536,10.5413504,14.75789056,20.661046784,28.9254654976];
echo "Begin linear sweep\n";
echo "\tvalues of c: ", implode(", ", $cvals), "\n";
$best_c = null;
$best_results = null;
$best_correct_percent = null;
$best_model = null;
$train_results = [];
$val_results = [];
$result_idx = 0;
foreach($cvals as $c_i => $cval) {
echo "== Training SVM with C=$cval ==\n";
$start = microtime(TRUE);
// train the model on the training set
$model = svm_train($xtrain, $ytrain, $cval, 'linear_kernel', null, 0.0001);
$elapsed = microtime(TRUE) - $start;
echo "training completed in $elapsed seconds\n";
// assess the model with the xtrain set
$results = svm_assess($model, $xtrain, $ytrain);
echo "XTRAIN\n";
print_r($results);
$train_results[$result_idx] = array_merge(
['c' => $cval, 'training_time' => $elapsed],
$results
);
// assess the model with the xval set
$results = svm_assess($model, $xval, $yval);
echo "XVAL\n";
print_r($results);
$val_results[$result_idx] = array_merge(
['c' => $cval, 'training_time' => $elapsed],
$results
);
// maybe optimize for f_score?
$correct_percent = $results['correct_percent'];
if (is_null($best_c) || $correct_percent > $best_correct_percent) {
$best_c = $cval;
$best_results = $results;
$best_correct_percent = $correct_percent;
$best_model = $model;
}
$result_idx++;
}
echo "\n=====\n";
echo "Best value for C is $best_c, with correct_percent of $best_correct_percent\n";
print_r($best_results);
return [
'c' => $best_c,
'model' => $best_model,
'train_results' => $train_results,
'val_results' => $val_results
];
}
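/*
 * A minimal sweep sketch (hypothetical train/validation split, not from
 * the original):
 *
 * $sweep = svm_linear_optimal_c($xtrain, $ytrain, $xval, $yval);
 * $best_model = $sweep['model']; // trained with the best-performing C
 */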
/**
* returns optimal C and sigma by training numerous gaussian SVM classifiers with varying
* values of C and sigma, returning the one that performs best
*
*/
function svm_gaussian_optimal_c($xtrain, $ytrain, $xval, $yval) {
// good, sort of hand picked
// $cvals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30];
// $sigma_vals = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30];
// evenly spaced (exponentially) generated from powers of 1.4
$cvals = [0.034571613033608,0.048400258247051,0.067760361545871,0.09486450616422,0.13281030862991,0.18593443208187,0.26030820491462,0.36443148688047,0.51020408163265,0.71428571428571,1,1.4,1.96,2.744,3.8416,5.37824,7.529536,10.5413504,14.75789056,20.661046784,28.9254654976];
$sigma_vals = [0.034571613033608,0.048400258247051,0.067760361545871,0.09486450616422,0.13281030862991,0.18593443208187,0.26030820491462,0.36443148688047,0.51020408163265,0.71428571428571,1,1.4,1.96,2.744,3.8416,5.37824,7.529536,10.5413504,14.75789056,20.661046784,28.9254654976];
echo "Begin gaussian sweep\n";
echo "\tvalues of c: ", implode(", ", $cvals), "\n";
echo "\tvalues of sigma: ", implode(", ", $sigma_vals), "\n";
$best_c = null;
$best_sigma = null;
$best_results = null;
$best_correct_percent = null;
$best_model = null;
$train_results = [];
$val_results = [];
$result_idx = 0;
foreach($cvals as $c_i => $cval) {
foreach($sigma_vals as $s_i => $sigma) {
echo "== Training SVM with C=$cval, sigma=$sigma ==\n";
$start = microtime(TRUE);
// train the model on the training set
$model = svm_train($xtrain, $ytrain, $cval, 'gaussian_kernel', $sigma);
$elapsed = microtime(TRUE) - $start;
echo "training completed in $elapsed seconds\n";
// assess the model with the xtrain set
$results = svm_assess($model, $xtrain, $ytrain);
echo "XTRAIN\n";
print_r($results);
$train_results[$result_idx] = array_merge(
['c' => $cval, 'sigma' => $sigma, 'training_time' => $elapsed],
$results
);
// assess the model with the xval set
$results = svm_assess($model, $xval, $yval);
echo "XVAL\n";
print_r($results);
$val_results[$result_idx] = array_merge(
['c' => $cval, 'sigma' => $sigma, 'training_time' => $elapsed],
$results
);
// TODO maybe optimize for f_score instead?
// find a way to punish false positives more, or we risk throwing out
// good messages as spam
$correct_percent = $results['correct_percent'];
if (is_null($best_c) || $correct_percent > $best_correct_percent) {
$best_c = $cval;
$best_sigma = $sigma;
$best_results = $results;
$best_correct_percent = $correct_percent;
$best_model = $model;
}
$result_idx++;
} // foreach sigma
} // foreach c
echo "\n=====\n";
echo "Best C is $best_c, best sigma is $best_sigma, with correct_percent of $best_correct_percent\n";
print_r($best_results);
return [
'c' => $best_c,
'sigma' => $best_sigma,
'model' => $best_model,
'train_results' => $train_results,
'val_results' => $val_results
];
}
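/*
 * The gaussian sweep is called the same way and additionally returns the
 * best sigma (a sketch with hypothetical inputs):
 *
 * $sweep = svm_gaussian_optimal_c($xtrain, $ytrain, $xval, $yval);
 * echo "best C {$sweep['c']}, best sigma {$sweep['sigma']}\n";
 */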