Interesting development! I compiled CBLAS according to this recipe that I linked previously. I took the resulting cblas_LINUX.so
file and copied it to a new folder and downloaded the blas.h file from the ghostjat/np library:
mkdir ~/cblas
cp cblas_LINUX.so ~/cblas/
cd ~/cblas
curl https://raw.githubusercontent.com/ghostjat/Np/main/src/core/blas.h > blas.h
I edited this line in blas.h:
#define FFI_LIB "libblas.so"
to point to the location of my cblas_LINUX.so
#define FFI_LIB "/home/sneakyimp/cblas/cblas_LINUX.so"
I started with a simple bar.php file, which loads that blas.h file via FFI:
<?php
$ffi_blas = FFI::load(__DIR__ . '/blas.h');
This script complains because blas.h
declares functions that do not exist in cblas_LINUX.so
like so:
PHP Fatal error: Uncaught FFI\Exception: Failed resolving C function 'openblas_get_config' in /home/sneakyimp/biz/cblas/bar.php
So I methodically ran it over and over again, each time deleting the function declaration in blas.h that caused a complaint, yielding this blas.h file:
#define FFI_SCOPE "blas"
#define FFI_LIB "/home/sneakyimp/cblas/cblas_LINUX.so"
typedef size_t CBLAS_INDEX_t;
size_t cblas_idamax(const int N, const float *X, const int incX);
double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
const int incY);
double cblas_ddot(const int N, const double *X, const int incX,
const double *Y, const int incY);
double cblas_dnrm2(const int N, const double *X, const int incX);
double cblas_dasum(const int N, const double *X, const int incX);
void cblas_dswap(const int N, double *X, const int incX,
double *Y, const int incY);
void cblas_dcopy(const int N, const double *X, const int incX,
double *Y, const int incY);
void cblas_daxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY);
void cblas_drotg(double *a, double *b, double *c, double *s);
void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
void cblas_drot(const int N, double *X, const int incX,
double *Y, const int incY, const double c, const double s);
void cblas_drotm(const int N, double *X, const int incX,
double *Y, const int incY, const double *P);
void cblas_dscal(const int N, const double alpha, double *X, const int incX);
void cblas_dgemv(const enum CBLAS_ORDER order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const double alpha, const double *A, const int lda,
const double *X, const int incX, const double beta,
double *Y, const int incY);
void cblas_dgbmv(const enum CBLAS_ORDER order,
const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const double alpha,
const double *A, const int lda, const double *X,
const int incX, const double beta, double *Y, const int incY);
void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *A, const int lda,
double *X, const int incX);
void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const double *A, const int lda,
double *X, const int incX);
void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *Ap, double *X, const int incX);
void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *A, const int lda, double *X,
const int incX);
void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const int K, const double *A, const int lda,
double *X, const int incX);
void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
const int N, const double *Ap, double *X, const int incX);
void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dsbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const int K, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *Ap,
const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dger(const enum CBLAS_ORDER order, const int M, const int N,
const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda);
void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, double *A, const int lda);
void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, double *Ap);
void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, const double *Y, const int incY, double *A,
const int lda);
void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, const double *Y, const int incY, double *A);
void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc);
void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const int M, const int N,
const double alpha, const double *A, const int lda,
const double *B, const int ldb, const double beta,
double *C, const int ldc);
void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const double *A, const int lda,
const double beta, double *C, const int ldc);
void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const double *A, const int lda,
const double *B, const int ldb, const double beta,
double *C, const int ldc);
void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const double alpha, const double *A, const int lda,
double *B, const int ldb);
void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
const enum CBLAS_DIAG Diag, const int M, const int N,
const double alpha, const double *A, const int lda,
double *B, const int ldb);
I then emulated the code in ghostjat/np to create the necessary FFI\CData objects that I could feed to the cblas_gdemm function
to perform matrix multiplication using CBLAS:
<?php
// load CBLAS lib
$ffi_blas = FFI::load(__DIR__ . '/blas.h');
// load my data
$data = json_decode(file_get_contents(__DIR__ . '/../machine-learning/training_data_sets.json'), TRUE);
// this is the 1375 x 1966 matrix i want to multiply times its transpose
$m1 = $data['Xtrain'];
$start = microtime(true);
// transpose it
$m1t = array_map(null, ...$m1);
echo "transpose elapsed time: " . (microtime(true) - $start) . "seconds\n";
// wrote this by examining code in ghostjat/np, converts PHP array/array obj to FFI\CData
function get_cdata($m) {
// this check should be more specific, but works for now
if (!is_array($m) || !is_array($m[0])) {
throw new Exception('param must be array of arrays');
}
$rowcount = sizeof($m);
$colcount = sizeof($m[0]);
$flat_m = [];
$size = $rowcount * $colcount;
$cdata = \FFI::cast('double *', \FFI::new("double[$size]"));
$i = 0;
foreach($m as $row) {
foreach($row as $val) {
$flat_m[$i] = $val;
$cdata[$i] = $val;
$i++;
}
}
return [
'rows' => $rowcount,
'cols' => $colcount,
'flat_m' => $flat_m,
'cdata' => $cdata
];
}
// FFI function needs CData objects as params
$m1_c = get_cdata($m1);
$m1t_c = get_cdata($m1t);
// these constants mean something to CBLAS. copied from ghostjat/np
define('CBLAS_ROW_MAJOR', 101);
define('CBLAS_NO_TRANS', 111);
// some dimensions of our expected output matrix
$mr_size = $m1_c['rows'] * $m1t_c['cols'];
$mr_rows = $m1_c['rows'];
$mr_cols = $m1t_c['cols'];
$mr_cdata = \FFI::cast('double *', \FFI::new("double[$mr_size]"));
$start = microtime(true);
// INVOKE CBLAS VIA FFI. This fn works by pointer, returns some void object, I think
$some_val = $ffi_blas->cblas_dgemm(CBLAS_ROW_MAJOR, CBLAS_NO_TRANS, CBLAS_NO_TRANS, $m1_c['rows'], $m1t_c['cols'], $m1_c['cols'], 1.0, $m1_c['cdata'], $m1_c['cols'], $m1t_c['cdata'], $m1t_c['cols'], 0.0, $mr_cdata, $mr_cols);
echo "cblas_dgemm elapsed time: " . (microtime(true) - $start) . "seconds\n";
// convert resulting cdata to php object
$idx = 0;
$mr = [];
for($i=0; $i<$mr_rows; $i++) {
$mr[$i] = [];
for($j=0; $j<$mr_cols; $j++) {
$mr[$i][$j] = $mr_cdata[$idx];
$idx++;
}
}
echo "rows: " . sizeof($mr) . "\n";
echo "cols: " . sizeof($mr[0]) . "\n";
$json = json_encode($mr, JSON_PRETTY_PRINT);
file_put_contents(__DIR__ . '/cblas-product.json', $json);
echo "json md5 is " . md5($json) . "\n";
It runs in 250ms, and appears to yield the same (correct?) output as all the other functions I've tested:
transpose elapsed time: 0.056370973587036seconds
cblas_dgemm elapsed time: 0.28544211387634seconds
rows: 1375
cols: 1375
json md5 is 4a33a230c1cf8bb6ab75dbd0d3d7bf0e