#include "matrixtmp-new.h"
#include "matrix-new.h"
#include "api.h"
#include "fftn.h"
#include "lmmin.h"

#include <math.h>
#include <stdlib.h>
#include <vector>

#define SWAP(a, b) { double swap=(a); (a)=(b); (b)=swap; }

#define DEG_TO_RAD		0.017453292519943 
#define NPARS			20					// max number of parameters
#define EXLARGE			1.e32

#pragma warning(disable: 4244)
#pragma warning(disable: 4715)
#pragma warning(disable: 4018)

extern "C" {
// ccmath
void trnm(double*,int);
void vmul(double*,double*,double*,int);
void mattr(double*,double*,int,int);
void rmmult(double*,double*,double*,int,int,int);
int solv(double*,double*,int);
int minv(double*, int);
int ruinv(double*, int);
int psinv(double*, int);
int svduv(double*, double*, double*, int, double*, int);
void eigen(double*, double*, int);
double csfit(double, double*, double*, double*, int);
double tnsfit(double, double*, double*, double*, int, double);
void csplp(double*, double*, double*, int,double);
void cspl(double*, double*, double*, int, double);
double qrlsq(double*, double*, int, int, int*);
double qrvar(double*, int, int, double ssq);
//nelib/toms
int cubgcv_(double*,double*,double*,int*,double*,double*,int*,double*,int*,double*,double*,int*);
int intrc0_(int*, double*, double*, double*, double*, double*, double*, int*, int*, int*, int*, double*, int*);
int intrc1_(int*, double*, double*, double*, double*, double*, double*, int*, int*, int*, int*, double*, int*, double*, int*, double*, int*);
int gradg_(int*, double*, double*, double*, double*, int*, int*, int*, int*, double*, int*, double*, double*, int*);
int intrc0_752(double*, double*, int*, int*, int*, double*, double*, double*, int*, int*, int*, int*, double*, int*);
int intrc1_752(double*, double*, int*, int*, int*, double*, double*, double*, int*, int*, int*, int*, double*, double*, int*, int*, double*, double*, double*, int*);
int gradg_752(int*, int*, int*, double*, double*, double*, int*, int*, int*, int*, double*, int*, double*, double*, int*);
int trmesh_751(int*, double*, double*, int*, int*, int*, int*, int*, int*, double*, int*);
int trlist_751(int*, int*, int*, int*, int*, int*, int*, int*, int*, int*, int*);
int bnodes_751(int*, int*, int*, int*, int*, int*, int*, int*);
int trmesh_(int*, double*, double*, double*, int*, int*, int*, int*, int*, int*, double*, int*);
int trlist_(int*, int*, int*, int*, int*, int*, int*, int*);
int bnodes_(int*, int*, int*, int*, int*, int*, int*, int*);
};

int get_factor(const zoMatrixTmp<double> &X, double x, int &i1, int &i2, double &f1, double &f2);
int point_in_polygon(const double *xp, const double *yp, int n, double x, double y);

/////////////////////////////////////////////////////////////////////

void* num_mlfit(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *Y = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *X = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));
	zoMatrixTmp<double> *er = 0;
	int m = X->nrow(), n = X->ncol();
	if (m != Y->ndat()) api_runtime_error(ctx, "bad matrix size");
	if (nargs > 2) {
		er = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));
		if (m != er->ndat()) api_runtime_error(ctx, "bad matrix size");
		int i, j, k;
		for (i = 0, k = 0; i < m; i++) {
			double e = (*er)(i);
			(*Y)(i) /= e;
			for (j = 0; j < n; j++) {
				(*X)(k) /= e;
				k++;
			}
		}
	}
	int flag;
	double ssq = qrlsq(X->ptr(), Y->ptr(), m, n, &flag);
	if (flag < 0) api_runtime_error(ctx, "rank of X < Y\'s length");
	ssq = qrvar(X->ptr(), m, n, ssq);
	return api_create_real(ctx, ssq);
}


void* num_svd(void *ctx, int nargs, void** args)
{
	if (nargs < 1) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	int m = me->nrow();
	int n = me->ncol();
	if (n < 2 || m < 2) api_input_error(ctx);
	zoMatrixTmp<double> *u = new zoMatrixTmp<double>;
	u->resize(m, m);
	zoMatrixTmp<double> *v = new zoMatrixTmp<double>;
	v->resize(n, n);
	zoMatrixTmp<double> *d = new zoMatrixTmp<double>;
	d->resize(n, 1);
	svduv(d->ptr(), me->ptr(), u->ptr(), m, v->ptr(), n);
	void *arr = api_create_array(ctx, 3);
	api_set_array_object(ctx, arr, "0", api_create_user(0, u, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE));
	api_set_array_object(ctx, arr, "1", api_create_user(0, d, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE));
	api_set_array_object(ctx, arr, "2", api_create_user(0, v, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE));
	return arr;
}


void* num_invert(void *ctx, int nargs, void** args)
{
	if (nargs < 1) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	if (me->nrow() != me->ncol()) api_runtime_error(ctx, "row != col");
	int status;
	if (nargs > 1) {
		const char* s = api_get_string(ctx, args[1]);
		if      (strcmp(s, "ps") == 0) status = psinv(me->ptr(), me->nrow());
		else if (strcmp(s, "ru") == 0) status = ruinv(me->ptr(), me->nrow());
		else                           status =  minv(me->ptr(), me->nrow());
	}
	else {
		status =  minv(me->ptr(), me->nrow());
	}
	return api_create_integer(ctx, !status);
}


void* num_fft(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	int flag = api_get_integer(ctx, args[1]);
	if (me->ncol() != 2) api_runtime_error(ctx, "matrix must contain two columns");
	int dims[1] = { me->nrow() };
	zoMatrixTmp<double> x, y;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	if (flag > 0) flag = fftn(1, dims, x.ptr(), y.ptr(),  1,  0.0);
	else          flag = fftn(1, dims, x.ptr(), y.ptr(), -1, -1.0);
	me->setcol(x, 0);
	me->setcol(y, 1);
	return api_create_integer(ctx, !flag);
}


void* num_spline(void *ctx, int nargs, void** args)
{
	if (nargs < 6) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	double t = api_get_number(ctx, args[1]);
	int flag = api_get_integer(ctx, args[2]);
	double a = api_get_number(ctx, args[3]);
	double b = api_get_number(ctx, args[4]);
	int n    = api_get_integer(ctx, args[5]);

	if (me->ncol() != 2 || n < 3) api_input_error(ctx);

	double c, d = (b - a) / (n - 1);

	zoMatrixTmp<double> x, y, z;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	z.resize(me->nrow(), 1);

	if (flag != 0) {
		cspl(x.ptr(), y.ptr(), z.ptr(), me->nrow()-1, t);
	}
	else {
		csplp(x.ptr(), y.ptr(), z.ptr(), me->nrow()-1, t);
	}

	zoMatrixTmp<double> *p = new zoMatrixTmp<double>;
	p->resize(n, 2);

	if (t > 0) {
		for (int i = 0; i < n; i++) {
			c = a + i*d;
			(*p)(i,0) = c;
			(*p)(i,1) = tnsfit(c, x.ptr(), y.ptr(), z.ptr(), me->nrow()-1, t);
		}
	}
	else {
		for (int i = 0; i < n; i++) {
			c = a + i*d;
			(*p)(i,0) = c;
			(*p)(i,1) =  csfit(c, x.ptr(), y.ptr(), z.ptr(), me->nrow()-1);
		}
	}

	return api_create_user(ctx, p, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}


void* num_cv_spline(void *ctx, int nargs, void** args)
{
	if (nargs < 4) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	double a = api_get_number(ctx, args[1]);
	double b = api_get_number(ctx, args[2]);
	int np   = api_get_integer(ctx, args[3]);

	if (me->ncol() != 2 || np < 3) api_input_error(ctx);

	double d = (b - a) / (np - 1);

	zoMatrixTmp<double> x, f, df;
	x.getcol(*me, 0);
	f.getcol(*me, 1);
	df.resize(me->nrow(), 1);
	df.fill(1, 0);

	int n = me->nrow();
	double var = -1.0;
	int ic = n-1;
	int job = 1;
	int ier;

	zoMatrixTmp<double> y, c, se, wk;
	y.resize(1, n);
	c.resize(1, ic*3);
	se.resize(1, n);
	wk.resize(1, 7*(n+2));

	cubgcv_(x.ptr(), f.ptr(), df.ptr(), &n, y.ptr(), c.ptr(), &ic, &var, &job, se.ptr(), wk.ptr(), &ier);

	zoMatrixTmp<double> *p = new zoMatrixTmp<double>;
	p->resize(np, 2);

	if (ier < 129 || ier > 133) {
		for (int i = 0; i < np; i++) {
			double u = a + i*d;
			(*p)(i,0) = u;
			if (u <= x(0)) {
				(*p)(i,1) = f(0);
			}
			else if (u >= x(n-1)) {
				(*p)(i,1) = f(n-1);
			}
			else {
				int j;
				for (j = 1; j < n; j++) {
					if (u < x(j)) break;
				}
				j--;
				double t = u - x(j);
				(*p)(i,1) = ((t * c(2*n-2+j) + c(n-1+j)) * t + c(j)) * t + y(j);
			}
		}
	}

	return api_create_user(ctx, p, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}


void* num_dist_smooth(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	int n  = api_get_integer(ctx, args[1]);
	if (n < 1) api_input_error(ctx);
	double f = 1.0;
	if (nargs > 2) api_get_number(ctx, args[2]);
	if (f < 0) api_input_error(ctx);
	double missing = -EXLARGE;
	if (nargs > 3) missing = api_get_number(ctx, args[3]);
	int nr = me->nrow();
	int nc = me->ncol();
	zoMatrixTmp<double> p = (*me);
	zoMatrixTmp<double> *R = new zoMatrixTmp<double>;
	R->resize(nr, nc);
	for (int i = 0; i < nr; i++) {
		int m1 = i - n;
		if (m1 < 0) m1 = 0;
		int m2 = i + n;
		if (m2 >= nr) m2 = nr - 1;
		for (int j = 0; j < nc; j++) {
			int n1 = j - n;
			if (n1 < 0) n1 = 0;
			int n2 = j + n;
			if (n2 >= nc) n2 = nc - 1;
			double fsum = 0;
			double vsum = 0;
			int count = 0;
			for (int ii = m1; ii <= m2; ii++) {
				for (int jj = n1; jj <= n2; jj++) {
					if (p(ii,jj) > missing) {
						count++;
						double dis = sqrt(double((ii-i)*(ii-i)+(jj-j)*(jj-j)));
						double tmp = exp(-f*dis);
						fsum += tmp;
						vsum += tmp*p(ii, jj);
					}
				}
			}
			(*R)(i,j) = double(count)/(m2-m1+1)*(n2-n1+1);
			if (count > 0) (*me)(i,j) = vsum/fsum;
		}
	}
	return api_create_user(ctx, R, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}


void sg_coef(void *ctx, zoMatrixTmp<double> &C, int m, int nl, int nr)
{
	zoMatrixTmp<double> A;
	A.resize(nl+nr+1, m+1);
	C.resize(nl+nr+1, 1);
	int i, j;
	for (i = -nl; i <= nr; i++) {
		for (j = 0; j <= m; j++) A(i+nl,j) = pow(double(i), double(j));
	}
	zoMatrixTmp<double> AT(A);
	AT.trans();
	AT.concat(A);
	if (psinv(AT.ptr(), m+1) != 0) api_runtime_error(ctx, "matrix not positive definite.");
	A.trans();
	for (i = -nl; i <= nr; i++) {
		C(i+nl) = 0;
		for (j = 0; j <= m; j++) C(i+nl) += AT(0,j)*A(j,i+nl);
	}
}

void* num_sg_smooth(void *ctx, int nargs, void** args)
{
	if (nargs < 3) api_input_error(ctx);
	zoMatrixTmp<double> *X = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	int m = api_get_integer(ctx, args[1]);
	int n = api_get_integer(ctx, args[2]);
	double missing = -EXLARGE;
	if (nargs > 3) missing = api_get_number(ctx, args[3]);
	int i, k, nd = X->ndat();
	if (n < m || m < 2 || nd < 2*n+1) api_input_error(ctx);
	zoMatrixTmp<double> T(*X), C;
	zoMatrixTmp<double> *R = new zoMatrixTmp<double>;
	R->resize(X->nrow(), X->ncol());
	int n1=0, n2=0;
	for (i = 0; i < nd; i++) {
		int nl = n > i ? i : n;
		int nr = i+n < nd ? n : nd-i-1;
		if (n1 != nl || n2 != nr) {
			sg_coef(ctx, C, m, nl, nr);
			n1 = nl;
			n2 = nr;
		}
		int count = 0;
		double val = 0;
		for (k = 0; k < C.ndat(); k++) {
			if (T(i-nl+k) > missing) {
				count++;
				val += C(k)*T(i-nl+k);
			}
		}
		(*R)(i) = double(count)/C.ndat();
		if (count > 0) (*X)(i) = val;
	}
	return api_create_user(ctx, R, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}

void* num_delaunay(void *ctx, int nargs, void** args)
{
	if (nargs < 1) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	if (me->nrow() < 3 || me->ncol() < 2) api_input_error(ctx);
	int flag = 0;
	if (nargs > 1) flag = api_get_integer(ctx, args[1]);

	zoMatrixTmp<double> x, y, z;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	int n = me->nrow();
	zoMatrixTmp<int> *idx = new zoMatrixTmp<int>;
	zoMatrixTmp<int> *hul = new zoMatrixTmp<int>;
	zoMatrixTmp<int> tmp;
	tmp.resize(n, 1);
	int i, k, lnew, ier, nrow = 6, nt = 2*n, na, nb, ncc = 0, lcc[1], lct[1];
	zoMatrixTmp<int> near1, next, list, lptr, lend, ltri;
	near1.resize(1, n);
	next.resize(1, n);
	list.resize(1, 6*n);
	lptr.resize(1, 6*n);
	lend.resize(1, n);
	ltri.resize(1, nrow*nt);
	zoMatrixTmp<double> dist;
	dist.resize(1, n);

	char msg[256];
	msg[0] = 0;
	if (me->ncol() == 2) {
		trmesh_751(&n, x.ptr(), y.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &lnew, near1.ptr(), next.ptr(), dist.ptr(), &ier);
		if (ier != 0) {
			sprintf(msg, "trmesh_751() error code: %d", ier);
			api_runtime_error(ctx, msg);
		}
		trlist_751(&ncc, lcc, &n, list.ptr(), lptr.ptr(), lend.ptr(), &nrow, &nt, ltri.ptr(), lct, &ier);
		if (ier != 0) {
			sprintf(msg, "trlist_751() error code: %d", ier);
			api_runtime_error(ctx, msg);
		}
		if (flag) {
			zoMatrixTmp<int> wk;
			wk.resize(nt, 3);
			int count = 0;
			for (i = 0, k = 0; i < nt; i++, k += nrow) {
				int i1 = ltri(k  ) - 1;
				int i2 = ltri(k+1) - 1;
				int i3 = ltri(k+2) - 1;
				double px = 0.5*(x(i1) + x(i2));
				double py = 0.5*(y(i1) + y(i2));
				if (!point_in_polygon(x.ptr(), y.ptr(), x.nrow(), px, py)) continue;
				px = 0.5*(x(i1) + x(i3));
				py = 0.5*(y(i1) + y(i3));
				if (!point_in_polygon(x.ptr(), y.ptr(), x.nrow(), px, py)) continue;
				px = 0.5*(x(i2) + x(i3));
				py = 0.5*(y(i2) + y(i3));
				if (!point_in_polygon(x.ptr(), y.ptr(), x.nrow(), px, py)) continue;
				wk(count, 0) = i1;
				wk(count, 1) = i2;
				wk(count, 2) = i3;
				count++;
			}
			idx->resize(count, 3);
			for (i = 0; i < count; i++) {
				(*idx)(i,0) = wk(i,0);
				(*idx)(i,1) = wk(i,1);
				(*idx)(i,2) = wk(i,2);
			}
		}
		else {
			idx->resize(nt, 3);
			for (i = 0, k = 0; i < nt; i++, k += nrow) {
				(*idx)(i, 0) = ltri(k) - 1;
				(*idx)(i, 1) = ltri(k+1) - 1;
				(*idx)(i, 2) = ltri(k+2) - 1;
			}
		}
		bnodes_751(&n, list.ptr(), lptr.ptr(), lend.ptr(), tmp.ptr(), &nb, &na, &nt);
		hul->resize(nb, 1);
		for (i = 0; i < nb; i++) (*hul)(i) = tmp(i) - 1;
	}
	else {
		z.getcol(*me, 2);
		trmesh_(&n, x.ptr(), y.ptr(), z.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &lnew, near1.ptr(), next.ptr(), dist.ptr(), &ier);
		if (ier != 0) {
			sprintf(msg, "trmesh_() error code: %d", ier);
			api_runtime_error(ctx, msg);
		}
		trlist_(&n, list.ptr(), lptr.ptr(), lend.ptr(), &nrow, &nt, ltri.ptr(), &ier);
		if (ier != 0) {
			sprintf(msg, "trlist_() error code: %d", ier);
			api_runtime_error(ctx, msg);
		}
		idx->resize(nt, 3);
		for (i = 0, k = 0; i < nt; i++, k += nrow) {
			(*idx)(i, 0) = ltri(k) - 1;
			(*idx)(i, 1) = ltri(k+1) - 1;
			(*idx)(i, 2) = ltri(k+2) - 1;
		}
		bnodes_(&n, list.ptr(), lptr.ptr(), lend.ptr(), tmp.ptr(), &nb, &na, &nt);
		hul->resize(nb, 1);
		for (i = 0; i < nb; i++) (*hul)(i) = tmp(i) - 1;
	}

	void *arr = api_create_array(ctx, 2);
	api_set_array_object(ctx, arr, "0", api_create_user(0, idx, mat_opfunc_int, mat_destroy_int, MAT_INT));
	api_set_array_object(ctx, arr, "1", api_create_user(0, hul, mat_opfunc_int, mat_destroy_int, MAT_INT));
	return arr;
}

/////////////////////////////////////////////////////////////////////

struct lm_data
{
	void *ctx, *func;
	zoMatrixTmp<double> *x, *y, *par;
};

void lm_evaluate(double *par, int m_dat, double *fvec, void *data, int *info)
{
	int i;
	lm_data *u = (lm_data*)data;
	zoMatrixTmp<double> p(*u->par);
	for (i = 0; i < p.ndat(); i++) p(i) = par[i];
	void *args[2];
	args[0] = api_create_user(u->ctx, u->x, mat_opfunc_double, 0, MAT_DOUBLE);
	args[1] = api_create_user(u->ctx, &p, mat_opfunc_double, 0, MAT_DOUBLE);
	void *ret = api_call_func(u->ctx, u->func, 2, args);
	zoMatrixTmp<double> *yf = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(u->ctx, ret, MAT_DOUBLE));
	if (yf->ndat() != u->y->ndat()) api_runtime_error(u->ctx, "callback returns matrix of wrong size");
	for (i = 0; i < m_dat; i++) fvec[i] = (*u->y)(i) - (*yf)(i);
}

void lm_print(int n_par, double *par, int m_dat, double *fvec, void *data, int iflag, int iter, int nfev)
{
}

void* num_nllm(void *ctx, int nargs, void** args)
{
	if (nargs < 3) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	void *func = api_get_func(ctx, api_get_string(ctx, args[1]));
	zoMatrixTmp<double> *par = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));

	int i, m=me->nrow(), n=par->ndat(), info=0, nfev=0;

	if (n < 2 || m < n || me->ncol() < 2) api_input_error(ctx);

	zoMatrixTmp<double> x, y, dy, diag, fjac, qtf, wa1, wa2, wa3, wa4;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	dy.resize(1, m);
	diag.resize(1, n);
	fjac.resize(m, n);
	qtf.resize(1, n);
	wa1.resize(1, n);
	wa2.resize(1, n);
	wa3.resize(1, n);
	wa4.resize(m, 1);

	zoMatrixTmp<int> ipvt;
	ipvt.resize(1, n);

	lm_data data;
	data.ctx = ctx;
	data.func = func;
	data.x = &x;
	data.y = &y;
	data.par = par;

	lm_lmdif(m, n, par->ptr(), dy.ptr(),
			1.e-9, 1.e-9, 1.e-9, 100, 1.e-9,
			diag.ptr(), 1, 100.0, &info, &nfev,
			fjac.ptr(), ipvt.ptr(), qtf.ptr(),
			wa1.ptr(), wa2.ptr(), wa3.ptr(), wa4.ptr(),
			lm_evaluate, lm_print,
			&data);

	double d = 0;
	for (i = 0; i < m; i++) d += dy(i)*dy(i);
	d = sqrt(d/(m-1));

	void *arr = api_create_array(ctx, 2);
	api_set_array_object(0, arr, "0", api_create_integer(0, nfev));
	api_set_array_object(0, arr, "1", api_create_real(0, d));
	return arr;
}

double nlsimplex_func(void *ctx, void *func,
					  zoMatrixTmp<double> &x, zoMatrixTmp<double> &y, zoMatrixTmp<double> *par)
{
	void *args[2];
	args[0] = api_create_user(ctx, &x, mat_opfunc_double, 0, MAT_DOUBLE);
	args[1] = api_create_user(ctx, par, mat_opfunc_double, 0, MAT_DOUBLE);
	void *ret = api_call_func(ctx, func, 2, args);
	zoMatrixTmp<double> *yf = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, ret, MAT_DOUBLE));
	if (yf->ndat() != x.ndat()) api_runtime_error(ctx, "nonlinear-fitting callback returns matrix of wrong size");
	double d, ytry = 0;
	for (int j = 0; j < x.ndat(); j++) {
		d = fabs(y(j) - (*yf)(j));
		ytry += d*d;
	}
	ytry = sqrt(ytry/(x.ndat()-1));
	return ytry;
}

void* num_nlsimplex(void *ctx, int nargs, void** args)
{
	if (nargs < 3) api_input_error(ctx);

	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	if (me->ncol() < 2 || me->nrow() < 2) api_runtime_error(ctx, "matrix columns/rows less than 2");
	void *func = api_get_func(ctx, api_get_string(ctx, args[1]));
	zoMatrixTmp<double> *par = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));

	const int ndim=par->ndat(), mpts=ndim+1;

	if (ndim < 2 || me->nrow() < ndim || me->ncol() < 2) api_input_error(ctx);

	int i, j, count=0, ilo, ihi, inhi;
	
	zoMatrixTmp<double> *lb=0, *ub=0;
	
	if (nargs > 4) {
		lb = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[3], MAT_DOUBLE));
		ub = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[4], MAT_DOUBLE));
		if (ndim != lb->ndat() || ndim != ub->ndat()) api_input_error(ctx);
		for (i = 0; i < ndim; i++) {
			if ((*par)(i) < (*lb)(i) || (*par)(i) > (*ub)(i)) api_input_error(ctx);
		}
	}

	zoMatrixTmp<double> x, y, dy, p, p0;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	dy.resize(1, mpts);
	p.resize(mpts, ndim);
	p0.resize(1, ndim);

	// simplex vertexes
	for (i = 0; i < mpts; i++) {
		for (j = 0; j < ndim; j++) {
			if (i == j) {
				if (lb) {
					if ((*ub)(j)-(*par)(j) > (*par)(j)-(*lb)(i)) {
						p(i,j) = (*par)(j) + 0.1*((*ub)(j)-(*par)(j));
					}
					else {
						p(i,j) = (*par)(j) - 0.1*((*par)(j)-(*lb)(i));
					}
				}
				else {
					if ((*par)(j) == 0)
						p(i,j) = 1.e-5;
					else
						p(i,j) = 1.1*(*par)(j);
				}
			}
			else {
				p(i,j) = (*par)(j);
			}
		}
	}

	// function evaluation
	for (i = 0; i < mpts; i++) {
		for (j = 0; j < ndim; j++) (*par)(j) = p(i,j);
		dy(i) = nlsimplex_func(ctx, func, x, y, par);
	}

	for (;;) {
		// find low/high indexes
		ilo = 0;
		if (dy(1) > dy(0)) {
			ihi = 1;
			inhi = 0;
		}
		else {
			ihi = 0;
			inhi = 1;
		}
		for (i = 0; i < mpts; i++) {
			if (dy(i) <= dy(ilo)) ilo = i;
			if (dy(i) > dy(ihi)) {
				inhi = ihi;
				ihi = i;
			}
			else {
				if (dy(i) > dy(inhi) && i != ihi) inhi = i;
			}
		}

		// get out if high-low is small or too many trying
		if (count >= 1000 || dy(ilo) < 1.e-9) {
			for (j = 0; j < ndim; j++) (*par)(j) = p(ilo,j);
			dy(ilo) = nlsimplex_func(ctx, func, x, y, par);
			break;
		}

		// centroid
		for (j = 0; j < ndim; j++) {
			p0(j) = 0;
			for (i = 0; i < mpts; i++) {
				if (i != ihi) p0(j) += p(i,j);
			}
			p0(j) /= ndim;
		}

		// reflection
		for (j = 0; j < ndim; j++) {
			(*par)(j) = 2.0*p0(j) - p(ihi,j);
			if (lb) {
				if ((*par)(j) < (*lb)(j)) (*par)(j) = (*lb)(j);
				if ((*par)(j) > (*ub)(j)) (*par)(j) = (*ub)(j);
			}
		}
		double ytry = nlsimplex_func(ctx, func, x, y, par);

		if (ytry < dy(ilo)) {
			// expansion
			dy(ihi) = ytry;
			for (j = 0; j < ndim; j++) {
				p(ihi,j) = (*par)(j);
				(*par)(j) = 2.0*(*par)(j) - p0(j);
				if (lb) {
					if ((*par)(j) < (*lb)(j)) (*par)(j) = (*lb)(j);
					if ((*par)(j) > (*ub)(j)) (*par)(j) = (*ub)(j);
				}
			}
			ytry = nlsimplex_func(ctx, func, x, y, par);
			if (ytry < dy(ihi)) {
				dy(ihi) = ytry;
				for (j = 0; j < ndim; j++) p(ihi,j) = (*par)(j);
			}
		}
		else if (ytry <= dy(inhi)) {
			dy(ihi) = ytry;
			for (j = 0; j < ndim; j++) p(ihi,j) = (*par)(j);
		}
		else if (ytry >= dy(ihi)) {
			// contraction
			for (j = 0; j < ndim; j++) {
				(*par)(j) = 0.5*(p0(j) + p(ihi,j));
				if (lb) {
					if ((*par)(j) < (*lb)(j)) (*par)(j) = (*lb)(j);
					if ((*par)(j) > (*ub)(j)) (*par)(j) = (*ub)(j);
				}
			}
			ytry = nlsimplex_func(ctx, func, x, y, par);
			if (ytry < dy(ihi)) {
				dy(ihi) = ytry;
				for (j = 0; j < ndim; j++) p(ihi,j) = (*par)(j);
			}
		}
		else {
			// shrink
			if (ytry < dy(ihi)) {
				dy(ihi) = ytry;
				for (j = 0; j < ndim; j++) p(ihi,j) = (*par)(j);
			}
			for (i = 0; i < ndim; i++) {
				for (j = 0; j < ndim; j++) {
					(*par)(j) = 0.5*(p(ilo,j) + p(i,j));
					if (lb) {
						if ((*par)(j) < (*lb)(j)) (*par)(j) = (*lb)(j);
						if ((*par)(j) > (*ub)(j)) (*par)(j) = (*ub)(j);
					}
					p(i,j) = (*par)(j);
				}
				dy(i) = nlsimplex_func(ctx, func, x, y, par);
			}
		}
		count++;
	}

	void *arr = api_create_array(ctx, 2);
	api_set_array_object(ctx, arr, "0", api_create_integer(0, count));
	api_set_array_object(ctx, arr, "1", api_create_real(0, dy(ilo)));
	return arr;
}
/*
void* num_interpotri(void *ctx, int nargs, void** args)
{
	if (nargs < 4) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *gx = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));
	zoMatrixTmp<double> *gy = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));
	int flag = api_get_integer(ctx, args[3]);
	if (me->nrow() < 3 || me->ncol() < 3) api_input_error(ctx);
	zoMatrixTmp<double> x, y, z, w;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	z.getcol(*me, 2);
	int nx = gx->ndat(), ny = gy->ndat();
	zoMatrixTmp<double> *gz = new zoMatrixTmp<double>;
	gz->resize(ny, nx);
	int n = me->nrow();
	zoMatrixTmp<int> near1, next, list, lptr, lend;
	near1.resize(1, n);
	next.resize(1, n);
	list.resize(1, 6*n);
	lptr.resize(1, 6*n);
	lend.resize(1, n);
	zoMatrixTmp<double> dist, grad;
	dist.resize(1, n);
	grad.resize(1, 2*n);
	double sigma = 0, dgmax = 1.e-3;
	int i, j, ist = 1, lnew, ier, ncc = 0, lcc[1], iflgs = 0, iflgg = 1, dflag = 1, nit = 32;
	char msg[256];
	msg[0] = 0;

	if (me->ncol() == 3) {
printf("trmesh_751\n");
		trmesh_751(&n, x.ptr(), y.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &lnew, near1.ptr(), next.ptr(), dist.ptr(), &ier);
printf("trmesh_751\n");
		if (ier != 0) {
			sprintf(msg, "trmesh_751() error code: %d", ier);
			api_runtime_error(ctx, msg);
		}
		if (flag != 0) {
printf("gradg_752\n");
			gradg_752(&ncc, lcc, &n, x.ptr(), y.ptr(), z.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &iflgs, &sigma, &nit, &dgmax, grad.ptr(), &ier);
printf("gradg_752\n");
			if (ier != 0) {
				sprintf(msg, "gradg_752() error code: %d", ier);
				api_runtime_error(ctx, msg);
			}
		}
		for (i = 0; i < ny; i++) {
			double py = (*gy)(i);
			for (j = 0; j < nx; j++) {
				double pz, px = (*gx)(j);
				if (flag != 0) {
printf("intrc1_752\n");
					intrc1_752(&px, &py, &ncc, lcc, &n, x.ptr(), y.ptr(), z.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &iflgs, &sigma, grad.ptr(), &dflag, &ist, &pz, gx->ptr(), gy->ptr(), &ier);
printf("intrc1_752\n");
					if (ier < 0) {
						sprintf(msg, "intrc1_752() error code: %d", ier);
						api_runtime_error(ctx, msg);
					}
					(*gz)(i, j) = pz;
				}
				else {
printf("intrc0_752\n");
					intrc0_752(&px, &py, &ncc, lcc, &n, x.ptr(), y.ptr(), z.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &ist, &pz, &ier);
printf("intrc0_752\n");
					if (ier < 0) {
						sprintf(msg, "intrc0_752() error code: %d", ier);
						api_runtime_error(ctx, msg);
					}
				}
				(*gz)(i,j) = pz;
			}
		}
	}
	else {
		w.getcol(*me, 3);
printf("trmesh_\n");
printf("%d\n",n);
		trmesh_(&n, x.ptr(), y.ptr(), z.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &lnew, near1.ptr(), next.ptr(), dist.ptr(), &ier);
printf("trmesh_\n");
		if (ier != 0) {
			sprintf(msg, "trmesh_() error code: %d", ier);
			api_runtime_error(ctx, msg);
		}
		if (flag != 0) {
printf("gradg_\n");
			gradg_(&n, x.ptr(), y.ptr(), z.ptr(), w.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &iflgs, &sigma, &nit, &dgmax, grad.ptr(), &ier);
printf("gradg_\n");
			if (ier != 0) {
				sprintf(msg, "gradg_() error code: %d", ier);
				api_runtime_error(ctx, msg);
			}
		}
		for (i = 0; i < ny; i++) {
			double lat = DEG_TO_RAD * (*gy)(i);
			for (j = 0; j < nx; j++) {
				double lon = DEG_TO_RAD * (*gx)(j);
				double pw;
				if (flag != 0) {
printf("intrc1_\n");
					intrc1_(&n, &lat, &lon, x.ptr(), y.ptr(), z.ptr(), w.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &iflgs, &sigma, &iflgg, grad.ptr(), &ist, &pw, &ier);
printf("intrc1_\n");
					if (ier < 0) {
						sprintf(msg, "intr1_() error code: %d", ier);
						api_runtime_error(ctx, msg);
					}
				}
				else {
printf("intrc0_\n");
					intrc0_(&n, &lat, &lon, x.ptr(), y.ptr(), z.ptr(), w.ptr(), list.ptr(), lptr.ptr(), lend.ptr(), &ist, &pw, &ier);
printf("intrc0_\n");
					if (ier < 0) {
						sprintf(msg, "intr0_() error code: %d", ier);
						api_runtime_error(ctx, msg);
					}
				}
				(*gz)(i, j) = pw;
			}
		}
	}

	return api_create_user(ctx, gz, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}

*/

zoMatrixTmp<double>* nlfit_func(void *ctx, void *callback, zoMatrixTmp<double> &x, zoMatrixTmp<double> &par)
{
	void *args[2];
	args[0] = api_create_user(ctx, &x, mat_opfunc_double, 0, MAT_DOUBLE);
	args[1] = api_create_user(ctx, &par, mat_opfunc_double, 0, MAT_DOUBLE);
	void *ret = api_call_func(ctx, callback, 2, args);
	zoMatrixTmp<double> *yf = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, ret, MAT_DOUBLE));
	if (yf->ndat() != x.ndat()) api_runtime_error(ctx, "nonlinear-fitting callback returns matrix of wrong size");
	return yf;
}

double nlfit_try(void *ctx, void *callback, zoMatrixTmp<double> &x, zoMatrixTmp<double> &y, zoMatrixTmp<double> &par, zoMatrixTmp<double> &var)
{
	int n = x.ndat(), m = par.ndat();

	zoMatrixTmp<double> CP;
	CP.resize(1, 2*m);

	double *cp = CP.ptr();
	double *dp = cp + m;
	double *p, *q, *r, *s, *t;
	double err, ssq;
	int j, k;

	zoMatrixTmp<double> f(*nlfit_func(ctx, callback, x, par));

	zoMatrixTmp<double> de;
	de.resize(n, m);

	for (j = 0; j < m; j++) {
		double a = par(j); 
		double d = 1.e-5 * fabs(par(j)); 
		if (d < 1.e-9) d = 1.e-9;
		par(j) += d;
		zoMatrixTmp<double> g(*nlfit_func(ctx, callback, x, par));
		par(j) = a;
		for (k = 0; k < g.ndat(); k++) {
			de(k,j) = (g(k) - f(k)) / d;
		}
	}

	var.fill(0,0);

	for (j = 0, ssq = 0.0; j < n; ++j) {
		err = y(j) - f(j);
		ssq += err*err;
		for (k = 0, r = cp; k < m; ++k) *r++ = de(j,k);
		for(r = dp, s = cp, q = var.ptr(); s < dp; ++s, q += m + 1) {
			*r++ += err * *s;
			for (t = s, p = q; t < dp; ) *p++ += *s* *t++;
		}
	}

	for (j = 0, p = var.ptr(); j < m; ++j, p += m+1) {
		for (k = j+1, q = p, r = p; k < m; ++k) *(q += m) = *++r;
	}

	zoMatrixTmp<double> u, v, d;
	u.resize(m, m);
	v.resize(m, m);
	d.resize(1, m);
	svduv(d.ptr(), var.ptr(), u.ptr(), m, v.ptr(), m);
	var.fill(0,0);
	for (j=0; j < m; j++) {
		if (d(j) != 0) var(j,j) = 1./d(j);
	}
	u.trans();
	var.concat(u);
	v.concat(var);
	for (j=0; j < var.ndat(); j++) var(j) = v(j);

	for(k = 0, p = var.ptr(), s = par.ptr(); k < m; ++k, ++s) {
		for(j = 0, t = dp; j < m; ++j) *s += *p++ * *t++;
	}
	return ssq;
}

void* num_nlfit(void *ctx, int nargs, void** args)
{
	if (nargs < 3) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	if (me->ncol() < 2 || me->nrow() < 2) api_runtime_error(ctx, "matrix columns/rows less than 2");
	void *func = api_get_func(ctx, api_get_string(ctx, args[1]));
	zoMatrixTmp<double> *par = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));
	int i = 0, j, np = par->ndat();
	if (np > NPARS) api_runtime_error(ctx, "too many parameters");
	zoMatrixTmp<double> *lb = 0, *ub = 0;
	if (nargs > 4) {
		lb  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[3], MAT_DOUBLE));
		ub  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[4], MAT_DOUBLE));
		if (lb->ndat() < np || ub->ndat() < np) api_runtime_error(ctx, "bad boundery matrix");
		for (i = 0; i < np; i++)  {
			if ((*lb)(i) >= (*ub)(i)) api_runtime_error(ctx, "bad boundery values");
		}
	}
	zoMatrixTmp<double> *var = new zoMatrixTmp<double>;
	var->resize(np, np);
	zoMatrixTmp<double> x, y;
	me->getcol(x, 0);
	me->getcol(y, 1);
	double ssq1, ssq2;
	ssq1 = ssq2 = nlfit_try(ctx, func, x, y, *par, *var);
	if (ssq1 > 0) {
		for (i = 0; i < 100; i++) {
			ssq2 = nlfit_try(ctx, func, x, y, *par, *var);
			if (ssq2 < 0 || ssq2 < 1.e-15 || ssq2 == ssq1) break;
			ssq1 = ssq2;
			if (lb) {
				for (j = 0; j < np; j++) {
					if ((*par)(j) > (*ub)(j)) (*par)(j) = (*ub)(j);
					if ((*par)(j) < (*lb)(j)) (*par)(j) = (*lb)(j);
				}
			}
		}
	}
	if (lb) {
		for (j = 0; j < np; j++) {
			if ((*par)(j) > (*ub)(j)) (*par)(j) = (*ub)(j);
			if ((*par)(j) < (*lb)(j)) (*par)(j) = (*lb)(j);
		}
	}
	
	zoMatrixTmp<double>* yf = nlfit_func(ctx, func, x, *par);
	ssq2 = 0;
	for (j = 0; j < y.ndat(); j++) {
		double d = y(j) - (*yf)(j);
		ssq2 += d*d;
	}

	void *arr = api_create_array(ctx, 3);
	api_set_array_object(ctx, arr, "0", api_create_integer(0, i));
	api_set_array_object(ctx, arr, "1", api_create_real(0, ssq2));
	api_set_array_object(ctx, arr, "2", api_create_user(0, var, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE));
	return arr;
}

static
void Solve_SVDnn(zoMatrixTmp<double> &A, zoMatrixTmp<double> &X)
{
	int np=X.ndat();

	zoMatrixTmp<double> U, V, S, Y;
	U.resize(np,np);
	V.resize(np,np);
	S.resize(np,1);
	Y.resize(np,1);

	//A=U*S*V~
	svduv(S.ptr(),A.ptr(),U.ptr(),np,V.ptr(),np);

	// inv(S)
	double smax=0;
	int i;
	for (i=0; i<np; i++) {
		double s=fabs(S(i));
		if (s>smax) smax=s;
	}
	for (i=0; i<np; i++) {
		double s=fabs(S(i)/smax);
		if (s<1.e-9) {
			S(i)=0;
		}
		else {
			S(i)=1.0/S(i);
		}
	}

	//inv(S)*U~*X
	trnm(U.ptr(),np);
	vmul(Y.ptr(),U.ptr(),X.ptr(),np);
	for (i=0; i<np; i++) {
		S(i)*=Y(i);
	}

	//Y=V*inv(S)*U~*X
	vmul(X.ptr(),V.ptr(),S.ptr(),np);
}

void* num_leqs(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *A=reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx,args[0],MAT_DOUBLE));
	zoMatrixTmp<double> *Y=reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx,args[1],MAT_DOUBLE));

	if (A->nrow()!=Y->ndat() || A->ncol()<2 || A->nrow()<2) api_runtime_error(ctx,"bad matrix dimensions");

	int ret=1;

	if (A->nrow()>A->ncol()) {
		// overdetremined
		zoMatrixTmp<double> AT, AA, AY;
		AT.resize(A->ncol(),A->nrow());
		AA.resize(A->ncol(),A->ncol());
		AY.resize(A->ncol(),1);
		//AT=A~
		mattr(AT.ptr(),A->ptr(),A->nrow(),A->ncol());
		//AY=A~*Y
		rmmult(AY.ptr(),AT.ptr(),Y->ptr(),AT.nrow(),AT.ncol(),1);
		//AA=A~*A
		rmmult(AA.ptr(),AT.ptr(),A->ptr(),AT.nrow(),AT.ncol(),A->ncol());
		if (solv(AA.ptr(),AY.ptr(),AY.ndat())!=0) {
			Solve_SVDnn(AA,AY);
			ret=0;
		}
		for (int i=0; i<AY.ndat(); i++) (*Y)(i)=AY(i);
	}
	else if (A->nrow()==A->ncol()) {
		// square
		if (solv(A->ptr(),Y->ptr(),A->nrow())!=0) {
			Solve_SVDnn(*A,*Y);
			ret=0;
		}
	}
	else {
		// underdetermined
		zoMatrixTmp<double> AT, AA, AY;
		AT.resize(A->ncol(),A->nrow());
		AA.resize(A->nrow(),A->nrow());
		AY.resize(A->ncol(),1);
		//AT=A~
		mattr(AT.ptr(),A->ptr(),A->nrow(),A->ncol());
		//AA=A*A~
		rmmult(AA.ptr(),A->ptr(),AT.ptr(),A->nrow(),A->ncol(),AT.ncol());
		// solve B=inv(AA~)*Y
		if (solv(AA.ptr(),Y->ptr(),AA.nrow())!=0) {
			Solve_SVDnn(AA,*Y);
			ret=0;
		}
		//X=A~inv(A*A~)*Y=A~*B
		rmmult(AY.ptr(),AT.ptr(),Y->ptr(),AT.nrow(),AT.ncol(),1);
		for (int i=0; i<Y->ndat(); i++) (*Y)(i)=AY(i);
	}

	return api_create_integer(ctx,ret);
}

void* num_interpo0D(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	double d = api_get_number(ctx, args[1]);
	
	int i, i1, i2, n=me->ndat();
	double v, v1, v2;
	
	for (i = 0; i < n; i++) {
		v = (*me)(i);
		if ((d > 0 && v < d) || (d < 0 && v > d)) {
			// the first non-missing
			while (--i >= 0) (*me)(i) = v;
			break;
		}
	}
	if (i >= n) api_runtime_error(ctx, "all are missing");
	
	for (i = n-1; i > 0; i--) {
		v = (*me)(i);
		if ((d > 0 && v < d) || (d < 0 && v > d)) {
			// the last non-missing
			while (++i < me->ndat()) (*me)(i) = v;
			break;
		}
	}
	if (i <=0) api_runtime_error(ctx, "all are missing");

	v1 = (*me)(0);
	for (i1 = 1; i1 < n; i1++) {
		v = (*me)(i1);
		if ((d > 0 && v >= d) || (d < 0 && v <= d)) {
			// missing
			for (i2 = i1+1; i2 < n; i2++) {
				v = (*me)(i2);
				if ((d > 0 && v < d) || (d < 0 && v > d)) {
					v2 = v;
					v = (v2-v1)/(i2-i1+1);
					for (i = i1; i < i2; i++) {
						(*me)(i) = v1 + v*(i-i1+1);
					}
					break;
				}
			}
		}
		else {
			v1 = (*me)(i1);
		}
	}
	return 0;
}

void* num_interpo1D(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *u  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));
	zoMatrixTmp<double> x, y;
	x.getcol(*me, 0);
	y.getcol(*me, 1);
	zoMatrixTmp<double> *p = new zoMatrixTmp<double>;
	p->resize(u->ndat(), 1);
	int i1, i2, i;
	double f1, f2;
	for (i = 0; i < u->ndat(); i++) {
		get_factor(x, (*u)(i), i1, i2, f1, f2);
		(*p)(i) = f1 * y(i1) + f2 * y(i2);
	}
	return api_create_user(ctx, p, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}


void* num_interpo2Dirregular(void *ctx, int nargs, void** args)
{
	if (nargs < 5) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *x  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));
	zoMatrixTmp<double> *y  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));
	zoMatrixTmp<double> *u  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[3], MAT_DOUBLE));
	zoMatrixTmp<double> *v  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[4], MAT_DOUBLE));
	if (x->ndat() != me->ncol() || y->ndat() != me->nrow()) api_input_error(ctx);
	zoMatrixTmp<double> fu1, fu2, fv1, fv2, *p = new zoMatrixTmp<double>;
	p->resize(v->ndat(), u->ndat());
	fu1.resize(1, u->ndat());
	fu2.resize(1, u->ndat());
	fv1.resize(1, v->ndat());
	fv2.resize(1, v->ndat());
	zoMatrixTmp<int> iu1, iu2, iv1, iv2;
	iu1.resize(1, u->ndat());
	iu2.resize(1, u->ndat());
	iv1.resize(1, v->ndat());
	iv2.resize(1, v->ndat());
	int i, j;
	for (i = 0; i < u->ndat(); i++) get_factor(*x, (*u)(i), iu1(i), iu2(i), fu1(i), fu2(i));
	for (i = 0; i < v->ndat(); i++) get_factor(*y, (*v)(i), iv1(i), iv2(i), fv1(i), fv2(i));
	for (i = 0; i < v->ndat(); i++) {
		for (j = 0; j < u->ndat(); j++) {
			(*p)(i,j) = fv1(i) * fu1(j) * (*me)(iv1(i), iu1(j)) +
						fv1(i) * fu2(j) * (*me)(iv1(i), iu2(j)) + 
						fv2(i) * fu1(j) * (*me)(iv2(i), iu1(j)) +
						fv2(i) * fu2(j) * (*me)(iv2(i), iu2(j));
		}
	}
	return api_create_user(ctx, p, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}


void* num_interpo2D(void *ctx, int nargs, void** args)
{
	if (nargs < 3) api_input_error(ctx);
	if (nargs > 3) return num_interpo2Dirregular(ctx, nargs, args);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *u  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));
	zoMatrixTmp<double> *v  = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));
	zoMatrixTmp<double> fu1, fu2, fv1, fv2, x, y, *p = new zoMatrixTmp<double>;
	p->resize(v->ndat(), u->ndat());
	fu1.resize(1, u->ndat());
	fu2.resize(1, u->ndat());
	fv1.resize(1, v->ndat());
	fv2.resize(1, v->ndat());
	x.resize(1, me->ncol());
	x.fill(0, 1);
	y.resize(1, me->nrow());
	y.fill(0, 1);
	zoMatrixTmp<int> iu1, iu2, iv1, iv2;
	iu1.resize(1, u->ndat());
	iu2.resize(1, u->ndat());
	iv1.resize(1, v->ndat());
	iv2.resize(1, v->ndat());
	int i, j;
	for (i = 0; i < u->ndat(); i++) get_factor(x, (*u)(i), iu1(i), iu2(i), fu1(i), fu2(i));
	for (i = 0; i < v->ndat(); i++) get_factor(y, (*v)(i), iv1(i), iv2(i), fv1(i), fv2(i));
	for (i = 0; i < v->ndat(); i++) {
		for (j = 0; j < u->ndat(); j++) {
			(*p)(i,j) = fv1(i) * fu1(j) * (*me)(iv1(i), iu1(j)) +
						fv1(i) * fu2(j) * (*me)(iv1(i), iu2(j)) + 
						fv2(i) * fu1(j) * (*me)(iv2(i), iu1(j)) +
						fv2(i) * fu2(j) * (*me)(iv2(i), iu2(j));
		}
	}
	return api_create_user(ctx, p, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}

void* num_inside(void *ctx, int nargs, void** args)
{
	if (nargs < 3) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	if (me->nrow() < 3 || me->ncol() < 2) api_input_error(ctx);
	double x = api_get_number(ctx, args[1]);
	double y = api_get_number(ctx, args[2]);
	zoMatrixTmp<double> xp, yp;
	xp.getcol(*me, 0);
	yp.getcol(*me, 1);
	return api_create_integer(ctx, point_in_polygon(xp.ptr(), yp.ptr(), xp.ndat(), x, y));
}

void* num_area(void *ctx, int nargs, void** args)
{
	if (nargs < 1) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	if (me->nrow() < 3 || me->ncol() < 2) api_input_error(ctx);
	double a = 0;
	int i, n = me->nrow();
	double x0 = (*me)(0,0);
	double y0 = (*me)(0,1);
	if (x0 == (*me)(n-1,0) && y0 == (*me)(n-1,1)) {
		n--;
		if (n < 3) api_input_error(ctx);
	}
	double x1 = (*me)(1,0) - x0;
	double y1 = (*me)(1,1) - y0;
	for (i = 2; i < n; i++) {
		double x2 = (*me)(i,0) - x0;
		double y2 = (*me)(i,1) - y0;
		a += x1*y2 - x2*y1;
		x1 = x2;
		y1 = y2;
	}
	return api_create_real(ctx, 0.5*a);
}

void* num_convolve(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);
	zoMatrixTmp<double> *me = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *m2 = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));
	zoMatrixTmp<double> *p = new zoMatrixTmp<double>;
	p->resize(1, me->ndat());
	p->fill(0, 0);
	int i, j;
	int m = m2->ndat()/2;
	int n = me->ndat() - m2->ndat();
	for (i=0; i<=n; i++) {
		double v=0;
		for (j=0; j<m2->ndat(); j++) v += (*me)(i+j)*(*m2)(j);
		(*p)(i+m) = v;
	}
	return api_create_user(ctx, p, mat_opfunc_double, mat_destroy_double, MAT_DOUBLE);
}

void* num_lfit(void *ctx, int nargs, void** args)
{
	if (nargs < 2) api_input_error(ctx);

	zoMatrixTmp<double> *Y = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[0], MAT_DOUBLE));
	zoMatrixTmp<double> *X = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[1], MAT_DOUBLE));

	if (Y->ndat()!=X->ndat()) api_runtime_error(ctx, "unequal matrix size");

	zoMatrixTmp<double> *er = 0;
	double xm=0.0, ym=0.0, s=0.0, sx=0.0, sy=0.0, sxx=0.0, syy=0.0, sxy=0.0;
	double a, b, r, dx, dy;
	int i;

	if (nargs > 2) {
		er = reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx, args[2], MAT_DOUBLE));
		if (Y->ndat()!=er->ndat()) api_runtime_error(ctx, "unequal matrix size");
	}

	for (i=0; i<Y->ndat(); i++) {
		xm+=(*X)(i);
		ym+=(*Y)(i);
	}
	xm/=X->ndat();
	ym/=Y->ndat();

	if (er) {
		for (i=0; i<Y->ndat(); i++) {
			r=1.0/(*er)(i);
			dx=((*X)(i)-xm)*r;
			dy=((*Y)(i)-ym)*r;
			s+=r*r;
			sx+=dx*r;
			sy+=dy*r;
			sxx+=dx*dx;
			syy+=dy*dy;
			sxy+=dx*dy;
		}
		r=s*sxx-sx*sx;
		a=(sxx*sy-sx*sxy)/r;
		b=(s*sxy-sx*sy)/r;
		a+=ym-b*xm;
		r=sxy/sqrt(sxx*syy);
	}
	else {
		for (i=0; i<Y->ndat(); i++) {
			dx=(*X)(i)-xm;
			dy=(*Y)(i)-ym;
			sx+=dx;
			sy+=dy;
			sxx+=dx*dx;
			syy+=dy*dy;
			sxy+=dx*dy;
		}
		b=sxy/sxx;
		a=ym-b*xm;
		r=sxy/sqrt(sxx*syy);
	}

	// y=a+b*x;

	void *arr=api_create_array(ctx,3);
	api_set_array_object(ctx,arr,"0",api_create_real(0,a));
	api_set_array_object(ctx,arr,"1",api_create_real(0,b));
	api_set_array_object(ctx,arr,"2",api_create_real(0,r));
	api_set_array_object(ctx,arr,"eq",api_create_string(0,"y=a+b*x"));
	api_set_array_object(ctx,arr,"a",api_create_real(0,a));
	api_set_array_object(ctx,arr,"b",api_create_real(0,b));
	api_set_array_object(ctx,arr,"R",api_create_real(0,r));

	return arr;
}

void* num_ssa(void *ctx, int nargs, void** args)
{
	if (nargs==2) {
		zoMatrixTmp<double> *A=reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx,args[0],MAT_DOUBLE));
		int L=api_get_integer(ctx,args[1]);
		if (L<2 || L>A->ndat()/2) api_runtime_error(ctx,"bad windows size");
		int K=A->ndat()-L+1;
		zoMatrixTmp<double> *U=new zoMatrixTmp<double>;
		U->resize(L,L);
		zoMatrixTmp<double> *D=new zoMatrixTmp<double>;
		D->resize(L,1);
		int i, j, k;
		// embedding C=A*A~/(K-1)
		for (i=0; i<L; i++) {
			for (j=0; j<L; j++) {
				double a=0;
				for (k=0; k<K; k++) {
					a+=(*A)(k+i)*(*A)(k+j);
				}
				a/=K-1;
				(*U)(i,j)=a;
				if (i!=j) (*U)(j,i)=a;
			}
		}
		eigen(U->ptr(),D->ptr(),L);
		// sorting 
		zoMatrixTmp<int> *J=new zoMatrixTmp<int>;
		J->resize(L,1);
		zoMatrixTmp<double> T;
		T.resize(L,1);
		for (i=0; i<L; i++) T(i)=fabs((*D)(i));
		zoMatrixTmp<size_t> Idx;
		Idx.resize(L,1);
		HeapSort(T.ptr(),Idx.ptr(),L,-1);
		for (i=0; i<L; i++) (*J)(i)=Idx(i);
		// return
		void *arr=api_create_array(ctx,3);
		api_set_array_object(ctx,arr,"0",api_create_user(0,U,mat_opfunc_double,mat_destroy_double,MAT_DOUBLE));
		api_set_array_object(ctx,arr,"1",api_create_user(0,D,mat_opfunc_double,mat_destroy_double,MAT_DOUBLE));
		api_set_array_object(ctx,arr,"2",api_create_user(0,J,mat_opfunc_int,mat_destroy_int,MAT_INT));
		return arr;
	}

	if (nargs<3) api_input_error(ctx);
	zoMatrixTmp<double> *A=reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx,args[0],MAT_DOUBLE));
	zoMatrixTmp<double> *U=reinterpret_cast<zoMatrixTmp<double>*>(api_get_user(ctx,args[1],MAT_DOUBLE));
	int idx=api_get_integer(ctx,args[2]);

	if (A->ndat()==U->ndat()) {
		// weighted correlation
		zoMatrixTmp<double> *Y1=A;
		zoMatrixTmp<double> *Y2=U;
		int L=idx;
		if (L<2 || L>Y1->ndat()/2) api_runtime_error(ctx,"bad windows size");
		int K=Y1->ndat()-L+1;
		double a=0, b=0, c=0;
		for (int i=0; i<Y1->ndat(); i++) {
			double y1=(*Y1)(i);
			double y2=(*Y2)(i);
			double w=i+1;
			if (i>=L) {
				if (i<K)
					w=L;
				else
					w=Y1->ndat()-i;
			}
			a+=w*y1*y1;
			b+=w*y2*y2;
			c+=w*y1*y2;
		}
		c/=sqrt(a*b);
		return api_create_real(ctx,c);
	}

	if (U->nrow()>A->ndat()/2 ||
		U->nrow()!=U->ncol()) api_runtime_error(ctx,"bad SVD matrix sizes.");
	if (idx<0 || idx>=U->nrow()) api_runtime_error(ctx,"bad index value.");

	int L=U->nrow();
	int K=A->ndat()-L+1;
	zoMatrixTmp<double> *Y=new zoMatrixTmp<double>;
	Y->resize(A->ndat(),1);
	// B=U~*trajectory(A);
	zoMatrixTmp<double> B;
	B.resize(L,K);
	int i, j, k;
	for (i=0; i<L; i++) {
		for (j=0; j<K; j++) {
			double a=0;
			for (k=0; k<L; k++) {
				a+=(*U)(k,i)*(*A)(j+k);
			}
			B(i,j)=a;
		}
	}
	zoMatrixTmp<double> C;
	C.resize(L,K);
	// C=U(*,idx)*B;
	for (i=0; i<L; i++) {
		double a=(*U)(i,idx);
		for (j=0; j<K; j++) {
			C(i,j)=a*B(idx,j);
		}
	}
	// Y from trajectory matrix;
	for (i=0, k=0; i<L; i++) {
		(*Y)(k++)=C(i,0);
	}
	i=L-1;
	for (j=1; j<K; j++) {
		(*Y)(k++)=C(i,j);
	}

	return api_create_user(ctx,Y,mat_opfunc_double,mat_destroy_double,MAT_DOUBLE);
}

int get_factor(const zoMatrixTmp<double> &X, double x, int &i1, int &i2, double &f1, double &f2)
{
	int i, n=X.ndat();

	if (n<2) return 0;

	if (X(1) > X(0)) {
		if (x <= X(0)) {
			i1 = 0;
			i2 = 0;
			f1 = 0;
			f2 = 1;
			return 1;
		}
		if (x >= X(n-1)) {
			i1 = n - 1;
			i2 = n - 1;
			f1 = 1;
			f2 = 0;
			return 1;
		}
		for (i = 1; i < n; i++) {
			if (x <= X(i)) {
				double d =  fabs(X(i)-X(i-1));
				if (d != 0) {
					f1 = fabs(X(i)-x) / d;
					f2 = fabs(X(i-1)-x) / d;
				}
				else {
					f1 = 0.5;
					f2 = 0.5;
				}
				i1 = i - 1;
				i2 = i;
				return 1;
			}
		}
	}
	else {
		if (x >= X(0)) {
			i1 = 0;
			i2 = 0;
			f1 = 0;
			f2 = 1;
			return 1;
		}
		if (x <= X(n-1)) {
			i1 = n - 1;
			i2 = n - 1;
			f1 = 1;
			f2 = 0;
			return 1;
		}
		for (i = 1; i < n; i++) {
			if (x >= X(i)) {
				double d =  fabs(X(i)-X(i-1));
				if (d != 0) {
					f1 = fabs(X(i)-x) / d;
					f2 = fabs(X(i-1)-x) / d;
				}
				else {
					f1 = 0.5;
					f2 = 0.5;
				}
				i1 = i - 1;
				i2 = i;
				return 1;
			}
		}
	}

	i1 = 0;
	i2 = 0;
	f1 = 0;
	f2 = 0;

	return 1;
}

// http://www.ecse.rpi.edu/Homepages/wrf/Research/Short_Notes/pnpoly.html
// W. Randolph Franklin
//
int point_in_polygon(const double *xp, const double *yp, int n, double x, double y)
{
	int i, j, c = 0;
	for (i = 0, j = n-1; i < n; j = i++) {
		if ((((yp[i] <= y) && (y < yp[j])) ||
			 ((yp[j] <= y) && (y < yp[i]))) &&
			(x < (xp[j] - xp[i]) * (y - yp[i]) / (yp[j] - yp[i]) + xp[i]))
		c = !c;
	}
	return c;
}


class mnumericRegNumeric
{
public:
	mnumericRegNumeric()
	{
		api_add_primitive("nlsim", MAT_DOUBLE, num_nlsimplex);
		api_add_primitive("nllm", MAT_DOUBLE, num_nllm);
		api_add_primitive("mlfit", MAT_DOUBLE, num_mlfit);
		api_add_primitive("delaunay", MAT_DOUBLE, num_delaunay);
		api_add_primitive("svd", MAT_DOUBLE, num_svd);
		api_add_primitive("invert", MAT_DOUBLE, num_invert);
		api_add_primitive("fft", MAT_DOUBLE, num_fft);
		api_add_primitive("spline", MAT_DOUBLE, num_spline);
		api_add_primitive("cvsp", MAT_DOUBLE, num_cv_spline);
		api_add_primitive("smooth", MAT_DOUBLE, num_dist_smooth);
		api_add_primitive("sgsm", MAT_DOUBLE, num_sg_smooth);
		api_add_primitive("interpo0d", MAT_DOUBLE, num_interpo0D);
		api_add_primitive("interpo1d", MAT_DOUBLE, num_interpo1D);
		api_add_primitive("interpo2d", MAT_DOUBLE, num_interpo2D);
		api_add_primitive("inside", MAT_DOUBLE, num_inside);
		api_add_primitive("area", MAT_DOUBLE, num_area);
		api_add_primitive("convolve", MAT_DOUBLE, num_convolve);
		api_add_primitive("nlfit", MAT_DOUBLE, num_nlfit);
		api_add_primitive("leqs", MAT_DOUBLE, num_leqs);
		api_add_primitive("lfit", MAT_DOUBLE, num_lfit);
		api_add_primitive("ssa", MAT_DOUBLE, num_ssa);
	}
};

static mnumericRegNumeric regster_numeric_primitve;
