package numericalRecipes;

import static java.lang.Math.*;
/** Routines copied directly from numerical recipes in C. Only neccessary C -> java conversions made
 * Oh, and it's all in double's now 
 * Also converted for arrays to be 0 based
 */
public class NumericalRecipes {
	
	final static double TINY = 1e-20;
	public static void dfour1(double data[], int nn, int isign)
	{
		int n,mmax,m,j,istep,i;
		double wtemp,wr,wpr,wpi,wi,theta;
		double tempr,tempi;
		n=nn << 1;
		j=1;
		for (i=1;i<n;i+=2) {
			if (j > i) {
				//SWAP(data[j-1],data[i-1]);
				double tmp = data[j-1]; data[j-1] = data[i-1]; data[i-1] = tmp;
				
				//SWAP(data[j],data[i]);
				tmp = data[j]; data[j] = data[i]; data[i] = tmp;
				
			}
			m=n >> 1;
			while (m >= 2 && j > m) {
				j -= m;
				m >>= 1;
			}
			j += m;
		}
		mmax=2;
		while (n > mmax) {
			istep=mmax << 1;
			theta=isign*(6.28318530717959/mmax);
			wtemp=sin(0.5*theta);
			wpr = -2.0*wtemp*wtemp;
			wpi=sin(theta);
			wr=1.0;
			wi=0.0;
			for (m=1;m<mmax;m+=2) {
				for (i=m;i<=n;i+=istep) {
					j=i+mmax;
					
					tempr=wr*data[j-1]-wi*data[j];
					tempi=wr*data[j]+wi*data[j-1];
					data[j-1]=data[i-1]-tempr;
					data[j]=data[i]-tempi;
					data[i-1] += tempr;
					data[i] += tempi;
				}
				wr=(wtemp=wr)*wpr-wi*wpi+wr;
				wi=wi*wpr+wtemp*wpi+wi;
			}
			mmax=istep;
		}
	}
	
	public static void drealft(double data[], int n, int isign)
	{
		int i,i1,i2,i3,i4,np3;
		double c1=0.5,c2,h1r,h1i,h2r,h2i;
		double wr,wi,wpr,wpi,wtemp,theta;
		theta=3.141592653589793/(double) (n>>1);
		if (isign == 1) {
			c2 = -0.5;
			dfour1(data,n>>1,1);
		} else {
			c2=0.5;
			theta = -theta;
		}
		wtemp=sin(0.5*theta);
		wpr = -2.0*wtemp*wtemp;
		wpi=sin(theta);
		wr=1.0+wpr;
		wi=wpi;
		np3=n+3;
		for (i=2;i<=(n>>2);i++) {
			i4=1+(i3=np3-(i2=1+(i1=i+i-1)));
			h1r=c1*(data[i1-1]+data[i3-1]);
			h1i=c1*(data[i2-1]-data[i4-1]);
			h2r = -c2*(data[i2-1]+data[i4-1]);
			h2i=c2*(data[i1-1]-data[i3-1]);
			data[i1-1]=h1r+wr*h2r-wi*h2i;
			data[i2-1]=h1i+wr*h2i+wi*h2r;
			data[i3-1]=h1r-wr*h2r+wi*h2i;
			data[i4-1] = -h1i+wr*h2i+wi*h2r;
			wr=(wtemp=wr)*wpr-wi*wpi+wr;
			wi=wi*wpr+wtemp*wpi+wi;
		}
		if (isign == 1) {
			data[0] = (h1r=data[0])+data[1];
			data[1] = h1r-data[1];
		} else {
			data[0]=c1*((h1r=data[0])+data[1]);
			data[1]=c1*(h1r-data[1]);
			dfour1(data,n>>1,-1);
		}
	}
	
	public static void dtwofft(double data1[], double data2[], double fft1[], double fft2[],int n)
		{			
			int nn3,nn2,jj,j;
			double rep,rem,aip,aim;
			nn3=1+(nn2=2+n+n);
			for (j=1,jj=2;j<=n;j++,jj+=2) {
				fft1[jj-2]=data1[j-1];
				fft1[jj-1]=data2[j-1];
			}
			dfour1(fft1,n,1);
			fft2[0]=fft1[1];
			fft1[1]=fft2[1]=0.0;
			for (j=3;j<=n+1;j+=2) {
				rep=0.5*(fft1[j-1]+fft1[nn2-j-1]);
				rem=0.5*(fft1[j-1]-fft1[nn2-j-1]);
				aip=0.5*(fft1[j]+fft1[nn3-j-1]);
				aim=0.5*(fft1[j]-fft1[nn3-j-1]);
				fft1[j-1]=rep;
				fft1[j]=aim;
				fft1[nn2-j-1]=rep;
				fft1[nn3-j-1] = -aim;
				fft2[j-1]=aip;
				fft2[j] = -rem;
				fft2[nn2-j-1]=aip;
				fft2[nn3-j-1]=rem;
			}
		}
	
		public static void dconvlv(double data[], int n, double respns[], int m,	int isign, double ans[])
		{
			int i,no2;
			double dum,mag2,fft[];
			//fft=vector(1,n<<1);
			fft = new double[n<<1];
			
			for (i=1;i<=(m-1)/2;i++)
				respns[n-i]=respns[m-i];
			for (i=(m+3)/2;i<=n-(m-1)/2;i++)
				respns[i-1]=0.0;
			dtwofft(data,respns,fft,ans,n);
			
			no2=n>>1;
			for (i=2;i<=n+2;i+=2) {
				if (isign == 1) {
					ans[i-2]=(fft[i-2]*(dum=ans[i-2])-fft[i-1]*ans[i-1])/no2;
					ans[i-1]=(fft[i-1]*dum+fft[i-2]*ans[i-1])/no2;
				} else if (isign == -1) {
					if ((mag2=SQR(ans[i-2])+SQR(ans[i-1])) == 0.0)
						throw new ArithmeticException("Deconvolving at response zero in convlv");
					ans[i-2]=(fft[i-2]*(dum=ans[i-2])+fft[i-1]*ans[i-1])/mag2/no2;
					ans[i-1]=(fft[i-1]*dum-fft[i-2]*ans[i-1])/mag2/no2;
				} else throw new ArithmeticException("No meaning for isign in convlv");
			}
			ans[1]=ans[n];
			drealft(ans,n,-1);
		}
		
		public static double SQR(double a){ return a*a; }
		
		/**
		 * Multiplies the complex fft arrays f and g together - This is for the convolution of two (at least real, maybe general) functions
		 * @param f
		 * @param g
		 * @return
		 */
		public static double[] fftMul(double f[], double g[]){
			int n = f.length;
			
			double ret[] = new double[n];
			for(int i=2;i<n;i+=2){				
				ret[i] = (f[i] * g[i  ]) 	- (f[i+1] * g[i+1]); // real part of a*b
				ret[i+1] = (f[i] * g[i+1]) 	+ (f[i+1] * g[i  ]); // img part of a*b
			}
			
			ret[0] = f[0] * g[0]; //img part of element 0 contains real value of element N (which is not included)
			ret[1] = f[1] * g[1]; 
		
			
			return ret;
		}
		
		
		/*public double[][] simpleConvolve(double fx[], double f[], double gx[], double g[]){
			int nf = f.length;
			int ng = g.length;
			int nC = nf + (ng-1);	//we require our final arrays to be AT LEAST nf+2*ng (for double zero padding) 
			
			double dx=(fx[nf-1] - fx[0])/(nf-1);		
			double C[][] = new double[2][nC];
			double y0,y1;
			
			C[0][0] = fx[0] + gx[0];
			int lit=0;
			
			if(true)
				throw new RuntimeException("Don't think I finished this one ???");
			
			return C;
		}*/
	
		/** Given an n 
 n band diagonal matrix A with m1 subdiagonal rows and m2 superdiagonal rows,
		 * compactly stored in the array a[1..n][1..m1+m2+1] as described in the comment for routine
		 * banmul, this routine constructs an LU decomposition of a rowwise permutation of A. The upper
		 * triangular matrix replaces a, while the lower triangular matrix is returned in al[1..n][1..m1].
		 * indx[1..n] is an output vector which records the row permutation e
ected by the partial
		 * pivoting; d is output as 
1 depending on whether the number of row interchanges was even
		 * or odd, respectively. This routine is used in combination with banbks to solve band-diagonal
		 * sets of equations.
		 * 
		 */
		public static double bandec(double a[][], int n, int m1, int m2, double al[][],int indx[]){			  
		     int i,l;
		     int mm;
		     double dum;
		     mm=m1+m2+1;
		     l=m1;
		     double d;
		                                                 //Rearrange the storage a bit.
		     for (i=1;i<=m1;i++) {
		         for (int j=m1+2-i;j<=mm;j++) a[i-1][j-l-1]=a[i-1][j-1];
		         l--;
		         for (int j=mm-l;j<=mm;j++) a[i-1][j-1]=0.0;
		     }
		     d = 1.0;
		     l=m1;
		                                                // For each row...
		     for (int k=1;k<=n;k++) {
		         dum=a[k-1][1-1];
		         i=k;
		         if (l < n) l++;
		                                                 //Find the pivot element.
		         for (int j=k+1;j<=l;j++) {
		              if (Math.abs(a[j-1][1-1]) > Math.abs(dum)) {
		                   dum=a[j-1][1-1];
		                   i=j;
		              }
		         }
		         indx[k-1]=i;
		         if (dum == 0.0)
		        	 a[k-1][1-1]=TINY;
		         //Matrix is algorithmically singular, but proceed anyway with TINY pivot (desirable in some applications).
		                                                 //Interchange rows.
		         if (i != k) {
		              d = -d;
		              for (int j=1;j<=mm;j++){
		            	  //SWAP(a[k][j],a[i][j])
		            	  double tmp = a[k-1][j-1];
		            	  a[k-1][j-1] = a[i-1][j-1];
		            	  a[i-1][j-1] = tmp;
		              }
		         }
		                                                // Do the elimination.
		         for (i=k+1;i<=l;i++) {
		              dum=a[i-1][1-1]/a[k-1][1-1];
		              al[k-1][i-k-1]=dum;
		              for (int j=2;j<=mm;j++)
		            	  a[i-1][j-1-1]=a[i-1][j-1]-dum*a[k-1][j-1];
		              a[i-1][mm-1]=0.0;
		         }
		     }
		     
		     return d;
		}
		
		/**
		 * Given the arrays a, al, and indx as returned from bandec, and given a right-hand side vector
		 * b[1..n], solves the band diagonal linear equations A 
 x = b. The solution vector x overwrites
		 * b[1..n]. The other input arrays are not modi
ed, and can be left in place for successive calls
		 * with di
erent right-hand sides. 
		 */		
		public static void banbks(double a[][], int n, int m1, int m2, double al[][], int indx[], double b[]){
		    int i,k,l;
		    int mm;
		    double dum;
		    mm=m1+m2+1;
		    l=m1;
		                                         //Forward substitution, unscrambling the permuted rows
		    for (k=1;k<=n;k++) {
		                                            // as we go.
		         i=indx[k-1];
		         if (i != k){ 
		        	 //SWAP(b[k],b[i])
		        	 double tmp = b[i-1];
		        	 b[i-1] = b[k-1];
		        	 b[k-1] = tmp;
		         
		         }
		         if (l < n) l++;
		         for (i=k+1;i<=l;i++) b[i-1] -= al[k-1][i-k-1]*b[k-1];
		    }
		    l=1;
		                                         //Backsubstitution.
		    for (i=n;i>=1;i--) {
		         dum=b[i-1];
		         for (k=2;k<=l;k++)
		        	 dum -= a[i-1][k-1]*b[k+i-1-1];
		         b[i-1]=dum/a[i-1][1-1];
		         if (l < mm) l++;
		    }
		}
		
		
		/**Matrix multiply b = A 
 x, where A is band diagonal with m1 rows below the diagonal and m2
		rows above. The input vector x and output vector b are stored as x[1..n] and b[1..n],
		respectively. The array a[1..n][1..m1+m2+1] stores A as follows: The diagonal elements
		are in a[1..n][m1+1]. Subdiagonal elements are in a[j..n][1..m1] (with j > 1 appropriate
		to the number of elements on each subdiagonal). Superdiagonal elements are in
		a[1..j][m1+2..m1+m2+1] with j < n appropriate to the number of elements on each superdiagonal.*/
		public static double[] banmul(double a[][], int n, int m1, int m2, double x[])
		{			
			double b[] = new double[n];
			for (int i=1;i<=n;i++) {
				b[i-1]=0.0;
				
				int k=i-m1-1;
				
				int j1=m1+m2+1;
				if( (n-k) < j1) j1 = n-k;
				
				int j0 = 1;
				if( (1-k) > j0) j0 = 1-k;
				
				for (int j=j0;j<=j1;j++)
					b[i-1] += a[i-1][j-1]*x[j+k-1];
			}
			return b;
		}
		
		
		/** Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise
		permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above;
		indx[1..n] is an output vector that records the row permutation effected by the partial
		pivoting; d is output as 
1 depending on whether the number of row interchanges was even
		or odd, respectively. This routine is used in combination with lubksb to solve linear equations
		or invert a matrix. */
		public static double ludcmp(double a[][], int n, int indx[])
		{
			int i,imax=0,j,k;
			double big,dum,sum,temp;
			double vv[]; // vv stores the implicit scaling of each row.
			double d; //for return
			vv = new double[n]; //vector(1,n);
			d = 1.0;  //No row interchanges yet.
			for (i=1;i<=n;i++) { //Loop over rows to get the implicit scaling information.
				big = 0.0; 
				for (j=1;j<=n;j++)
					if ((temp=Math.abs(a[i-1][j-1])) > big) big=temp;
				if (big == 0.0)
					throw new ArithmeticException("Singular matrix in routine ludcmp");
				//No nonzero largest element.
				vv[i-1]=1.0/big; //Save the scaling.
			}
			for (j=1;j<=n;j++) { //This is the loop over columns of Crouts method.
				for (i=1;i<j;i++) { //This is equation (2.3.12) except for i = j.
					sum=a[i-1][j-1];
					for (k=1;k<i;k++)
						sum -= a[i-1][k-1]*a[k-1][j-1];
					a[i-1][j-1]=sum;
				}
				big=0.0; //Initialize for the search for largest pivot element.
				for (i=j;i<=n;i++) { //This is i = j of equation (2.3.12) and i = j+1. . .N
					sum=a[i-1][j-1]; 	  //of equation (2.3.13).
					for (k=1;k<j;k++)
						sum -= a[i-1][k-1]*a[k-1][j-1];
					a[i-1][j-1]=sum;
					if ( (dum=vv[i-1]*Math.abs(sum)) >= big) {
						//Is the figure of merit for the pivot better than the best so far?
						big=dum;
						imax=i;
					}
				}
				if (j != imax) { //Do we need to interchange rows?
					for (k=1;k<=n;k++) { //Yes, do so...
						dum=a[imax-1][k-1];
						a[imax-1][k-1]=a[j-1][k-1];
						a[j-1][k-1]=dum;
					}
					d = -d; //...and change the parity of d.
					vv[imax-1]=vv[j-1]; //Also interchange the scale factor.
				}
				indx[j-1] = imax;
				if (a[j-1][j-1] == 0.0) a[j-1][j-1] = TINY;
				//If the pivot element is zero the matrix is singular (at least to the precision of the
				//algorithm). For some applications on singular matrices, it is desirable to substitute
				//TINY for zero.
				if (j != n) { //Now, finally, divide by the pivot element.
					dum=1.0/(a[j-1][j-1]);
					for (i=j+1;i<=n;i++)
						a[i-1][j-1] *= dum;
				}
			} //Go back for the next column in the reduction.
			return d;
		}
		
		/**Solves the set of n linear equations A
X = B. Here a[1..n][1..n] is input, not as the matrix
		A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input
		as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector
		B, and returns with the solution vector X. a, n, and indx are not modified by this routine
		and can be left in place for successive calls with different right-hand sides b. This routine takes
		into account the possibility that b will begin with many zero elements, so it is efficient for use
		in matrix inversion.*/
		
		public static void lubksb(double a[][], int n, int indx[], double b[])
		{
			int i,ii=0,ip,j;
			double sum;
			for (i=1;i<=n;i++) { //When ii is set to a positive value, it will become the
				//index of the first nonvanishing element of b. Wenow
				//do the forward substitution, equation (2.3.6). The
				//only new wrinkle is to unscramble the permutation
				//as we go.
				ip=indx[i-1];
				sum=b[ip-1];
				b[ip-1]=b[i-1];
				if(ii != 0)
					for (j=ii;j<=i-1;j++)
						sum -= a[i-1][j-1]*b[j-1];
				else if (sum != 0)
					ii=i; //A nonzero element was encountered, so from now on we
				b[i-1]=sum; // will have to do the sums in the loop above.
			}
			for (i=n;i>=1;i--) { //Now we do the backsubstitution, equation (2.3.7).
				sum=b[i-1];
				for (j=i+1;j<=n;j++)
					sum -= a[i-1][j-1]*b[j-1];
				b[i-1] = sum / a[i-1][i-1]; //Store a component of the solution vector X.
			} //All done!
		}
		
		/** invert square matrix a, a is replaced with it's LU decomposition and the inverse is returned */
		public static double[][] invertQuick(double a[][]){
			int N = a.length;
			double y[][] = new double[N][N];
			double col[] = new double[N];
			int i,j;
			int indx[] = new int[N];
			
			
			ludcmp(a,N,indx); 		//Decompose the matrix just once.
			for(j=1;j<=N;j++) { //Find inverse by columns.
				for(i=1;i<=N;i++)
					col[i-1]=0.0;
				col[j-1]=1.0;
				lubksb(a,N,indx,col);
				for(i=1;i<=N;i++)
					y[i-1][j-1]=col[i-1];
			}
			
			return y;
		}
		
		/** inverse of matrix in, in is returned untouched and it's inverse if returned */
		public static double[][] invertDontDestroy(double in[][]){
			int N = in.length;
			double y[][] = new double[N][N];
			double col[] = new double[N];
			int indx[] = new int[N];
			
			double a[][] = new double[N][N];
			for(int i=0;i<N;i++)
				System.arraycopy(in[i],0,a[i],0,N);
			
			ludcmp(a,N,indx); 		//Decompose the matrix just once.
			for(int j=1;j<=N;j++) { //Find inverse by columns.
				for(int i=1;i<=N;i++)
					col[i-1]=0.0;
				col[j-1]=1.0;
				lubksb(a,N,indx,col);
				for(int i=1;i<=N;i++)
					y[i-1][j-1]=col[i-1];
			}
			
			return y;
		}
		
		
		
		                                                        
		  /*#define GET_PSUM \
		                            for (j=1;j<=ndim;j++) {\
		                            for (sum=0.0,i=1;i<=mpts;i++) sum += p[i][j];\
		                            psum[j]=sum;}
		  #define SWAP(a,b) {swap=(a);(a)=(b);(b)=swap;} */
		  
	    
		
		public interface NRFunctionProvider { public double funk(double vec[]); }
		/**
		 * Multidimensional minimization of the function funk(x) where x[1..ndim] is a vector in ndim
		  dimensions, by the downhill simplex method of Nelder and Mead. The matrix p[1..ndim+1]
		  [1..ndim] is input. Its ndim+1 rows are ndim-dimensional vectors which are the vertices of
		  the starting simplex. Also input is the vector y[1..ndim+1], whose components must be pre-
		  initialized to the values of funk evaluated at the ndim+1 vertices (rows) of p; and ftol the
		  fractional convergence tolerance to be achieved in the function value (n.b.!). On output, p and
		  y will have been reset to ndim+1 new points all within ftol of a minimum function value, and
		  nfunk gives the number of function evaluations taken.
		 */		  
		public static void amoeba(double p[][], double y[], double ftol, int nMax, NRFunctionProvider funk, int nfunk[]){
				if(y.length != p.length)
					throw new IllegalArgumentException("y must be same length as p");
				int ndim = p.length - 1;
				
				final double TINY = 1.0e-10;
				//final int NMAX = 50000;  //  Maximum allowed number of function evaluations.
	            int i,ihi,ilo,inhi,j,mpts=ndim+1;
		        double rtol,sum,ysave,ytry;
		        //rets: nfunk,psuum
		        double psum[] = new double[ndim];						        //psum=vector(1,ndim); 
		        
		        nfunk[0]=0;
		        
		        for (j=1;j<=ndim;j++) {
                    for (sum=0.0,i=1;i<=mpts;i++) sum += p[i-1][j-1];
                    psum[j-1]=sum;
                }
		        
		        for (;;) {
		             ilo=1;
		             // First we must determine which point is the highest (worst), next-highest, and lowest
		             // (best), by looping over the points in the simplex.
		             if(y[1-1]>y[2-1]){
		            	 inhi=2;
		            	 ihi=1;
		             }else{
		            	 inhi=1;
		            	 ihi=2;
		             }
		             
		             for (i=1;i<=mpts;i++) {
		                  if (y[i-1] <= y[ilo-1]) ilo=i;
		                  if (y[i-1] > y[ihi-1]) {
		                       inhi=ihi;
		                       ihi=i;
		                  } else if (y[i-1] > y[inhi-1] && i != ihi) inhi=i;
		             }
		             rtol=2.0*StrictMath.abs(y[ihi-1]-y[ilo-1])/(StrictMath.abs(y[ihi-1])+StrictMath.abs(y[ilo-1])+TINY);
		             //Compute the fractional range from highest to lowest and return if satisfactory.
		             //If returning, put best point and value in slot 1.
		             if (rtol < ftol || nfunk[0] >= nMax) {
		                  double tmp = y[1-1]; y[1-1] = y[ilo-1]; y[ilo-1] = tmp;
		                  for (i=1;i<=ndim;i++){ 
		                	  tmp = p[1-1][i-1]; p[1-1][i-1] = p[ilo-1][i-1]; p[ilo-1][i-1] = tmp;
		                  }
		                  break;
		             }		          
		             nfunk[0] += 2;
		             //Begin a new iteration. First extrapolate by a factor 1 through the face of the simplex
		             //across from the high point, i.e., reflect the simplex from the high point.
		             ytry=amotry(p,y,psum,ndim,funk,ihi,-1.0);
		             if (ytry <= y[ilo-1])
		                  //Gives a result better than the best point, so try an additional extrapolation by a
		                  //factor 2.
		                  ytry=amotry(p,y,psum,ndim,funk,ihi,2.0);
		             else if (ytry >= y[inhi-1]) {
		                  //The reflected point is worse than the second-highest, so look for an intermediate
		                  //lower point, i.e., do a one-dimensional contraction.
		                  ysave=y[ihi-1];
		                  ytry=amotry(p,y,psum,ndim,funk,ihi,0.5);
		                                                   // Cant seem to get rid of that high point. Better
		                  if (ytry >= ysave) {             // contract around the lowest (best) point.
		                       for (i=1;i<=mpts;i++) {	
		                         if (i != ilo) {
		                             for (j=1;j<=ndim;j++)
		                                  p[i-1][j-1]=psum[j-1]=0.5*(p[i-1][j-1]+p[ilo-1][j-1]);
		                             //y[i]=(*funk)(psum);
		                             y[i-1] = funk.funk(psum);
		                         }
		                     }
		                                                  // Keep track of function evaluations.
		                     nfunk[0] += ndim;
		                                                 //  Recompute psum.
		                     //GET_PSUM
		                     for (j=1;j<=ndim;j++) {
		                            for (sum=0.0,i=1;i<=mpts;i++) sum += p[i-1][j-1];
		                            psum[j-1]=sum;}
		                }
		                                                // Correct the evaluation count.
		            } else --nfunk[0];		             //Go back for the test of doneness and the next iteration.
		       }
		     //  free_vector(psum,1,ndim);
		  }
		
		  /** Extrapolates by a factor fac through the face of the simplex across from the high point, tries
		  * it, and replaces the high point if the new point is better. */
		  private  static double amotry(double p[][], double y[], double psum[], int ndim,NRFunctionProvider funk, int ihi, double fac)
		  {
			  
		       int j;
		       double fac1,fac2,ytry,ptry[];
		       //ptry=vector(1,ndim);
		       ptry = new double[ndim];
		       
		       fac1=(1.0-fac)/ndim;
		       fac2=fac1-fac;
		       for (j=1;j<=ndim;j++) ptry[j-1]=psum[j-1]*fac1-p[ihi-1][j-1]*fac2;
		                                        // Evaluate the function at the trial point.
		       //ytry=(*funk)(ptry);
		       ytry = funk.funk(ptry);
		                              //           If its better than the highest, then replace the highest.
		       if (ytry < y[ihi-1]) {
		            y[ihi-1]=ytry;
		            for (j=1;j<=ndim;j++) {
		                psum[j-1] += ptry[j-1]-p[ihi-1][j-1];
		                p[ihi-1][j-1]=ptry[j-1];
		            }
		       }
		       //free_vector(ptry,1,ndim);
		       return ytry;
		  }
		  /**Minimization of a function func of n variables. Input consists of an initial starting point
		 	p[1..n]; an initial matrix xi[1..n][1..n], whose columns contain the initial set of di-
		 	rections (usually the n unit vectors); and ftol, the fractional tolerance in the function value
		 	such that failure to decrease by more than this amount on one iteration signals doneness. On
		 	output, p is set to the best point found, xi is the then-current direction set, fret is the returned
		 	function value at p, and iter is the number of iterations taken. The routine linmin is used.*/
		  /* Never finished this one
		  //void powell(float p[], float **xi, int n, float ftol, int *iter, float *fret,float (*func)(float []))
		  public static int PowellDirectionSet(double p[], double xi[][], int n, double ftol, int iter[], double fret[],NRFunctionProvider func){
			  //void linmin(float p[], float xi[], int n, float *fret,float (*func)(float []));
			  int i,ibig,j;
			  int iter;
			  double del,fp,fptt,t,pt[],ptt[],xit[];
			  pt=new double[n];
			  ptt=new double[n];
			  xit=new double[n];
			  *fret=(*func)(p);  // ??
			  
			  for (j=1;j<=n;j++) pt[j]=p[j];
			  for (iter=1;;++(iter)) {
				  fp=(*fret);
				  ibig=0;
				  // Will be the biggest function decrease.
				  del=0.0;
				  //   In each iteration, loop over all directions in the set.
				  for (i=1;i<=n;i++) {
					  //        Copy the direction,
					  for (j=1;j<=n;j++) xit[j]=xi[j][i];
					  fptt=(*fret);
					  //  minimize along it,
					  linmin(p,xit,n,fret,func);
					  //  and record it if it is the largest decrease
					  if (fptt-(*fret) > del) {
						  //   so far.
						  del=fptt-(*fret);
						  ibig=i;
					  }
				  }
				  if (2.0*(fp-(*fret)) <= ftol*(fabs(fp)+fabs(*fret))+TINY) {
					  //   Termination criterion.
					  free_vector(xit,1,n);
					  free_vector(ptt,1,n);
					  free_vector(pt,1,n);
					  return;
				  }
				  if (*iter == ITMAX) nrerror("powell exceeding maximum iterations.");
				  //    Construct the extrapolated point and the
				  for (j=1;j<=n;j++) {
					  //    average direction moved. Save the
					  ptt[j]=2.0*p[j]-pt[j];
					  //      old starting point.
					  xit[j]=p[j]-pt[j];
					  pt[j]=p[j];
				  }
				  //      Function value at extrapolated point.
				  fptt=(*func)(ptt);
				  if (fptt < fp) {
					  t=2.0*(fp-2.0*(*fret)+fptt)*SQR(fp-(*fret)-del)-del*SQR(fp-fptt);
					  if (t < 0.0) {
						  //    Move to the minimum of the new direc-
						  linmin(p,xit,n,fret,func);
						  //       tion, and save the new direction.
						  for (j=1;j<=n;j++) {
							  xi[j][ibig]=xi[j][n];
							  xi[j][n]=xit[j];
						  }
					  }
				  }
				  //        Back for another iteration.                                                                                                                                                                                                                                                                                                                                                        }
			  }
 		  }*/
		  
		  
		  

			/**
			 * Returns the modified Bessel function of the first kind, modified Bessel function of the second kind, and
			 * its derivatives, all with fractional order.
			 * @param nu The fractional order.
			 * @param x The values at which the Bessel functions should be evaluated.
			 * @return 
			 * ret[0] is the modified Bessel function of the first kind (Inu), 
			 * ret[1] the modified Bessel function of the second kind (Knu), 
			 * ret[2] the derivative of the modified Bessel function of the first kind (dInu/dx)
			 * ret[3] the derivative of the modified Bessel function of the second kind (dKnu/dx)
			 */
			public static double[][] bessik(double nu, double[] x) {
				final double EPS =  1.0e-16;
				final double  FPMIN =  1.0e-30;
				final int  MAXIT  = 10000;
				final double  XMIN  = 2.0;
				final double PI = 3.141592653589793;
							
				int i,l,nl;
				double a,a1,b,c,d,del,del1,delh,dels,e,f,fact,fact2,ff,
					h,p,pimu,q,q1,q2,qnew,ril,ril1,rimu,rip1,ripl,
					ritemp,rk1,rkmu,rkmup,rktemp,s,sum,sum1,x2,xi,xi2,xmu,xmu2;
				double[] ri = new double[x.length], rip = new double[x.length];
				double[] rk = new double[x.length], rkp = new double[x.length];
				double[] gam1 = new double[1], gam2 = new double[1], gampl = new double[1], gammi = new double[1];
				
				for (int n=0;n<x.length;++n) {
					if (x[n] <= 0.0 || nu < 0.0) throw new ArithmeticException("Bad arguments in bessik x[n] = "+x[n]+", nu="+nu);
					nl=(int)(nu+0.5);
					xmu=nu-nl;
					xmu2=xmu*xmu;
					xi=1.0/x[n];
					xi2=2.0*xi;
					h=nu*xi;
					if (h < FPMIN) h=FPMIN;
					b=xi2*nu;
					d=0.0;
					c=h;
					for (i=1;i<=MAXIT;i++) {
						b += xi2;
						d=1.0/(b+d);
						c=b+1.0/c;
						del=c*d;
						h=del*h;
						if (Math.abs(del-1.0) < EPS) break;
					}
					if (i > MAXIT) throw new ArithmeticException("x too large in bessik; try asymptotic expansion");
					ril=FPMIN;
					ripl=h*ril;
					ril1=ril;
					rip1=ripl;
					fact=nu*xi;
					for (l=nl;l>=1;l--) {
						ritemp=fact*ril+ripl;
						fact -= xi;
						ripl=fact*ritemp+ril;
						ril=ritemp;
					}
					f=ripl/ril;
					if (x[n] < XMIN) {
						x2=0.5*x[n];
						pimu=PI*xmu;
						fact = (Math.abs(pimu) < EPS ? 1.0 : pimu/Math.sin(pimu));
						d = -Math.log(x2);
						e=xmu*d;
						fact2 = (Math.abs(e) < EPS ? 1.0 : Math.sinh(e)/e);
						beschb(xmu,gam1,gam2,gampl,gammi);
						ff=fact*(gam1[0]*Math.cosh(e)+gam2[0]*fact2*d);
						sum=ff;
						e=Math.exp(e);
						p=0.5*e/gampl[0];
						q=0.5/(e*gammi[0]);
						c=1.0;
						d=x2*x2;
						sum1=p;
						for (i=1;i<=MAXIT;i++) {
							ff=(i*ff+p+q)/(i*i-xmu2);
							c *= (d/i);
							p /= (i-xmu);
							q /= (i+xmu);
							del=c*ff;
							sum += del;
							del1=c*(p-i*ff);
							sum1 += del1;
							if (Math.abs(del) < Math.abs(sum)*EPS) break;
						}
						if (i > MAXIT) throw new ArithmeticException("bessk series failed to converge");
						rkmu=sum;
						rk1=sum1*xi2;
					} else {
						b=2.0*(1.0+x[n]);
						d=1.0/b;
						h=delh=d;
						q1=0.0;
						q2=1.0;
						a1=0.25-xmu2;
						q=c=a1;
						a = -a1;
						s=1.0+q*delh;
						for (i=2;i<=MAXIT;i++) {
							a -= 2*(i-1);
							c = -a*c/i;
							qnew=(q1-b*q2)/a;
							q1=q2;
							q2=qnew;
							q += c*qnew;
							b += 2.0;
							d=1.0/(b+a*d);
							delh=(b*d-1.0)*delh;
							h += delh;
							dels=q*delh;
							s += dels;
							if (Math.abs(dels/s) < EPS) break;
						}
						if (i > MAXIT) throw new ArithmeticException("bessik: failure to converge in cf2");
						h=a1*h;
						rkmu=Math.sqrt(PI/(2.0*x[n]))*Math.exp(-x[n])/s;
						rk1=rkmu*(xmu+x[n]+0.5-h)*xi;
					}
					rkmup=xmu*xi*rkmu-rk1;
					rimu=xi/(f*rkmu-rkmup);
					ri[n]=(rimu*ril1)/ril;
					rip[n]=(rimu*rip1)/ril;
					for (i=1;i<=nl;i++) {
						rktemp=(xmu+i)*xi2*rk1+rkmu;
						rkmu=rk1;
						rk1=rktemp;
					}
					rk[n]=rkmu;
					rkp[n]=nu*xi*rkmu-rk1;
				}
				
				return new double[][] { ri, rk, rip, rkp };
			}
				

			
			static final double beschb_c1[] = {
				-1.142022680371168e0,6.5165112670737e-3,
				3.087090173086e-4,-3.4706269649e-6,6.9437664e-9,
				3.67795e-11,-1.356e-13};
			static final double beschb_c2[] = {
				1.843740587300905e0,-7.68528408447867e-2,
				1.2719271366546e-3,-4.9717367042e-6,-3.31261198e-8,
				2.423096e-10,-1.702e-13,-1.49e-15};

			/**
			 * Evaluates Gamma1 and Gamma2 by Chebyshev expansion for abs(x) <= 0.5 Also returns
			 * 1/Gamma(1+x) and 1/Gamma(1-x).
			 * @param x
			 */
			public static void beschb(double x, double[] gam1, double[] gam2, double[] gampl, double[] gammi)
			{
				int NUSE1 =  7;
				int NUSE2  = 8;				
				double xx;
				xx=8.0*x*x-1.0;
				gam1[0]=chebev(-1.0,1.0,beschb_c1,NUSE1,xx);
				gam2[0]=chebev(-1.0,1.0,beschb_c2,NUSE2,xx);
				gampl[0]= gam2[0]-x*(gam1[0]);
				gammi[0]= gam2[0]+x*(gam1[0]);
			}
			
			
			/**
			 * Chebyshev evaluation: All arguments are input. c[0..m-1] is an array of Chebyshev coefficients,
			 * the first m elements of c output from chebft (which must have been called with the
			 * same a and b). The Chebyshev polynomial Pm-1 k=0 ckTk(y) − c0=2 is evaluated at a point
			 * y = [x−(b+a)=2]=[(b− a)=2], and the result is returned as the function value.
			 */	
			public static double chebev(double a, double b, double c[], int m, double x) {		
				double d=0.0,dd=0.0,sv,y,y2;
				int j;

				if ((x-a)*(x-b) > 0.0) throw new ArithmeticException("x not in range in routine chebev");
				y2=2.0*(y=(2.0*x-a-b)/(b-a));
				for (j=m-1;j>=1;j--) {
					sv=d;
					d=y2*d-dd+c[j];
					dd=sv;
				}
				return y*d-dd+0.5*c[0];
			}
			
}