HP Forums
Best Regression Fit - Printable Version

+- HP Forums (https://www.hpmuseum.org/forum)
+-- Forum: HP Software Libraries (/forum-10.html)
+--- Forum: HP Prime Software Library (/forum-15.html)
+--- Thread: Best Regression Fit (/thread-9440.html)



Best Regression Fit - Eddie W. Shore - 11-04-2017 02:08 PM

The program BESTFIT compares a set of regressions to determine a best fit. This simulates a feature presented on the Hewlett Packard HP 48S, HP 48G, HP 49G, and HP 50g. BESTFIT compares the correlations of the following four regression models:

1. Linear: y = a * x + b
2. Logarithmic: y = a * ln x + b
3. Exponential: y = b * a^x (y = b * e^(ln a * x))
4. Power: y = b * x^a

The output is a two element list: a string of the best fit equation and its corresponding correlation.

HP Prime Program: BESTFIT
Code:

EXPORT BESTFIT(L1,L2)
BEGIN
// 2017-11-02 EWS
// Simulate Best Fit
// HP 48GX,49G,50g

// initialize
LOCAL clist,m,c,v,s,n;
clist:={0,0,0,0};

// test
// correlation is linear only
// requires approx()
// linear y=a*x+b
clist[1]:=approx(correlation(L1,L2));
// log y=a*LN(x)+b 
c:=approx(correlation(LN(L1),L2));
IF IM(c)==0 THEN
clist[2]:=c;
END;
// exponential y=b*e^(a*x)
c:=approx(correlation(L1,LN(L2)));
IF IM(c)==0 THEN
clist[3]:=c;
END;

// power y=b*a^x
c:=approx(correlation(LN(L1),LN(L2)));
IF IM(c)==0 THEN
clist[4]:=c;
END;

// test
// POS(L0^2,MAX(L0^2))
m:=POS(clist^2,MAX(clist^2));
c:=clist[m];

IF m==1 THEN
v:=linear_regression(L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*X";
RETURN {s,c};
END;

IF m==2 THEN
v:=logarithmic_regression(L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*LN(X)";
RETURN {s,c};
END;

IF m==3 THEN
v:=exponential_regression(L1,L2);
s:=STRING(v[2])+"*"+STRING(v[1])+
"^X";
RETURN {s,c};
END;

IF m==4 THEN
v:=power_regression(L1,L2);
s:=STRING(v[2])+"*X^"+
STRING(v[1]);
RETURN {v,s};
END;

END;

BESTFIT2: An Extended Version

Version 2 adds the following regressions:

5. Inverse: y = b + a/x
6. Simple Logistic: y = 1/(b + a*e^(-x))
7. Simple Quadratic: y = b + a*x^2
8. Square Root: y = √(a*x + b)

HP Prime Program: BESTFIT2
Code:

EXPORT BESTFIT2(L1,L2)
BEGIN
// 2017-11-02 EWS
// Simulate Best Fit
// HP 48GX,49G,50g
// additional models

// initialize
LOCAL clist,m,c,v,s;
clist:={0,0,0,0,0,0,0,0};

// test
// correlation is linear only
// requires approx()

// linear y=a*x+b
clist[1]:=approx(correlation(L1,L2));

// log y=a*LN(x)+b 
c:=approx(correlation(LN(L1),L2));
IF IM(c)==0 THEN
clist[2]:=c;
END;

// exponential y=b*e^(a*x)
c:=approx(correlation(L1,LN(L2)));
IF IM(c)==0 THEN
clist[3]:=c;
END;

// power y=b*a^x
c:=approx(correlation(LN(L1),LN(L2)));
IF IM(c)==0 THEN
clist[4]:=c;
END;

// inverse y=b+a/x
IF POS(L1,0)==0 THEN
clist[5]:=approx(correlation(1/L1,
L2));
END;

// simple logistic
IF POS(L2,0)==0 THEN
clist[6]:=approx(correlation(e^(−L1),
1/L2));
END;

// simple quadratic
clist[7]:=approx(correlation(L1^2,
L2));

// square root 
IF ΣLIST(L2≥0)==SIZE(L2) THEN
clist[8]:=approx(correlation(L1,
L2^2));
END;

// test
// POS(L0^2,MAX(L0^2))
m:=POS(clist^2,MAX(clist^2));
c:=clist[m];

IF m==1 THEN
v:=linear_regression(L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*X";
END;

IF m==2 THEN
v:=logarithmic_regression(L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*LN(X)";
END;

IF m==3 THEN
v:=exponential_regression(L1,L2);
s:=STRING(v[2])+"*"+STRING(v[1])+
"^X";
END;

IF m==4 THEN
v:=power_regression(L1,L2);
s:=STRING(v[2])+"*X^"+
STRING(v[1]);
END;

// inverse
IF m==5 THEN
v:=linear_regression(1/L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"/X";
END;

// simple logistic
IF m==6 THEN
v:=linear_regression(e^(−L1),
1/L2);
s:="1/("+STRING(v[2])+"+"+
STRING(v[1])+"*e^(−X))";
END;

// simple quadratic
IF m==7 THEN
v:=linear_regression(L1^2,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*X^2";
END;

// square root
IF m==8 THEN
v:=linear_regression(L1,L2^2);
s:="√("+STRING(v[2])+"+"+
STRING(v[1])+"*X)";
END;

RETURN {s,c};
END;

Link: http://edspi31415.blogspot.com/2017/11/hp-prime-best-regression-fit.html


RE: Best Regression Fit - salvomic - 11-04-2017 03:51 PM

thanks Eddie,
good program.
It works better in Home: I tried in CAS with the second list with a series of LN() and it return "Error: Bad argument type", the same input is good in Home, however. Please, control it.

Salvo


RE: Best Regression Fit - akmon - 12-17-2017 04:17 PM

Very interesting program. I missed that feature on the HP Prime, till now.

I´ve done a little modification for better input data.

Code:
EXPORT BESTFIT2()
BEGIN
// 2017-11-02 EWS
// Simulate Best Fit
// HP 48GX,49G,50g
// additional models

// initialize
EDITMAT(M1,{"INTRODUZCA LOS DATOS",{},{"X","Y"}});
L1:=mat2list(col(M1,1));
L2:=mat2list(col(M1,2));
LOCAL clist,m,c,v,s;
clist:={0,0,0,0,0,0,0,0};

// test
// correlation is linear only
// requires approx()
// linear y=a*x+b
clist[1]:=approx(correlation(L1,L2));

// log y=a*LN(x)+b 
c:=approx(correlation(LN(L1),L2));
IF IM(c)==0 THEN
clist[2]:=c;
END;

// exponential y=b*e^(a*x)
c:=approx(correlation(L1,LN(L2)));
IF IM(c)==0 THEN
clist[3]:=c;
END;

// power y=b*a^x
c:=approx(correlation(LN(L1),LN(L2)));
IF IM(c)==0 THEN
clist[4]:=c;
END;

// inverse y=b+a/x
IF POS(L1,0)==0 THEN
clist[5]:=approx(correlation(1/L1,
L2));
END;

// simple logistic
IF POS(L2,0)==0 THEN
clist[6]:=approx(correlation(e^(−L1),
1/L2));
END;

// simple quadratic
clist[7]:=approx(correlation(L1^2,
L2));

// square root 
IF ΣLIST(L2≥0)==SIZE(L2) THEN
clist[8]:=approx(correlation(L1,
L2^2));
END;

// test
// POS(L0^2,MAX(L0^2))
m:=POS(clist^2,MAX(clist^2));
c:=clist[m];

IF m==1 THEN
v:=linear_regression(L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*X";
END;

IF m==2 THEN
v:=logarithmic_regression(L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*LN(X)";
END;

IF m==3 THEN
v:=exponential_regression(L1,L2);
s:=STRING(v[2])+"*"+STRING(v[1])+
"^X";
END;

IF m==4 THEN
v:=power_regression(L1,L2);
s:=STRING(v[2])+"*X^"+
STRING(v[1]);
END;

// inverse
IF m==5 THEN
v:=linear_regression(1/L1,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"/X";
END;

// simple logistic
IF m==6 THEN
v:=linear_regression(e^(−L1),
1/L2);
s:="1/("+STRING(v[2])+"+"+
STRING(v[1])+"*e^(−X))";
END;

// simple quadratic
IF m==7 THEN
v:=linear_regression(L1^2,L2);
s:=STRING(v[2])+"+"+STRING(v[1])+
"*X^2";
END;

// square root
IF m==8 THEN
v:=linear_regression(L1,L2^2);
s:="√("+STRING(v[2])+"+"+
STRING(v[1])+"*X)";
END;

RETURN {s,c};
END;



RE: Best Regression Fit - Namir - 12-27-2017 03:31 PM

Here is my version of BESTFIT. It iterates over many powers for Y and X, including powers with fractions.

Code:
EXPORT BESTFIT(Mat, IdxX, IdxY)
BEGIN

// initialize
L1:=mat2list(col(Mat, IdxX)); // X
L2:=mat2list(col(Mat, IdxY)); // Y
LOCAL Lx, Ly;
LOCAL xpwr, ypwr, ix, iy, xscale, yscale;
LOCAL xminIdx, xmaxIdx, yminIdx, ymaxIdx;
LOCAL r2, coeffs;
LOCAL bestR2, bestXpwr, bestYpwr;

// combination of index ranges and scales should iterate
// in range -4, -3.5, -3, ..., -0.5, 0, 0.5, 1, 1.5, ..., 4
// Adjust these values as you see fit
xminIdx := -8;
yminIdx := -8;
xmaxIdx := 8;
ymaxIdx := 8;
xscale := 0.5;
yscale := 0.5;
bestR2 := -1;

FOR iy FROM yminIdx TO ymaxIdx DO
   // Transform Y values
   ypwr := iy * yscale; 
   IF iy == 0 THEN
     Ly := LN(L2);
   ELSE 
     Ly := L2^ypwr;
   END;  
     
   FOR ix FROM xminIdx TO xmaxIdx DO
     xpwr := ix * xscale;   
     // Transform X values
     IF ix == 0 THEN
       Lx := LN(L1);
     ELSE 
       Lx := L1^xpwr;
     END;
     r2 := approx(correlation(Lx, Ly))^2;
     if r2 > bestR2 THEN
       bestR2 := r2;
       bestXpwr := xpwr;
       bestYpwr := ypwr;
       coeffs := linear_regression(Lx, Ly);
       IF r2==1 THEN
         RETURN {bestR2, bestYpwr, bestXpwr, coeffs[1], coeffs[2]};
       END;
     END;
  END;
END;
RETURN {bestR2, bestYpwr, bestXpwr, coeffs[1], coeffs[2]};
END;

The function requires the name of a data matrix and the indices of the columns that select X and Y data. The function returns:

1) Best Rsqr value.
2) Best power for Y. Please note that a 0 means ln(x).
3) Best power for X. Please note that a 0 means ln(x).
5) Slope for best curve.
6) Intercept of best curve.

The fitted models are in the general form:

Y^ypwr = intercept + slope*X^xpwr

When ypwr or xpwr is 0, the term represents a natural logarithm.

You can alter the range for the loops, the scales for X and Y as you see fit. Just make sure you don't try to raise negative values to negative powers or powers with fractions. If you have date the best thing to do is to scale their values to be in the range of 1 to 2, using:

x(i) = 1 + (x(i) - min(x))/(max(x) - min(x))
y(i) = 1 + (y(i) - min(y))/(max(y) - min(y))

Such a scaling ensures that all transformations will not generate a run time errors.

Enjoy

Namir


RE: Best Regression Fit version 2 - Namir - 12-27-2017 09:14 PM

The next version of the best fit, BESTFIT2, tests the power model between X and Y first, before testing all other transformations. If you have negative values in either X or Y the do not use this function:

Code:
EXPORT BESTFIT2(Mat, IdxX, IdxY)
BEGIN

// initialize
L1:=mat2list(col(Mat, IdxX)); // X
L2:=mat2list(col(Mat, IdxY)); // Y
LOCAL Lx, Ly;
LOCAL xpwr, ypwr, ix, iy, xscale, yscale;
LOCAL xminIdx, xmaxIdx, yminIdx, ymaxIdx;
LOCAL r2, coeffs;
LOCAL bestR2, bestXpwr, bestYpwr;

// combination of index ranges and scales should iterate
// in range -4, -3.5, -3, ..., -0.5, 0, 0.5, 1, 1.5, ..., 4
// Adjust these values as you see fit
xminIdx := -8;
yminIdx := -8;
xmaxIdx := 8;
ymaxIdx := 8;
xscale := 0.5;
yscale := 0.5;
// Test power model first
bestR2 := approx(correlation(LN(L1), LN(L2)))^2;
bestXpwr := 0;
bestYpwr := 0;
coeffs := linear_regression(LN(L1), LN(L2));
       
FOR iy FROM yminIdx TO ymaxIdx DO
   // Transform Y values
   ypwr := iy * yscale; 
   IF iy == 0 THEN
     Ly := LN(L2);
   ELSE 
     Ly := L2^ypwr;
   END;  
     
   FOR ix FROM xminIdx TO xmaxIdx DO
     xpwr := ix * xscale;   
     // Transform X values
     IF ix == 0 THEN
       Lx := LN(L1);
     ELSE 
       Lx := L1^xpwr;
     END;
     r2 := approx(correlation(Lx, Ly))^2;
     IF r2 > bestR2 THEN
       bestR2 := r2;
       bestXpwr := xpwr;
       bestYpwr := ypwr;
       coeffs := linear_regression(Lx, Ly);
       IF r2==1 THEN
         RETURN {bestR2, bestYpwr, bestXpwr, coeffs[1], coeffs[2]};
       END;
     END;
  END;
END;
RETURN {bestR2, bestYpwr, bestXpwr, coeffs[1], coeffs[2]};
END;

I wrote this modified version because while testing Y=X^2, I was getting 1/Y^2 = 1/X^4 as the best model. While the answer is theoretically correct, it is not in the desired form.

Namir


RE: Best Regression Fit - Namir - 12-28-2017 12:54 PM

Here is a third version, BESTFIT3, that maps the X and Y data to values in the range of (1, 2), making all transformations possible:

The function returns:
1) Best Rsqr value.
2) Power of best Y transformation (0 means ln(y)).
3) Power of best X transformation (0 means ln(x)).
4) Best slope.
5) Best intercept.
6) Minimum X value.
7) Maximum X value.
8) Minimum Y value.
9) Maximum Y value.

Make sure you use the powers, slope, intercept, minima, and maxima in estimating Yhat values:

Yhat' = (slope * ((X-xmin)/(xmax-xmin)+1)^xpwr + intercept)^(1/ypwr)
Yhat = ymin + (Yhat'-1)*(ymax-ymin)

if xpwr=0 and ywpr!=0 use:

Yhat' = (slope * ln((X-xmin)/(xmax-xmin)+1) + intercept)^(1/ypwr)
Yhat = ymin + (Yhat'-1)*(ymax-ymin)

if xpwr!=0 and ywpr=0 use:

Yhat' = exp(slope * ((X-xmin)/(xmax-xmin)+1)^xpwr + intercept)
Yhat = ymin + (Yhat'-1)*(ymax-ymin)

if both xpwr and ypwr are zero, use:

Yhat' = exp(slope * ln((X-xmin)/(xmax-xmin)+1) + intercept)
Yhat = ymin + (Yhat'-1)*(ymax-ymin)


Use ln() functions when xpwr s 0 and use exp() when the ypwr is 0.

Code:
EXPORT BESTFIT3(Mat, IdxX, IdxY)
BEGIN
LOCAL Lx, Ly;
LOCAL xpwr, ypwr, ix, iy, xscale, yscale;
LOCAL xminIdx, xmaxIdx, yminIdx, ymaxIdx;
LOCAL r2, coeffs;
LOCAL bestR2, bestXpwr, bestYpwr;
LOCAL xmin, xmax, ymin, ymax;

// initialize
L1:=mat2list(col(Mat, IdxX)); // X
L2:=mat2list(col(Mat, IdxY)); // Y
xmin:=MIN(L1);
xmax:=MAX(L1);
ymin:=MIN(L2);
ymax:=MAX(L2);
// Map data into (1,2) range
L1:=L1-xmin;
L1:=L1/(xmax-xmin)+1;
L2:=L2-ymin;
L2:=L2/(ymax-ymin)+1;

// combination of index ranges and scales should iterate
// in range -4, -3.5, -3, ..., -0.5, 0, 0.5, 1, 1.5, ..., 4
// Adjust these values as you see fit
xminIdx := -8;
yminIdx := -8;
xmaxIdx := 8;
ymaxIdx := 8;
xscale := 0.5;
yscale := 0.5;
// Test power model first
bestR2 := approx(correlation(LN(L1), LN(L2)))^2;
bestXpwr := 0;
bestYpwr := 0;
coeffs := linear_regression(LN(L1), LN(L2));
IF bestR2==1 THEN
  RETURN {bestR2, 0, 0, coeffs[1], coeffs[2], xmin, xmax, ymin, ymax};
END;       

FOR iy FROM yminIdx TO ymaxIdx DO
   // Transform Y values
   ypwr := iy * yscale; 
   IF iy == 0 THEN
     Ly := LN(L2);
   ELSE 
     Ly := L2^ypwr;
   END;  
     
   FOR ix FROM xminIdx TO xmaxIdx DO
     xpwr := ix * xscale;   
     // Transform X values
     IF ix == 0 THEN
       Lx := LN(L1);
     ELSE 
       Lx := L1^xpwr;
     END;
     r2 := approx(correlation(Lx, Ly))^2;
     IF r2 > bestR2 THEN
       bestR2 := r2;
       bestXpwr := xpwr;
       bestYpwr := ypwr;
       coeffs := linear_regression(Lx, Ly);
       IF r2==1 THEN
         RETURN {bestR2, bestYpwr, bestXpwr, coeffs[1], coeffs[2], xmin, xmax, ymin, ymax};
       END;
     END;
  END;
END;
RETURN {bestR2, bestYpwr, bestXpwr, coeffs[1], coeffs[2], xmin, xmax, ymin, ymax};
END;