A little offtopic, but I'm trying to avoid real work.
A while back I wrote an implementation of the downhill simplex on the WP34s. While that is generally used for nonlinear optimization and has a host of caveats so it takes some massaging of the problem to get it to work, I nonetheless thought that I would give it a try with the example in your posting. It did give (within the tolerance value) the same solution as WolframAlpha (http://goo.gl/DP2BU). It took about 1000 iterations of the simplex. I ran it on the emulator so I am not sure how fast it is on the actual hardware.
I know that your question was about SandMatrix, but I thought that as a curiosity I would toss out my solution. To run it, you XEQ'NMS' with the number of variables (2) in X on the stack.
To use it, you have to code up a function called FUN which implements your function along with constraints. Because my implementation of the simplex assumes minimization, I changed the function value to be negated. To handle constraints in your example, I simply added a large "penalty" to the value of the function if a constraint is not met.
The NMS routine is not really quite ready to be called general purpose. It currently does not let you enter a starting point (it starts around 0,0) and it does not let you specify the tolerance which is currently hardwired. As soon as I clean those up one of these days I'll probably post it to the articles section.
However, the cool thing is that you can use it to minimize pretty much any type of function(s) with up to 8 variables. (All the traditional pitfalls of NelderMead apply).
The listings below are meant to be run through the wp34s preprocessor / assembler.
And, here is the routine that does the solution. The comments make sense if you follow along with the amoeba code in Numerical Recipes.
/*
Nelder Mead (Downhill Simplex) Method
Multidimensional Minimization
Limitations: The code supports up to 8 dimensions, but you will
probably not be able to use that many because of
memory restrictions. It depends on the size of your
evaluation function, register allocations, etc.
Global Register required: N^2 + N + 20
Stack: Requires a stack size of 8
Local Registers: (19 at deepest point) + (your eval function)
Prerequisite: You need to have a global function called 'FUN'
which evaluates your function. The parameters to FUN
are all passed on the stack. FUN needs to return
one value (the evaluated function) in X.
Input: (X) Number of dimensions
Output: R03: F(x1, x2, x3....) at minimum
R04: x1
R05: x2
R06: X3
...
Configuration: This algorithm is known for getting stuck, especially
if the function has lots of local minima or
discontinuities. There is a configuration value that
defines the maximum number of times it shall adjust
the simplex before giving up. It defaults to 10000.
If you wish to adjust this value, it is assigned to
local register .06 right at the beginning of NMS.
The initial guess is aribrarily picked to be between
0 and 1 on all dimensions. If you wish to change the
initial values, change the initValues function
at LBL 31. Make sure that when that function returns
it has valid values in the Y array for the
simplex vertices you have chosen as the starting point.
Algorithm: This is a direct derivative of the Downhill Simplex
method as described and implemented in Numerical
Recipes. Please see that text for a description
of how the algorithm works.
*/
LBL'NMS'
LocR 007
REGS 100
SSIZE8
CLREGS
/* GREG[GREG_DIM] = NP; */
/* the number of dimensions got passed in in the X register */
STO 00
/* init NMAX */
1
SDL 4
STO .06
/* initValues() */
XEQ 31
/* calcPsum(); */
XEQ 30
/* loop indefintely until exit condition */
/* for (;;) { */
mainLoop:: 0
/* LREG[LREG_AMOEBA_ILO] = 0; */
STO .02
/* GREG[GREG_IHI] = getY(0) >
getY(1) ? (LREG[LREG_AMOEBA_INHI]=1,0) (LREG[LREG_AMOEBA_INHI]=0,1); */
0
XEQ 20
1
XEQ 20
x<? Y
JMP m1
0
STO .03
1
STO 02
JMP m2
m1:: 1
STO .03
0
STO 02
/* for (LREG[LREG_AMOEBA_I]=0;
LREG[LREG_AMOEBA_I] < (int)GREG[GREG_DIM]+1;
LREG[LREG_AMOEBA_I]++) { */
m2:: RCL 00
STO .00
mainLoop1:: RCL .00
/* if (getY((int)LREG[LREG_AMOEBA_I]) <= getY((int)LREG[LREG_AMOEBA_ILO])) */
XEQ 20
RCL .02
XEQ 20
x<? Y
JMP m3
/* LREG[LREG_AMOEBA_ILO] = (int)LREG[LREG_AMOEBA_I]; */
RCL .00
STO .02
/* if (getY((int)LREG[LREG_AMOEBA_I]) > getY((int)GREG[GREG_IHI])) { */
m3:: RCL .00
XEQ 20
RCL 02
XEQ 20
x>=? Y
JMP m4
/* LREG[LREG_AMOEBA_INHI]=(int)GREG[GREG_IHI]; */
RCL 02
STO .03
/* GREG[GREG_IHI]=LREG[LREG_AMOEBA_I]; */
RCL .00
STO 02
JMP m5
/* else if (getY((int)LREG[LREG_AMOEBA_I]) >
getY((int)LREG[LREG_AMOEBA_INHI]) &&
LREG[LREG_AMOEBA_I] != GREG[GREG_IHI]) & */
m4:: RCL .00
XEQ 20
RCL .03
XEQ 20
x>=? Y
JMP m5
RCL 02
RCL .00
x=? Y
JMP m5
/* LREG[LREG_AMOEBA_INHI] = (int)LREG[LREG_AMOEBA_I]; */
STO .03
/* loop */
m5:: DSL .00
JMP mainLoop1
/* If tolerance is met, put the appropriate values into the output and be gone */
/* if (FTOL > 2.0*fabs(getY((int)GREG[02])
getY((int)LREG[02]))/
(fabs(getY((int)GREG[02]))+fabs(getY((int)LREG[02]))+TINY)) { */
RCL 02
XEQ 20
RCL .02
XEQ 20

ABS
RCL 02
XEQ 20
ABS
RCL .02
XEQ 20
ABS
+
1
SDR 10
+
/
2
*
1
EEX
CHS
1
6
x<=? Y
JMP m6
/* GREG[GREG_SOLUTION_Y] = getY(LREG[LREG_AMOEBA_ILO]) */
RCL .02
XEQ 20
STO 03
/* for (LREG[LREG_AMOEBA_I]=0; LREG[LREG_AMOEBA_I] < GREG[GREG_DIM]; LREG[LREG_AMOEBA_I]++) { */
RCL 00
STO .00
DEC .00
/* GREG[04 + (int)LREG[00]] = getP((int)LREG[02], (int)LREG[00]); */
mainLoop2:: RCL .02
RCL .00
XEQ 24
4
RCL+ .00
x[<>] Y
STO[>]Y
/* loop */
DSL .00
JMP mainLoop2
/* done */
GTO 99
/* if ((int)GREG[GREG_NUMCALLS] >= NMAX) */
m6:: RCL 01
x<? .06
JMP m7
CL[alpha]
"Loop Max"
VIEW[alpha]
GTO 99
/* GREG[GREG_NUMCALLS] += 2; */
m7:: INC 01
INC 01
/* LREG[LREG_AMOEBA_YTRY]=amotry(1.0); */
1
CHS
XEQ 26
STO .05
/* if (LREG[LREG_AMOEBA_YTRY] <= getY((int)LREG[LREG_AMOEBA_ILO])) */
RCL .02
XEQ 20
x<? Y
JMP m8
2
XEQ 26
STO .05
JMP m9
/*if (LREG[LREG_AMOEBA_YTRY] >= getY((int)LREG[LREG_AMOEBA_INHI])) { */
m8:: RCL .03
XEQ 20
RCL .05
x<? Y
JMP m10
/* LREG[LREG_AMOEBA_YSAVE] = getY((int)GREG[GREG_IHI]); */
RCL 02
XEQ 20
STO .04
/* LREG[LREG_AMOEBA_YTRY]=amotry(0.5); */
# 1/2
XEQ 26
STO .05
/* if (LREG[LREG_AMOEBA_YTRY] >= LREG[LREG_AMOEBA_YSAVE]) { */
RCL .04
x>? Y
JMP m9
/* for (LREG[LREG_AMOEBA_I]=0; LREG[LREG_AMOEBA_I] < (int)GREG[GREG_DIM]+1 ;LREG[LREG_AMOEBA_I]++) { */
RCL 00
STO .00
mainLoop3:: RCL .00
/* if (LREG[LREG_AMOEBA_I] != (int)LREG[LREG_AMOEBA_ILO]) { */
x=? .02
JMP m11
/* for (LREG[LREG_AMOEBA_J] = 0; LREG[LREG_AMOEBA_J] < GREG[GREG_DIM]; LREG[LREG_AMOEBA_J]++) { */
RCL 00
STO .01
DEC .01
mainLoop4:: RCL .00
/*setPsum((int)LREG[LREG_AMOEBA_J], 0.5 *
(getP((int)LREG[LREG_AMOEBA_I], (int)LREG[LREG_AMOEBA_J]) +
getP((int)LREG[LREG_AMOEBA_ILO], (int)LREG[LREG_AMOEBA_J]))); */
RCL .01
XEQ 24
RCL .02
RCL .01
XEQ 24
+
2
/
RCL .01
x[<>] Y
XEQ 23
/* setP((int)LREG[LREG_AMOEBA_I], (int)LREG[LREG_AMOEBA_J], getPsum((int)LREG[LREG_AMOEBA_J])); */
RCL .00
RCL .01
RCL .01
XEQ 22
XEQ 25
/* increment and loop */
DSL .01
JMP mainLoop4
/* setY((int)LREG[LREG_AMOEBA_I], func(&GREG[(int)GREG_PSUM])); */
RCLS 12
XEQ'FUN'
RCL .00
x[<>] Y
XEQ 21
/* increment and loop */
m11:: DSL .00
JMP mainLoop3
/* GREG[GREG_NUMCALLS] += GREG[GREG_DIM]; */
RCL 00
STO+ 01
XEQ 30
JMP m9
m10:: DEC 01
/* back to the top of the loop */
m9:: JMP mainLoop
/* we are done */
LBL 99
STOP
RTN
/* get the value pointed to by X, and lose the pointer from the stack */
LBL 00
RCL[>]X
x[<>] Y
DROP
RTN
/* getY */
LBL 20
3
+
XEQ 00
RTN
/* setY */
LBL 21
x[<>] Y
3
+
x[<>] Y
STO[>]Y
RTN
/* getPsum */
LBL 22
# 12
+
XEQ 00
RTN
/* setPsum */
LBL 23
x[<>] Y
# 12
+
x[<>] Y
STO[>]Y
RTN
/* getP */
LBL 24
x[<>] Y
RCL[times] 00
+
# 20
+
XEQ 00
RTN
/* setP */
LBL 25
[<>] ZYXT
RCL[times] 00
+
# 20
+
x[<>] Y
STO[>]Y
RTN
/* amotry */
LBL 26
LocR 13 /* max NP of 8 */
/* LREG[LREG_AMOTRY_IN_FAC] = fac */
STO .04
/* LREG[LREG_AMOTRY_FAC1] = (1.0  LREG[LREG_AMOTRY_IN_FAC]) / GREG[GREG_DIM]; */
CHS
1
+
RCL 00
/
STO .01
/* LREG[LREG_AMOTRY_FAC2] = LREG[LREG_AMOTRY_FAC1]LREG[LREG_AMOTRY_IN_FAC]; */
RCL .04

STO .02
/* for (LREG[LREG_AMOTRY_J] = 0; LREG[LREG_AMOTRY_J] < GREG[GREG_DIM]; LREG[LREG_AMOTRY_J]++) { {*/
RCL 00
STO .00
DEC .00
/* LREG[(int)(LREG_AMOTRY_PTRYOFFSET +
LREG[LREG_AMOTRY_J])] = getPsum((int)LREG[LREG_AMOTRY_J]) *
LREG[LREG_AMOTRY_FAC1] 
getP((int)GREG[GREG_IHI], (int)LREG[LREG_AMOTRY_J]) * LREG[LREG_AMOTRY_FAC2]; */
amotryLoop1:: RCL .00
XEQ 22
RCL .01
[times]
RCL 02
RCL .00
XEQ 24
RCL .02
[times]

RCL .00
# 117
+
x[<>] Y
STO[>]Y
/* increment the loop variable and loop */
DSL .00
JMP amotryLoop1
/*LREG[LREG_AMOTRY_YTRY] = func(&LREG[LREG_AMOTRY_PTRYOFFSET]); */
RCLS .05
XEQ'FUN'
STO .03
/* if (LREG[LREG_AMOTRY_YTRY] < getY((int)GREG[GREG_IHI])) { { */
RCL 02
XEQ 20
RCL .03
x>=? Y
JMP amotrycond1
/* setY((int)GREG[GREG_IHI], LREG[LREG_AMOTRY_YTRY]); */
RCL 02
RCL .03
XEQ 21
/* for (LREG[LREG_AMOTRY_J] = 0; LREG[LREG_AMOTRY_J] < GREG[GREG_DIM]; LREG[LREG_AMOTRY_J]++) { { */
RCL 00
STO .00
DEC .00
/* setPsum((int)LREG[LREG_AMOTRY_J], getPsum((int)LREG[LREG_AMOTRY_J]) + LREG[(int)
(LREG_AMOTRY_PTRYOFFSET + LREG[LREG_AMOTRY_J])]  getP((int)GREG[GREG_IHI],
(int)LREG[LREG_AMOTRY_J])); */
amotryLoop2:: RCL .00
ENTER
/* setPsum((int)LREG[LREG_AMOTRY_J], getPsum((int)LREG[LREG_AMOTRY_J]) +
LREG[(int)(LREG_AMOTRY_PTRYOFFSET + LREG[LREG_AMOTRY_J])] 
getP((int)GREG[GREG_IHI], (int)LREG[LREG_AMOTRY_J])); */
XEQ 22
# 117
RCL+ .00
XEQ 00
+
RCL 02
RCL .00
XEQ 24

XEQ 23
/* setP((int)GREG[GREG_IHI], (int)LREG[LREG_AMOTRY_J], LREG[(int)(LREG_AMOTRY_PTRYOFFSET + LREG[LREG_AMOTRY_J])]); */
RCL 02
RCL .00
# 117
RCL+ .00
XEQ 00
XEQ 25
/* increment the loop variable and loop */
DSL .00
JMP amotryLoop2
/* return LREG[LREG_AMOTRY_YTRY]; */
amotrycond1:: RCL .03
RTN
/* calcPsum */
LBL 30
LocR 3
/* for (LREG[LREG_CALCSUM_J] = 0; LREG[LREG_CALCSUM_J] < GREG[GREG_DIM]; LREG[LREG_CALCSUM_J]++) { */
RCL 00
STO .01
DEC .01
calcSumLoop1:: 0
/* for (LREG[LREG_CALCSUM_SUM] = 0.0, LREG[LREG_CALCSUM_I]=0; LREG[LREG_CALCSUM_I] <
(int)GREG[GREG_DIM]+1; LREG[LREG_CALCSUM_I]++) */
STO .02
RCL 00
STO .00
calcSumLoop2:: RCL .00
/* LREG[LREG_CALCSUM_SUM] = LREG[LREG_CALCSUM_SUM] + getP((int)LREG[LREG_CALCSUM_I], (int)LREG[LREG_CALCSUM_J]); */
RCL .01
XEQ 24
STO+ .02
/* increment loop 2 and loop */
DSL .00
JMP calcSumLoop2
/* setPsum((int)LREG[LREG_CALCSUM_J], LREG[LREG_CALCSUM_SUM]); */
RCL .01
RCL .02
XEQ 23
/* loop */
DSL .01
JMP calcSumLoop1
RTN
/* initValues */
LBL 31
LocR 10
/* for (LREG[LREG_INIT_I]=0; LREG[LREG_INIT_I] < (GREG[GREG_DIM]+1); LREG[LREG_INIT_I]++) { */
RCL 00
STO .00
initValuesLoop1:: RCL 00
STO .01
DEC .01
/* for (LREG[LREG_INIT_J] = 0;LREG[LREG_INIT_J] < GREG[GREG_DIM]; LREG[LREG_INIT_J]++) { { */
initValuesLoop2:: RCL .01
/* LREG[LREG_INIT_XOFFSET + (int)LREG[LREG_INIT_J]] = (LREG[LREG_INIT_I] == (LREG[LREG_INIT_J]+1) ? 1.0 : 0.0); */
0
+
RCL .00
0
x[<>] Y
x=? Z
INC Y
DROP
RCL .01
# 114
+
x[<>] Y
STO[>]Y
/* setP((int)LREG[LREG_INIT_I], (int)LREG[LREG_INIT_J], LREG[LREG_INIT_XOFFSET + (int)LREG[LREG_INIT_J]]); */
RCL .00
RCL .01
[<>] ZXYT
XEQ 25
/* increment loop2 and loop */
DSL .01
JMP initValuesLoop2
/* setY((int)LREG[LREG_INIT_I],func(&LREG[LREG_INIT_XOFFSET])); */
RCLS .02
XEQ'FUN'
RCL .00
x[<>] Y
XEQ 21
/* increment loop1 and loop */
DSL .00
JMP initValuesLoop1
RTN