%%%%%%%%%%%%%%%%%%%%%% USER-SPECIFIC MACROS %%%%%%%%%%%%%%%%%%%%%%%%%%%%
\def\Name    {Joy Denise Williams}
\def\Address {Department of Mathematics}
\def\Date    {1998}
\def\Type    {DISSERTATION}
\def\Title   {Spectral Bounds for Entropy Models}
\def\Degree  {Doctor of Philosophy}
\def\Director{Jon Lee}

%%%%%%%%%%%%%%%%%%%%%% BEGIN LATEX DOCUMENT %%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentstyle[12pt,double,matt]{report}
\input psfig.tex

\newcommand{\bb}{{$\bullet$}}
\newcommand{\bsum}[1]{\displaystyle {\sum_{#1}}}
\newcommand{\Int}{{\displaystyle \int}}
\newcommand{\bprod}[1]{\displaystyle {\prod_{#1}}}
\newcommand{\Bprod}[2]{\displaystyle {\prod_{#1}^{#2}}}
\newcommand{\bfrac}[2]{\displaystyle {\frac{#1}{#2}}}
\newcommand{\Bsum}[2]{\displaystyle {\sum_{#1}^{#2}}}
\newtheorem{proposition}{Proposition}[section]
\newtheorem{lemma}[proposition]{Lemma}          %[section]
\newtheorem{corollary}[proposition]{Corollary}  %[section]
\newtheorem{example}[proposition]{Example}  %[section]
\newtheorem{theorem}[proposition]{Theorem}      %[section]
\newtheorem{question}[proposition]{Question}     %[section]
\newtheorem{conjecture}[proposition]{Conjecture}   %[section]
\newtheorem{obs}{Observation}
\newenvironment{proof}{\noindent{\em Proof:} }{\hfill $\Box$}

\begin{document}
%\ABSTRACTCOVER      % DISSERTATION ONLY
%\ABSTRACTTITLE      % DISSERTATION ONLY
%\ABSTRACT  
%                   \begin{abstract}
%We consider the problem of selecting from a finite set $N$, an
%optimal subset $S$ \doublespace of predetermined size $s$.
%By varying the optimality criterion, we generate a family
%of problems, each of which we formulate as an integer nonlinear
%program.
%
%The first such problem we consider is the
%``Generalized Constrained Maximum-Entropy
%Sampling Problem'' (GCMESP).
%Given an integer $t$ ($0 < t \leq s$)
%and a symmetric positive-semidefinite matrix $C$, the optimality
%criterion for the GCMESP is taken to be the
%(logarithm of the) product of the $t$ largest eigenvalues
%of an $s \times s$ principle submatrix of $C$. We construct
%an eigenvalue-based upper-bounding function for the GCMESP 
%and incorporate it
%into a branch-and-bound algorithm. 
%
%Next, we show that for certain choices of $t$, the resulting
%programs are well-known problems with far-reaching
%applications; we examine in detail two such special cases, 
%namely the ``Constrained Maximum-Entropy Sampling Problem'' (CMESP)
%and the ``D-Optimality Problem'' (DOP).
%In each case, the optimality criterion reduces to a 
%determinant, and we show how these determinants are related to
%the {\it entropy function}, which measures
%the level of uncertainty concerning the outcome of the experiment
%whereby we select (and observe) a particular subset $S$ of $N$.
%
%We then examine more closely the CMESP, focusing on one of its
%special cases (the ``Constrained Maximum-Entropy Sampling Problem
%with Fixed Costs'' (CMESPF)),
%as well as on a new problem that arises
%from a modification of its optimality criterion (the
%``Constrained Maximum-Entropy Remote Sampling Problem''(CMERSP)).
%For each
%of the resulting integer nonlinear programs, we discuss a method
%of solution.
%
%We coded in C the algorithms for solving the CMESP, the CMERSP,
%and the CMESPF. We include
%a description of the serial and parallel
%code as well as results from the 
%computer implementations.
%%%%%               The dissertation abstract must not exceed
%%%%               three hundred fifty (350) words
%%%%               and must be \underline{double} spaced.
%%%%               It must be signed and dated by the student.
%%%%               The dissertation abstract is preceded by a
%%%%               cover page
%%%%               and
%%%%               title page.
%                   \end{abstract}
%\APPROVAL
%\RULES
%\COVERPAGE
%\TITLEPAGE
%\CPRIGHT 
%\newpage~           % BLANK PAGE REQUIRED
%\thispagestyle{empty}
%\ACKNOWLEDGMENT     % OPTIONAL
%                   \begin{doublespace}
%                   \begin{acknowledgment}
%
%To my advisor and dissertation chair, Dr. Jon Lee, I extend my
%deepest gratitude \doublespace for  all of his guidance and support during
%my years as a graduate student. His superior  intellect, his
%constant patience, and his high level of integrity are all 
%qualities I admire  and respect.
%
%Next, I wish to thank Dr. Carl Lee, Dr. Tom Hayden, Dr. Raymond 
%Rishel, and Dr. William Griffith for serving on my dissertation
%committee and for being such positive influences on my life these
%past five years. 
%
%Finally, I wish to express appreciation to my parents, Fred and
%JoAnn Williams, who have supported me and encouraged me
%in the pursuit of every one of my dreams and aspirations, 
%including that of obtaining a Ph.D.
%%%%               The acknowledgment page and all pages following
%%%%              until the first page of the text are numbered in
%%%%              small Roman numerals beginning with
%%%%              Roman numeral "iii".  Arabic numerals should be
%%%%              used beginning with the first page of the text.
%                   \end{acknowledgment}
%                    \end{doublespace}
%\addcontentsline{toc}{chapter}{{\bf Acknowledgments}}
%\addcontentsline{toc}{chapter}{{\bf List of Tables}}
%\addcontentsline{toc}{chapter}{{\bf List of Figures}}
%\clearpage 
{\def\thepage{\roman{page}}
\setcounter{page}{4}
\noindent{\Huge {\bf Table of Contents}} \\ \\ \\
{\bf Acknowledgments} \hfill {\bf iii} \\ \\
{\bf List of Tables} \hfill {\bf v} \\ \\
{\bf List of Figures} \hfill {\bf vi} \\ \\
{\bf 1} \hspace{.08in} {\bf Introduction} \hfill {\bf 1} \\ 
\\
{\bf 2}  \hspace{.08in} {\bf The Generalized Constrained
Maximum-Entropy Sampling Problem
\hspace{.15in}}  

 {\bf (GCMESP)} \hfill {\bf 11} 

 2.1 ~The CMESP and the DOP as Special Cases \dotfill\ 11 

 2.2 ~Upper Bounds \dotfill\ 13 

 2.3 ~An Algorithm for Solving the GCMESP \dotfill\ 28

 2.4 ~Computer Implementation \dotfill\ 45

 2.5 ~The Constrained Maximum-Entropy Sampling Problem with

\hspace{.27in} Fixed Costs (CMESPF) \dotfill\ 57 \\
 \\
{\bf 3} \hspace{.06in} {\bf The Constrained Maximum-Entropy Remote Sampling Problem} 

 {\bf (CMERSP)} \hfill {\bf 64} 

 3.1 ~Formulation \dotfill\ 64

 3.2 ~Complexity \dotfill\ 66

 3.3 ~Upper Bounds \dotfill\ 67

 3.4 ~Computational Results \dotfill\ 80  \\
 \\
{\bf Appendices} \hfill {\bf 81}

Appendix A: The CMESP as a model for the
problem of finding a 

\hspace{.27in} ``most-informative subset'' \dotfill\  82

Appendix B: The subroutine worker.c, as 
 implemented in parallel for 

\hspace{.27in} the CMESP \dotfill\  86 \\ \\
{\bf References} \hfill {\bf 111}  \\ \\
{\bf Vita} \hfill {\bf 114}
\newpage
\noindent{\Huge {\bf List of Tables}} \\ \\ \\

2.1  ~Choice of Parameters for Option Set \dotfill\ 51

2.2  ~Number of Subproblems \dotfill\ 52

2.3  ~Wall Seconds \dotfill\ 52

2.4  ~CPS Functions (from the CONVEX Exemplar Programming Guide) \dotfill\ 54

2.5  ~Results of Parallel Implementation \dotfill\ 57

2.6  ~Costs and Entropies for CMESPFs (Uniform Costs) \dotfill\ 62

2.7  ~Entropies for CMESPFs (Non-Uniform Costs) \dotfill\ 63
\\

3.1  ~Total Solution Time (seconds) \dotfill\ 81
\\

\newpage
\noindent{\Huge {\bf List of Figures}} \\ \\ \\

2.1  ~Graph of $v_1(\pi )$ \dotfill\ 16

2.2  ~Graph of $v_1(\pi )$ \dotfill\ 17

2.3  ~Graph of $v_1''(\pi )$ \dotfill 42

\newpage}

\TEXT
\chapter*{\vskip -1in Chapter 1}
\noindent {\Huge {\bf Introduction}} \\ \\ \\
\setcounter{chapter}{1}
In this thesis, we introduce and provide solution methods for
the Generalized Constrained
Maximum-Entropy Sampling Problem (GCMESP). The GCMESP is a 
problem of maximizing a particular nonlinear function
of subsets of a finite set, possibly subject to linear constraints.
To make this more precise,
let $n$ be a nonnegative integer, and let $N$ be an index set
of cardinality $n$.
Let $s,t$ be integers,
with $0 < t \leq s \leq n$. 
Let $C$ be a symmetric positive-semidefinite matrix 
with rows and columns indexed by $N$, and with rank at least $t$.
For a subset $S$ of
$N$, let $C[S,S]$ denote the principle submatrix of $C$ indexed by $S$.
Let $M$ be a set of cardinality
$m \geq 0$ that indexes the ``linear inequalities''
$\sum_{j \in S} a_{ij} \leq b_i$ ($i \in M$).
Using 
$\lambda_{\bar{l}}$ to denote the $l^{\mbox{th}}$ greatest eigenvalue,
we define the GCMESP as follows:
\[ \mbox{(GCMESP)} \hspace{.5in} \begin{array}{llll}
 z & := &  \max &
 \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[S,S]) \right) \\
& & & \\
& & \mbox{s.t.} &
\bsum{j \in S} a_{ij} \leq b_i, \hspace{.1in} \forall ~i \in M; \\
& & & \\
& & & S \subseteq N; \\
& & &  \\
& & &  |S| = s.
\end{array} \]

In later exposition,
it will be useful to refer to an
equivalent formulation of the above GCMESP. In particular, we identify
with a subset $S$ of $N$ its
{\it characteristic vector}
$x(S)$ in $\Re^N$ defined by
\[ x_j(S) := \left \{ \begin{array}{ll}
      1, & \mbox{if} \hspace{.15in} j \in S; \\
& \\
      0, & \mbox{if} \hspace{.15in} j \not \in S,
\end{array}  \right. \]
%\topmargin -0.50in
%\textheight 9.00 in
for $j \in N$. Likewise, we identify with a 0/1-valued vector $x$ in $\Re^N$
its {\it support}
$\underline{x}$, defined by
\[  \underline{x}:= \{ j \in N : x_j=1 \}. \]
Since there is a one-to-one correspondence between
subsets $S$ of $N$, and 0/1-valued vectors $x$ in $\Re^N$, we can
reformulate the GCMESP as the integer nonlinear program
\[ \begin{array}{llll}
z & := &  \max &  \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},
\underline{x}]) \right)\\
& & & \\
& & \mbox{s.t.} & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &
 \bsum{j \in N} x_j = s; \\
& & & \\
& & &   x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N.
\end{array} \]


We note that when $C$ is a diagonal matrix, the GCMESP can be
transformed into an integer {\it linear} program, by introducing
additional variables and constraints:
\[ \begin{array}{lllll}
&z & := &  \max & \bsum{j \in N} (\ln c_{jj}) y_j \\
& & & & \\
(1) & & & \mbox{s.t.} & y_j \leq x_j, \hspace{.2in} \forall ~j \in N; \\
& & & &   \\
& &  & & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M; \\
& & & & \\
(2) & & & &
  \bsum{j \in N} x_j = s; \\
& & & &  \\
(3) & & & &   \bsum{j \in N} y_j = t; \\
& & & &  \\
& & & &   x_j, y_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N.
\end{array} \]
Here, the variables $x_j$ choose an $s$-subset $S$ from $N$, and the
variables $y_j$ choose a $t$-subset $S'$ from $S$.
Constraints
(1) ensure that $S' \subseteq S$.
That the sets
are of the correct size is guaranteed by constraints (2) and (3).

In Section 2.1, we examine two special cases of the GCMESP.
These special cases have applications in
such areas as environmental monitoring
and medicine. 
The first special case that
we consider is the Constrained Maximum-Entropy
Sampling Problem (CMESP), which arises from the GCMESP
by taking $t=s$.  In this case, the product of eigenvalues
that appears in the objective function reduces to a
determinant, so the CMESP assumes the following form:
\[ \mbox{(CMESP)} \hspace{.3in}
\begin{array}{llll}
z & := &  \max  &  \ln \det C[S,S]  \\
& & & \\
 & & \mbox{s.t.} & \bsum{j \in S} a_{ij} \leq b_i, 
\hspace{.2in}  \forall ~i \in M; \\
& & & \\
& & & S \subseteq N; \\
& & & \\
& & &  |S|=s.
\end{array} \]
The MESP (the CMESP with $m=0$) was
introduced originally by Shewry and Wynn (1987)
%\cite{SHEWRY}
by relating its objective
function to the {\it entropy} of a set of random variables having a joint
Gaussian distribution.
In particular, they associate with $N$ an $n$-set of 
continuous random variables. 
For an $s$-subset $S \subseteq N$  with
marginal joint-density function $g_S (\cdot)$, 
the {\it entropy of S} is defined to be
\[ H(S) := -E[\ln g_S(S)]
 = - \int_{\Re^s} g_S(S) \ln (g_S(S)) ~ dS \]
(see Shannon (1948)).
In the case where $N$
has a joint Gaussian distribution with positive-definite 
covariance matrix $C$,
one can show
that 
\[ H(S) = k_s + \alpha \ln \det C[S,S], \hspace{.3in} \forall ~S \subseteq N,
~ |S| = s, \]
where $\alpha>0$ and $k_s$ are constants (see Shewry and Wynn).
%\cite{SHEWRY}.
Therefore, assuming this distribution, the above CMESP (with $m=0$)
is precisely the problem of selecting an $s$-subset $S$ of $N$ with
maximum entropy.

Entropy is important in that it
serves as a measure of the level of uncertainty concerning
the outcome of the  experiment whereby we observe the $s$-subset
$S$.
In particular, the greater
the entropy of a subset $S$, the greater the inherent uncertainty
associated with the corresponding experiment.
Entropy  has been  extensively studied in many different areas.
The concept of entropy was first introduced in the early
1870's by Rudolph Clausius. In 1877, the physicist Boltzmann (1877)
%\cite{BOLTZ}
gave a formal mathematical definition.
In the mid 1900's, Shannon (1948)
%\cite{SHANNON}
and Blackwell  (1951)
%\cite{BLACKWELL} 
further developed
entropy in the contexts of information theory and statistics,
respectively.

Ko, Lee, and Queyranne (1995) showed that the MESP
(the CMESP with $M = \emptyset$)
is NP-Hard.
They
proposed a
branch-and-bound algorithm for the MESP, which
generates upper bounds on MESP subproblems, using
eigenvalue calculations. 
Their algorithm was the first method besides complete
enumeration that sought to prove the optimality of its
solution; earlier approaches to solving the CMESP
had been heuristic (see Shewry and Wynn).
%\cite{SHEWRY})
Ko, Lee, and Queyranne
coded their
algorithm in FORTRAN 77,
using LINPACK (a precursor to LAPACK (1992)), and ran
the code on problems arising
from an application in
environmental monitoring. Lee (1994) extended these methods
to the case of nonempty $M$.

To make the CMESP more concrete, we consider the application
to environmental monitoring. In particular, let $N$ be a 
network of $n$ environmental monitoring stations;
we are interested in
measuring weekly concentrations of various undesirable ions
in wet deposition at each station. However, there is only enough funding
available to continue monitoring
$s$ stations, where $0 < s < n$, and so
the idea is to monitor a subset $S$ of $s$ stations,
and then use the data obtained at these monitored sites
to make inferences about the concentrations
at the remaining unmonitored sites. Each subset $S$
of $N$ will yield a varying
amount of information regarding the entire system, and the objective
is to select a subset $S$ that provides the maximum such amount.
Shewry and Wynn (1987) show
that the above problem can be modeled by the
problem where we select a subset $S$ of $N$, with $|S|=s$,
so that the entropy of $S$ is maximized. (We include a derivation of
this result in Appendix A.)
Since entropy is a measure of
the amount of uncertainty in a system, maximizing
might seem counterintuitive to what we are trying to accomplish.
However, in choosing the subset with maximum
entropy, we are observing the subset with the most inherent uncertainty;
that is, it is precisely at those sites about which we are most
uncertain, that we obtain data directly.

The above environmental monitoring application of the CMESP is
one that appears again and again in literature on the topic. However,
there are other applications as well. Here, we briefly describe a second
application in a completely different area: space 
exploration. In this application, our
network is not one of environmental monitoring sites, but of locations
on a space station.  In particular, each space station has as one
of its components a {\it truss}, which is essentially a framework
made up of bars and joints. As the station orbits
in space, it is constantly being bombarded by objects
that cause vibrations; these vibrations may significantly affect the
operation of the station.  Prior to launch, one can vibrate the station
and use pairs of accelerometers to collect data at pairs of locations.
By moving the accelerometers to  ${n \choose 2}$
%$\left( \begin{array}{l} n \\ 2 \end{array} \right)$
different pairs of locations,
we can estimate the order-$n$ covariance matrix.
Now, each accelerometer costs in the millions of dollars,
and so we can only afford to place, say, $s$ accelerometers, where $s < n$.
The problem becomes that of selecting a subset of sites at which to
place the accelerators, so as to obtain as much information as possible
about vibrations
along the entire structure. (See Glassburn and Smith (1994), and
Maschinot (1996)
%\cite{SMITH}
for a more complete description of this problem.) 

In addition to the CMESP, we consider 
the D-Optimality Problem (DOP) as
a second special case of the GCMESP for  appropriate choices
of $C$ and $t$. 
Let $K$ be a $k$-set, and
let $N$  be an $n$-set that indexes
 $\{ x_j^T \in \Re^K \}$.
Let $X$  be the $n \times k$ matrix with $j^{\mbox{th}}$ row  
equal to $x_j$, and assume that $X$ has full column rank.
Let $s$ be an integer satisfying $k \leq s < n$.
For 
$S \subseteq N$ ($|S|=s$), let $X(S)$ be the $s \times k$ submatrix
of $X$ with rows indexed by $S$, and define $D(S):=(X(S))^TX(S).$
Let the $m$-set $M$ index the set $\sum_{j \in S} a_{ij} \leq b_i$
($i \in M$).
We then define the DOP as:
\[ (\mbox{DOP}) \hspace{.5in}
\begin{array}{llll}
z & := &  \max &  \ln \det D(S) \\
& & & \\
 & & \mbox{s.t.} & \bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} \forall
~i \in M; \\
& & & \\
& & & S \subseteq N; \\
& & & \\
& & & |S| = s. 
\end{array} \]
At this point, it is not evident that the DOP is a special case of 
the GCMESP. In Section 2.1, we establish that  in fact it is.

The quantity $\det D(S)$ has been of interest to mathematicians and
statisticians for decades. 
The statistician Wald (1943) first
proposed  $\det D(S)$ as an optimality criterion
in the context of hypothesis testing.
Keifer and Wolfowitz (1959) further studied and developed the
criterion, then named it the ``D-Optimality criterion'';
it is this expression that appears in subsequent literature.
Dykstra (1971), Federov (1972),
Wynn (1970, 1972), and Mitchell (1974) all suggested heuristics
for searching for  D-Optimal subsets, and  Ko, Lee, and Wayne (1997)
showed that the DOP is NP-Hard.
Welch (1982) and Ko, Lee, and Wayne (1997)
devised 
algorithms for finding a solution that was provably
D-Optimal. Their algorithms are based on a branch-and-bound strategy,
with upper bounds being computed on DOP subproblems. The 
difference in the two algorithms lies in how these bounds are generated;
Welch's bound is based on Hadamard's inequality, while
the bound of Ko, Lee, and Wayne is based on singular values. 

To understand why 
the DOP is of such interest to statisticians,
we consider the application that provided its original 
motivation.
To this end, we interpret
the vectors $x_j \in \Re^K$ ($j \in N$) as  ``design points,'' and
we associate with each fixed design point $x_j$, a random ``response,''
$y_j \in \Re$.  The ``design matrix,'' $X$, is 
the $n \times k$ matrix  with $j^{\mbox{th}}$ row
equal to $x_j^T$.
Let $y \in \Re^N$  be the vector with
$j^{\mbox{th}}$ component  $y_j$. For an $s$-subset $S$ of $N$,
let $y(S) \in \Re^S$ 
be the 
vector with components  $y_j, ~ j \in S$.
We assume that the points
$x_j$ and $y_j$ ($j \in N$) are related by the following linear model:
\[ y_j = x_j^T \beta + \epsilon_j, \hspace{.2in} \forall ~j \in N, \]
where $\beta$ is a $k \times 1$ vector of parameters to be
estimated, and the $\epsilon_j$ are independent, 
identically-distributed
random variables with E[$\epsilon_j$]=0 and Var[$\epsilon_j$]=
$\sigma^2$. 
%Equivalently, we can express the same linear model
%in matrix notation:
%\[ y = X \beta + \epsilon \]
%where
%$\epsilon \in \Re^N$ is
%the vector with $i^{\mbox{th}}$ component  $\epsilon_i$.
Now suppose that we are interested in constructing a least-squares
estimator of $\beta$, but due to the high cost of experimentation,
it is possible to perform only
$s$ experiments (i.e. observe $s$ design points).
The least-squares estimator
$\hat{\beta}^S$ of $\beta$, based only on the pairs
$(x_j,y_j)$ that are indexed
by the $s$-subset $S$ in the above model (where $X(S)$ is assumed
to have rank $k$),
is the unique solution to
\[ \min_{\hat{\beta} \in \Re^K} \bsum{j \in S} [ y_j - x_j^T \hat{\beta} ]^2. \]
The estimator
$\hat{\beta}^S$ assumes the form:
\[ (D(S))^{-1} (X(S))^Ty(S)  \]
where $D(S)$ is as defined previously (see Fox (1984), for example).
% \cite{FOX}.

Now, among all possible $s$-subsets of $N$, which
one should we select? We would like to
use a selection criterion that judges
the error incurred by choosing a particular $S$.
In the case where $k=1$ (so that $\beta \in \Re^1$),
a logical choice would be the
variance of the estimator, in which case we choose
the subset $S$ so that the variance of $\hat{\beta}^S$ is a
 minimum.
For $k>1$, we can no longer
speak of the variance of $\hat{\beta}^S$ per se; instead we 
consider the variance-covariance matrix Cov($\hat{\beta}^S$),
where
\[
  (\mbox{Cov}(\hat{\beta}^S))_{ij} = 
       \mbox{cov}(\hat{\beta}^S_i, \hat{\beta}^S_j) . \]
One can show (see Fox, for example) 
%\cite{FOX}
that
this matrix Cov($\hat{\beta}^S)$ is of the form
\[   \mbox{Cov}(\hat{\beta}^S) = \sigma^2 (D(S))^{-1}.\]
To compare
the matrices Cov($\hat{\beta}^S)$
for various $S$,
we use the ``generalized variance''
as a basis of comparison. The
generalized variance is defined to be the
determinant of the matrix Cov($\hat{\beta^S}$).
%\cite{WALD}. 
(See Wald, for example). 
To understand why the determinant is a
natural extension of variance to higher dimensions, we explore
its relationship to confidence regions for $\beta$.
A $100(1 - \alpha) \%$  confidence region
for $\beta$ is described by
\[ ( \hat{\beta}^S - \beta)^T D(S) (\hat{\beta}^S - \beta) \leq
\frac{k}{s-k} (e^T e) F_{\alpha}, \]
where $e = y(S) - X\hat{\beta}^S$, and where $F_{\alpha}$ is 
such that a random variable $Z$ having an F distribution with
$k$ and $s-k$ degrees of freedom satisfies
Pr($Z \leq F_{\alpha}) = 1- \alpha$.
Recall that this 
region has the property that, with repeated sampling (and hence
with various values of $\hat{\beta}^S$),
$100(1 -\alpha) \%$
of the regions generated will contain the actual value of
the parameter vector $\beta$.
The above confidence region lies
in $k$-dimensional
space; its boundary is a $k$-dimensional ellipsoid, and its volume is 
\[ \mbox{vol}(B_k(s,\alpha)) \sqrt{\det(D(S))^{-1}}, \]
where $\mbox{vol}(B_k(s,\alpha))$ is the volume of the $k$-dimensional ball
of radius $\frac{k}{s-k} (e^Te) F_{\alpha}$
(see Schrijver (1986), for example). 
Thus, in the case of general $k$, we  choose the $s$-subset $S$
that minimizes the volume of the confidence region for $\beta$,
that is, the one minimizing
$\det((D(S))^{-1})$. This is equivalent to
maximizing $\det D(S)$, which, under the usual assumption that
$X$ has full column rank,
is equivalent to maximizing $\ln \det D(S)$.  Under this assumption,
our problem is precisely the DOP.

To perhaps make the above discussion 
more tangible, we consider an example
of a linear model, of the form described above,
that might appear in the field of medicine. We take
$K$ to be a $k$-set of drugs that potentially
affect
blood pressure, and we let
$N$ be a set of design points, where a design point
$x_j \in \Re^k$ represents a combination of amounts of drugs 
that we would like to consider administering to a patient. That is,
each component $x_{jl}$ of $x_j$ is some
quantity of drug $l$. Let $y_j$ be the associated response, or
change in blood pressure, observed in a patient who is given
the combination
of $x_{j1}$ units of drug 1, $x_{j2}$ units of drug 2, ..., and
$x_{jk}$ units of drug $k$. We assume  that
$x_j$ and $y_j$ are related by the linear model given previously.
Again, we are faced with the task of constructing the 
least-squares estimator for the vector $\beta$ of coefficients
in the model. Although we might like to sample all $n$ design points,
experiments might be expensive, and we might be restricted
to administering only $s$ combinations, where $k \leq s < n$. Selecting
the subset $S$ of $N$ that minimizes the generalized variance
of the least squares estimator of $\beta$ is precisely the
D-Optimality  Problem. The author selected the blood pressure
example because it is a classic one. A more current application
would be the so-called ``AIDS cocktail,'' a drink containing a
combination of drugs believed to reduce the negative effects of
the HIV virus on infected patients.

In Chapter 2, the GCMESP and its special cases
are examined in more detail. In Section 2.1, we review the CMESP
and DOP, and show that the DOP is in fact a special case of the GCMESP.
In Section 2.2, we construct eigenvalue-based upper bounds for the GCMESP,
derive expressions for the gradients of these upper bounds,
and state additional results. In Section 2.3, we discuss a 
branch-and-bound algorithm for solving the GCMESP,  and show how
the methods of computing
upper bounds for the GCMESP (as derived in 2.2) can be used
to compute upper bounds for GCMESP subproblems generated during
the branch-and-bound process.
In Section 2.4, we give serial and parallel computational
results for the CMESP.

In Section 2.5, 
we describe a cost-constrained version of the CMESP,
which models a special case of the environmental monitoring
problem modeled by the CMESP. In the cost-constrained case, we
are interested in  monitoring concentrations of several
ions in wet deposition at
sites in a network, and there is the opportunity to measure
just a subset of those ions at each site. The observation of
at least one ion at a particular location $l$ incurs a fixed cost
$f_l$. The observation of ion $i$ at location $l$ incurs
an additional cost $d_{il}$. We have a budget limit
$\beta$, and we wish
to observe a total of $p$ ions at a total of $t$ locations, so as
to maximize entropy. We formulate this
``Constrained Maximum-Entropy Sampling Problem with Fixed Costs''
(CMESPF)
as an integer nonlinear program
and show that it is in fact a special case of the CMESP. We
also give computational results for
the CMESPF.

In Chapter 3, we consider the Constrained Maximum-Entropy Remote
Sampling Problem (CMERSP), which is a variation of the CMESP.
We again start with an $n$-set $N$
of indices, but now we introduce an additional set $T$, with $N \cap T = 
\emptyset$. Let the $m$-set $M$ index the set $\sum_{j \in S} a_{ij}
\leq b_i$ ($i \in M$) of constraints.
Let $C$ be a symmetric matrix with rows and columns indexed by
$N \cup T$, and assume that $C$ is positive definite.
For a subset $S$ of $N$, let
\[ C_S[T,T] := C[T,T] - C[T,S] (C[S,S])^{-1} C[S,T]. \]
We define the CMERSP as:
\[\mbox{(CMERSP)} \hspace{.3in}
\begin{array}{llll}
z & := &  \min &  \ln \det C_S[T,T] \\
& & & \\
& & \mbox{s.t.} &
\bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} \forall ~i \in M; \\
& & & \\
& &  &  S \subseteq N; \\
& & & \\
& & &  |S|=s.
\end{array} \]

To understand why the CMERSP is of such interest to us, we consider
it in the context of its applications. In particular, the CMERSP models
a problem closely related to the sampling problem modeled by the CMESP,
where we seek
a most informative subset $S$ from a
set $N$ of points in a network. In {\it remote} sampling,
there is a set
of random variables $N$ called {\it observing points}  that we can monitor,
 and a set of random variables
$T$ called  {\it target points}, that we want information about. We have no
inherent interest in the points $N$, and we are unable to 
directly observe the points $T$. We seek to choose $s$ points $S$
from $N$, to observe, so as to minimize the uncertainty remaining
in $T$ after observing $S$. That is, we want to choose the subset $S$
that minimizes the entropy of $T$, conditioned on $S$.
Let $C$ be the $|N \cup T| \times |N \cup T|$ covariance matrix
for $N \cup T$; then the matrix
\[ C_S[T,T] := C[T,T] - C[T,S] (C[S,S])^{-1} C[S,T] \]
is the
covariance matrix for $T$ conditioned on $S$. Then assuming that
$N \cup T$ has a joint Gaussian distribution,
the {\it conditional entropy}  of $T$ (conditioned on $S$)
is defined by
\[ H_S(T) := k_t + \alpha \ln \det C_S[T,T], \]
where $k_t$ and $\alpha>0$ are constants. 
In Section 3.1, we show  that 
minimizing the conditional entropy is equivalent to maximizing the
difference $H(S) - H_T(S)$, so the CMERSP is precisely the problem
of selecting and observing a subset $S$ of $N$ that provides
the most information about $T$.
Adapting an idea of Ko, Lee, and Queyranne, Bueso et al. (1998a)
%\cite{BUESO}
derived
lower bounds for  $\min_{S \subseteq N} H_S(T)$. Such bounds were then
incorporated into a branch-and-bound strategy. Also, see Lee (1998)
and Bueso et al. (1998b).
%\cite{LEE2}.

In Section 3.2,
we show how the CMESP is a special case of the
CMERSP, and we discuss
the complexity of the CMERSP. In Section 3.3, 
we construct upper bounding functions for the CMERSP, derive
expressions for the gradients of these upper bounding
functions, and state
additional results. 
In Section 3.4, we give
computational results
for the CMERSP.
\newpage

\chapter*{\vskip -1in Chapter 2}
\setcounter{chapter}{2}
{\Huge {\bf The Generalized Constrained \\
Maximum-Entropy 
Sampling \\
Problem (GCMESP)}} \\ 
\section{The CMESP and the DOP as Special Cases}

As before, let $n$ be a nonnegative integer, and let $N$ be an index set
of cardinality $n$.
Let $s,t$ be integers,
with $0 < t \leq s \leq n$. 
Let $C$ be a symmetric positive-semidefinite matrix with rows and columns indexed by $N$.
Let $M$ be a set of cardinality
$m \geq 0$ that indexes the set $\sum_{j \in S} a_{ij} \leq b_i$ ($i \in M$)
of linear constraints.
In Chapter 1, we introduced the Generalized Constrained Maximum-Entropy
Sampling Problem (GCMESP) as:
\[ \mbox{(GCMESP)} \hspace{.5in} \begin{array}{llll}
 z & := &  \max &
 \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[S,S]) \right) \\
& & & \\
& & \mbox{s.t.} &
\bsum{j \in S} a_{ij} \leq b_i, \hspace{.1in} \forall ~i \in M; \\
& & & \\
& & & S \subseteq N; \\
& & &  \\
& & &  |S| = s.
\end{array} \]
In addition,
we introduced the Constrained Maximum-Entropy
Sampling Problem (CMESP) and the D-Optimality  Problem (DOP),
claiming that each was a special case of the GCMESP.
Recall first the formulation of the CMESP:
\[ \mbox{(CMESP)} \hspace{.5in}
\begin{array}{llll}
z & := &  \max  &  \ln \det C[S,S]  \\
& & & \\
 & & \mbox{s.t.} & \bsum{j \in S} a_{ij} \leq b_i,  \hspace{.2in}  i \in M; \\
& & & \\
& & & S \subseteq N; \\
& & & \\
& & &  |S|=s.
\end{array} \]
Since the determinant of a matrix is equal to
the product
of its eigenvalues, it is clear that the CMESP is a special case
of the GCMESP, taking $t$ to equal $s$.

Now consider the DOP.
Recall that in the formulation of the DOP,
we let $K$ be a $k$-set that indexes $\{ y_i \in \Re \}$, and
 $N$  be an $n$-set that indexes
 $\{ x_i^T \in \Re^K \}$.
Let $X$  be the $n \times k$ matrix with $i^{\mbox{th}}$ row  
equal to $x_i$, and assume that $X$ has full column rank.
Let $s$ be an integer satisfying $k \leq s < n$.
For 
$S \subseteq N$ ($|S|=s$), let $X(S)$ be the $s \times k$ submatrix
of $X$ with rows indexed by $S$; let $D(S): = (X(S))^T X(S)$.
In Chapter 1, we formulated the DOP as: 
\[ (\mbox{DOP}) \hspace{.5in}
\begin{array}{llll}
z & := &  \max & \ln  \det D(S) \\
& & & \\
 & & \mbox{s.t.} & \bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} \forall
~i \in M; \\
& & & \\
& & & S \subseteq N; \\
& & & \\
& & & |S| = s.
\end{array} \]

To show that the DOP is a special case of the GCMESP,
we begin by letting $C := XX^T$. Note that $C$ is in fact symmetric
positive semidefinite. For $S \subseteq N$, $C[S,S] = X(S) (X(S))^T$,
and
\[ \begin{array}{lll}
\det D(S) & = &  \Bprod{l=1}{k} \lambda_{\bar{l}} ((X(S))^T X(S)) \\
& & \\
 & = &  \Bprod{l=1}{k} \lambda_{\bar{l}} (X(S) (X(S))^T) \\
& & \\
 & = &  \Bprod{l=1}{k} \lambda_{\bar{l}} (C[S,S]) .
\end{array} \]
We can therefore express the above DOP as the following
equivalent program:
\[ \begin{array}{llll}
z & := & \max  &
 \ln \left( \Bprod{l=1}{k} \lambda_{\bar{l}} (C[S,S]) \right) \\
& & & \\
& & \mbox{s.t.} & \bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &  
S \subseteq N; \\
& & & \\
& & & |S| = s.
\end{array} \]
Expressed in this form, it is clear that the DOP is a special case
of the GMESP, where $t$ is taken to equal $k$, and $C$ is $XX^T$.

\section{Upper Bounds}

The algorithm that we have developed for
the GCMESP is of a branch-and-bound
variety.  Subproblems are described by the indices that are ``fixed''
into the solution and those indices that remain eligible.
For each subproblem, the ``size'' of its feasibility region is determined.
If the subproblem is infeasible, it becomes
inactive (i.e. creates no new subproblems of its own).
If there is a unique feasible point,
its objective value is computed, and again it becomes inactive. Otherwise,
upper and lower bounds are computed, and two new subproblems are created,
by selecting an eligible index and setting it equal to 1
in one subproblem and to 0 in the second subproblem.
We leave a detailed
discussion of the algorithm for the next section.
What is important to know at this point is that all of the subproblems
generated during this process are nearly identical in structure
to the original GCMESP (we make this more precise in Section 2.3).
In this section, we develop upper bounds for $z$, the value of the  GCMESP.
In the next section, as we discuss the branch-and-bound strategy
more thoroughly, we show how the methods described here can be
extended to compute upper bounds
for the subproblems generated during the branch-and-bound process.

For $\pi \in \Re^M$, we define a diagonal matrix $D_{\pi}$ by letting
\[ d_{\pi}^{lj} := \left \{ \begin{array}{ll}
                     \exp \{ - \frac{1}{2} \sum_{i \in M} \pi_i a_{ij} \},
& \mbox{if} \hspace{.1in} l=j; \\
& \\
                     0,
& \mbox{if} \hspace{.1in} l \neq j. \end{array} \right. \]
Let $C_{\pi} := D_{\pi} C D_{\pi}$. For $ \pi \in \Re^M$,
let 
\[ v_1(\pi) :=
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi}) \right) + \bsum{i \in M}
\pi_i b_i  - \min_{K \subset N, |K| = s-t} \bsum{j \in K} \bsum{i \in M}
\pi_i a_{ij}~. \]
The following lemmas and corollary are used to prove subsequent results.
\begin{lemma}\label{H774}
(see Horn and Johnson (1985), Corollary 7.7.4, p. 471). If
$A,B \in M_n$ are symmetric positive definite and if $A \succeq B$, then
$\lambda_{\bar{l}}(A) \geq
\lambda_{\bar{l}}(B)$ for all $l=1, \ldots, n$.
\end{lemma}
\begin{lemma}\label{H}
(see Horn and Johnson (1991), Theorem 3.3.4, pp. 171-2). For
compatible matrices $A,B$,
\[ \Bprod{l=1}{t} \sigma_l(AB) \leq \Bprod{l=1}{t} \sigma_l (A)
\sigma_l(B) \]
where $\sigma_l$ denotes the $l^{\mbox{th}}$ greatest singular value.
\end{lemma}
 
\begin{corollary} \label{HJ}
 {\it For compatible matrices $A,B$, with $A,B$ symmetric
positive definite,}
\begin{enumerate}
\item $\Bprod{l=1}{t} \lambda_{\bar{l}}(AB) \leq \Bprod{l=1}{t}
\lambda_{\bar{l}}(A)
\lambda_{\bar{l}}(B)$;
\item $\Bprod{l=1}{t} \lambda_{\underline{l}}(AB) \geq \Bprod{l=1}{t}
\lambda_{\underline{l}}(A)
\lambda_{\underline{l}}(B)$ ~.
\end{enumerate}
\end{corollary}
\begin{proof} Inequality
{\it 1} follows directly from Lemma \ref{H}. We obtain
{\it 2} by applying {\it 1} to the pair $B^{-1},A^{-1}$.
In particular, we have
\[ \bfrac{1}{\Bprod{l=1}{t} \lambda_{\underline{l}}(AB)} =
\Bprod{l=1}{t} \lambda_{\bar{l}}(B^{-1} A^{-1})  \leq
\Bprod{l=1}{t} \lambda_{\bar{l}}(B^{-1})
 \lambda_{\bar{l}}(A^{-1}) =
\bfrac{1}{\Bprod{l=1}{t} \lambda_{\underline{l}}(B)}
\bfrac{1}{\Bprod{l=1}{t} \lambda_{\underline{l}}(A)}. \]
\end{proof}

Next, we use the above lemmas and corollary to show that
$v_1$ provides upper bounds for the optimal value of the GCMESP.

\begin{theorem}\label{ub}
For all $\pi \in \Re^M_+$, 
$z \leq v_1(\pi)$.
\end{theorem}
\begin{proof}
Let $\pi \in \Re^M_+$, and 
let $S$ solve GCMESP. Among all $s$ diagonal entries of
the matrix $(D_{\pi}[S,S])^{-1}$, let $T$ denote the index set of the
$t$ largest.
Then
\[ \begin{array}{llll}
& z & = & \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[S,S]) \right) \\
& & \\
&  & = & \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} ((D_{\pi}[S,S])^{-1}
D_{\pi}[S,S] C[S,S] D_{\pi}[S,S] (D_{\pi}[S,S])^{-1}) \right)\\
& & \\
(1) &  & \leq & \ln
   \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (D_{\pi}[S,S])^{-1}
~ \lambda_{\bar{l}}
(D_{\pi}[S,S] C[S,S] D_{\pi}[S,S]) ~ \lambda_{\bar{l}} (D_{\pi}[S,S])^{-1}
\right) \\
& & & \\
&   & = & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}
(D_{\pi}[S,S] C[S,S] D_{\pi}[S,S]) \right)  + 2 \ln
\left(\Bprod{l=1}{t} \lambda_{\bar{l}} (D_{\pi}[S,S])^{-1} \right) \\
\end{array} \]
\[ \begin{array}{llll}
&   & = & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi}[S,S])\right)  + \bsum{i \in M} \bsum{j \in T} \pi_i a_{ij} \\
& & \\
(2) &   & \leq & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi})\right)  + \bsum{i \in M} \bsum{j \in T} \pi_i a_{ij} +
\bsum{i \in M}\pi_i( b_i - \bsum{j \in S} a_{ij}) \\
& & & \\
&   & = & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi})\right)  + \bsum{i \in M}\pi_i b_i - \bsum{i \in M}
\bsum{j \in S \backslash T} \pi_i a_{ij} \\
& & & \\
&  & \leq & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi})\right)  + \bsum{i \in M}\pi_i b_i
 - \min_{K \subset N, |K| = s-t} \bsum{j \in K} \bsum{i \in M}
\pi_i a_{ij}~ \\
& & & \\
& &   = & v_1(\pi).
\end{array}  \]
We note that the inequality $(1)$ in the proof does {\it not} generally
hold multiplicand -by-multiplicand (except for $l=1$). That it does
hold for the product follows from Corollary \ref{HJ}. 
\end{proof}

The best upper bound of this form is therefore
\[  \min_{\pi \in \Re^M_+} v_1(\pi) . \]
The following examples  show that the minimizing $\pi$ may in fact
be nonzero.
\begin{example}
Let 
\[ C:= \left[ \begin{array}{rrrrr}
5.0 & 0.25 & 0.5 & 0.75 & 0.0 \\
& & & & \\
0.25 & 1.0 & 0.5 & 0.5 & -0.1\\
& & & & \\
0.5 & 0.5 & 6.0 & -0.5  & 1.3\\
& & & & \\
0.75 & 0.5 & -0.5 & 2.0  & 0.2 \\
& & & & \\
0.0 &  -0.1 &  1.3 & 0.2 & 6.0
\end{array} \right]. \]
Take $s=3$, $t=2$, and impose the constraint
\[ x_1 - x_2 + x_3 \leq 0. \]
Then for $\pi \in \Re_+$,
\[ v_1(\pi) = \ln \left(\Bprod{l=1}{2} \lambda_{\bar{l}}(C_{\pi})
\right) + \pi . \]
A plot of 
the function $v_1(\pi)$ (see Figure
\ref{fig8}), indicates that
\[ \min_{\pi \in \Re_+} v_1(\pi) \approx v_1(0.3) \approx 3.628162701 \]
and
\[ v_1(0) \approx 3.663714817. \]
\begin{figure}
\hspace{1in}
\psfig{figure=blowup.ps,height=4in,width=4in,angle=270}   
\caption{Graph of $v_1(\pi)$} \label{fig8}
\end{figure} 
\end{example} 

%\begin{figure}
%\hspace{1.75in}
%\psfig{figure=f1_3num2.ps,height=5in,width=5in,angle=270}   
%\caption{Graph of $v_1(\pi)$, $0 \leq \pi \leq 3$} \label{fig1}
%\end{figure}  


>From the above example, one might get the mistaken impression
that the function $v_1$ is always smooth. The next example
shows that this is not the case.
We consider an extreme case where the 
imposed constraint actually forces several of the variables to zero.
\begin{example}
Let $C:=diag(10,9,4,3,2,1)$.
The matrix $C$ is clearly positive definite.
Take $s=3$, $t=2$, and impose the constraint
\[ x_1 + x_2  \leq 0. \]
Then for $\pi \in \Re_+$,
\[ v_1(\pi) = \ln \left(\Bprod{l=1}{2} \lambda_{\bar{l}} (C_{\pi})
\right) . \]
A plot of the function $v_1(\pi)$ (see Figure \ref{fig2}),
indicates that
\[ \min_{\pi \in \Re_+} v_1(\pi) \leq v_1(2.0) \approx 2.484906650 \]
and
\[ v_1(0) \approx 4.499809670. \]
\end{example}
\begin{figure}[h]
\hspace{1in}
\psfig{figure=exgood.ps,height=4in,width=4in,angle=270}  
\caption{Graph of $v_1(\pi)$ } \label{fig2}
\end{figure}

The proof of Theorem \ref{ub} suggests the existence of a second
upper bound for $z$. In particular, we achieve this upper bound by
replacing inequality (2) in the proof with 
  \[  \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi}[S,S])\right)  + \bsum{i \in M} \bsum{j \in L} \pi_i a_{ij} 
  \leq 
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi}) \right) +
\max_{K \subset N, |K| = t} \bsum{j \in K} \bsum{i \in M}
\pi_i a_{ij}. \]

\begin{corollary}
For $\pi \in \Re^M$, define
\[ v_2(\pi) := \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi})
\right) +
\max_{K \subset N, |K| = t} \bsum{j \in K} \bsum{i \in M}
\pi_i a_{ij}~. \]
Then for all $\pi \in \Re^M$, $z \leq v_2(\pi)$.
\end{corollary}

However, this bound is not useful, as the following result indicates,
since there are no examples
where $\mbox{argmin} \{v_2(\pi) : \pi \in \Re^M \} \neq 0$.
\begin{theorem}
For all $\pi \in \Re^M, v_2(\pi) \geq v_2(0).$
\end{theorem}
\begin{proof}
Let $\pi \in \Re^M$.  Among all $n$ diagonal entries of the matrix
$(D_{\pi})^{-1}$, let $T$ denote the index set of the $t$ largest.
Then
 \begin{eqnarray*}
v_2(0) &  =  & 
  \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C) \right) \\
& & \\
& = & \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} ((D_{\pi})^{-1}
D_{\pi} C D_{\pi} (D_{\pi})^{-1}) \right) \\
& & \\
 & \leq & \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (D_{\pi})^{-1} ~
\lambda_{\bar{l}}(D_{\pi} C D_{\pi}) ~ \lambda_{\bar{l}} (D_{\pi})^{-1}
\right)\\
& & \\
 & = & \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (D_{\pi})^{-1} ~
\lambda_{\bar{l}}(C_{\pi}) ~ \lambda_{\bar{l}} (D_{\pi})^{-1} \right) \\
& & \\
& = & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}(C_{\pi})
\right) + 2 \ln \left(\Bprod{l=1}{t}
\lambda_{\bar{l}} (D_{\pi})^{-1} \right) \\
& & \\
& = & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}(C_{\pi})
\right) + \bsum{i \in M} \bsum{j \in T}
\pi_i a_{ij} \\
& & \\
& \leq & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}}(C_{\pi}) \right) +
\max_{K \subset N, |K| = t} \bsum{i \in M} \bsum{j \in K}
\pi_i a_{ij} \\
& & \\
& = & v_2(\pi).
\end{eqnarray*} 
\end{proof}

We now refocus our attention on the upper bound $v_1$.
Note that in the unconstrained case ($M = \emptyset$), $v_1$  reduces
to 
\[ v_1: = \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C) \right). \]
In the case where $s=t$, so that the GCMESP reduces to the CMESP,
we have that
\[ v_1(\pi) = \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}} (C_{\bar{\pi}})
\right) + \bsum{i \in M} \pi_i b_i , \]
which coincides with the bound given in Lee (1994).
Finally,
for the case where $C$ is diagonal and there are no side constraints,
we have that $z = v_1$; thus, in the diagonal case, $v_1$ is a best possible
upper bound on the optimal value of GCMESP.

Recalling that the best upper bound of this form is achieved by finding
\[ \min_{\pi \in \Re^M_+} v_1 (\pi). \]
We next show that $v_1$ is convex, which implies that a local
minimum of $v_1$ will be a global minimum.
For $\pi \in \Re^M_+$,
define 
\[ \begin{array}{l}
 w(\pi): = \ln \left(\Bprod{l=1}{t}
 \lambda_{\bar{l}} (C_{\pi}) \right), \\
 \\
 w_1(\pi) := 
 \displaystyle \min_{K \subset N, |K| = s-t} \bsum{j \in K} \bsum{i \in M}
\pi_i a_{ij}~ .
\end{array} \]

\begin{lemma}
The function $w$ is convex.
\end{lemma}
\begin{proof}
Let $\pi^1$ and $\pi^2$ be arbitrary points in $\Re^M$.
\[\begin{array}{llll}
&2w(\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2) & = & \ln  \left( \Bprod{l=1}{t}
\lambda_{\bar{l}}^2  ( C_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}) \right) \\
& & & \\
& &  = & \ln \left( \Bprod{l=1}{t}
\lambda_{\bar{l}}^2  ( D_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} C
 D_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} )
 \right) \\
& & & \\
& & = & \ln \left( \Bprod{l=1}{t} 
\lambda_{\bar{l}} ( D_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} C
 D_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} 
D_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} C
 D_{\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} )
 \right) \\
& & & \\
& & = & \ln  \left( \Bprod{l=1}{t}
\lambda_{\bar{l}}  (D_{-\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}
D_{\pi^1}  C D_{\pi^1} D_{\pi^2} C D_{\pi^2}
 D_{\frac{1}{2} \pi^1 - \frac{1}{2} \pi^2} )
 \right) \\
\end{array} \]
\[\begin{array}{llll}
& & = & \ln  \left( \Bprod{l=1}{t}
\lambda_{\bar{l}}   (D_{\pi^1}
C D_{\pi^1} D_{\pi^2} C D_{\pi^2}) \right) \\
& & & \\
(1) &  & \leq & \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (
 D_{\pi^1}
C D_{\pi^1} )  \lambda_{\bar{l}} (
D_{\pi^2} C D_{\pi^2}) \right) \\
& & &  \\
& & = & \ln  \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (
 D_{\pi^1}
C D_{\pi^1}) \right) + \ln  \left( \Bprod{l=1}{t} (
D_{\pi^2} C D_{\pi^2} ) \right) \\
& & & \\
& &  = & w(\pi^1) + w(\pi^2).
\end{array} \]
(Again, we note that the inequality (1) in the proof does {\it not}
generally hold multiplicand-by-multiplicand (except for $l=1$). That
it does hold for the product follows from Corollary \ref{HJ}.)
\end{proof}
\begin{lemma}
The function $w_1$ is concave.
\end{lemma}
\begin{proof}
Let $\pi^1$ and $\pi^2$ be arbitrary points in $\Re^M_+$.
 \begin{eqnarray*}
w_1(\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2) & = & \min_{K \subset N,
|K| = s-t} \bsum{j \in K} \bsum{i \in M} (\frac{1}{2} \pi^1_i
+ \frac{1}{2} \pi^2_i) a_{ij} \\
& & \\
& = & 
\frac{1}{2} \min_{K \subset N,
|K| = s-t} \bsum{j \in K} \bsum{i \in M} (\pi^1_i a_{ij}
+ \pi^2_i a_{ij} )\\
& & \\
& \geq  & 
\frac{1}{2} \min_{K \subset N,
|K| = s-t} \bsum{j \in K} \bsum{i \in M} (\pi^1_i a_{ij})
+ 
\frac{1}{2} \min_{K \subset N,
|K| = s-t} \bsum{j \in K} \bsum{i \in M} (\pi^2_i a_{ij}) \\
& & \\
& = & \frac{1}{2} w_1(\pi^1) + \frac{1}{2} w_1(\pi^2).
\end{eqnarray*} 
\end{proof}
\begin{corollary}
The function $v_1$ is convex.
\end{corollary}
\begin{proof}
Since $v_1(\pi) = w(\pi) + \bsum{i \in M} \pi_i b_i - w_1(\pi)$,
where $w$ is convex, $w_1$ is concave (hence $-w_1$ is convex),
and $\bsum{i \in M} \pi_i b_i$ is linear in $\pi$, it follows
that $v_1$ is convex.
\end{proof}

Recall that the best upper bound of this form arises by
minimizing $v_1$ over
$\pi \in \Re^M_+$. Next, we derive an expression for the subgradient
of $v_1(\pi)$. Armed with this information, we can apply a descent
algorithm to seek a global minimum of $v_1$.

\begin{lemma}\label{GR1}
{\it For $i \in M$,}
\[ \bfrac{\partial C_{\pi}}{\partial \pi_i} (\bar{\pi}) =
- \frac{1}{2} \left( \mbox{diag} (a_{i1}, \ldots, a_{in})  \cdot C_{\bar{\pi}
}
+ C_{\bar{\pi}} \cdot \mbox{diag} (a_{i1}, \ldots, a_{in}) \right), \]
{\it for all $\bar{\pi} \in \Re^M$.}
\end{lemma}
\begin{proof}
\[ c_{\pi}^{lk} = c_{lk} \exp \left \{ - \frac{1}{2} \bsum{j \in M} \pi_j(a_{jl}
+a_{jk}) \right \}. \]
\[ \begin{array}{lll}
 \bfrac{\partial c_{\pi}^{lk}}{\partial \pi_i} &  =  &
 c_{lk} \exp \left \{ - \frac{1}{2} \bsum{j \in M} \pi_j(a_{jl}
+a_{jk}) \right \} (-\frac{1}{2} (a_{il}+a_{ik})) \\
& & \\
& = & -\frac{1}{2} (a_{il}+a_{ik}) c_{\pi}^{lk}.
\end{array} \]
The result follows.
\end{proof}

Let $u_l( \bar{\pi})$ denote an eigenvector associated with
$\lambda_l(C_{\bar{\pi}})$, normalized to have Euclidean length 1.
We use $u_{\bar{l}}(\pi)$ to denote an eigenvector associated with
the $l^{\mbox{th}}$ greatest eigenvalue of $C_{\pi}$.
For
$j \in N$, we write coordinate $j$ of $u_l(\bar{\pi})$ as $u_{lj}(\bar{\pi})$.
Recall that the eigenvector $u_{\bar{l}}(\bar{\pi})$ is unique
(up to sign) if and only if $\lambda_{\bar{l}}(C_{\bar{\pi}})$ is simple.
If $\lambda_{\bar{l}}(C_{\bar{\pi}})$ is not simple, but instead
has multiplicity $r+1$ and
$\lambda_{\bar{l}}(C_{\bar{\pi}}) = 
\lambda_{\overline{l+1}}(C_{\bar{\pi}}) = \ldots = 
\lambda_{\overline{l+r}}(C_{\bar{\pi}})$, then we choose the vectors
$u_{\bar{l}}(C_{\bar{\pi}}), 
u_{\overline{l+1}}(C_{\bar{\pi}}), \ldots, 
u_{\overline{l+r}}(C_{\bar{\pi}})$ to form an orthonormal basis for the
associated eigenspace. For $\bar{\pi} \in \Re^M$, we define
the {\it continuous $t$-design associated with $\bar{\pi}$} to be the
vector $\bar{x}(\bar{\pi}) \in \Re^N$ given by
\[ \bar{x}_j (\bar{\pi}):= \sum_{l=1}^{t} u_{\bar{l}j}^2 (\bar{\pi}) \]
(see Lee (1994)). Its name comes from applying the following theorem
to the case where $t=s$.
\begin{theorem}
For any $\bar{\pi} \in \Re^M$, the vector $\bar{x}(\bar{\pi})$ satisfies
\[ \begin{array}{l}
\bsum{j \in N} \bar{x}_j(\bar{\pi}) = t; \\ \\
0 \leq \bar{x}_j(\bar{\pi}) \leq 1, \hspace{.2in} \forall ~j \in N.
\end{array} \]
\end{theorem}
\begin{proof}
The proof is identical to that of Proposition 5 of Lee (1994). We 
include it here for completeness.
\[
\bsum{j \in N} \bar{x}_j (\bar{\pi}) =  \bsum{j \in N} \Bsum{l=1}{t}
u_{\bar{l}j}^2 (\bar{\pi}) = 
 \Bsum{l=1}{t} \bsum{j \in N}
u_{\bar{l}j}^2 (\bar{\pi})   \]
\[ = \Bsum{l=1}{t} || u_{\bar{l}} (\bar{\pi}) ||_2^2 = \Bsum{l=1}{t} ~1 = t. \]
Clearly, $\bar{x}(\bar{\pi})$ is nonnegative. Let $U$ be the matrix having as
columns the eigenvectors $u_{\bar{l}}$, $l=1, \ldots, n$. By the choice
of the columns, we have that $U^TU = I$. That is, $U^T$ is the
inverse of $U$, hence we also have $U U^T = I$. But, the diagonal
entries of $UU^T$ are precisely the values $\sum_{l=1}^{n} u_{\bar{l}j}^2
(\bar{\pi})$, therefore $\sum_{l=1}^{t} u_{\bar{l}j}^2 (\bar{\pi}) \leq 1$.
\end{proof}

In Section 2.3, we discuss how we use continuous $t$-designs to
generate lower bounds. We continue now with computations leading
to expressions for gradients and subgradients.

\begin{lemma}\label{GR2}
{\it For $i \in M$, $\bar{\pi} \in \Re^M$, and
$1 \leq l \leq t$,}
\[ u_{\bar{l}}^T(\bar{\pi}) \cdot \bfrac{\partial C_{\pi}}{\partial
\pi_i} (\bar{\pi}) \cdot u_{\bar{l}}(\bar{\pi}) = 
- \lambda_{\bar{l}}(C_{\bar{\pi}}) \bsum{j \in N} a_{ij} \bar{x}_j(\bar{\pi})
~. \]
\end{lemma}
\begin{proof}
Using Lemma \ref{GR1}, and letting
\[ A_i:= -\frac{1}{2} diag (a_{i1}, \ldots, a_{in}), \]
we have that
 \begin{eqnarray*}
& &  u_{\bar{l}}^T(\bar{\pi}) \cdot \bfrac{\partial C_{\pi}}{\partial
\pi_i} (\bar{\pi}) \cdot u_{\bar{l}}(\bar{\pi})  \\
& & \\
& = & u_{\bar{l}}^T(\bar{\pi}) \cdot ( A_i
\cdot C_{\bar{\pi}} + C_{\bar{\pi}} \cdot
A_i) \cdot u_{\bar{l}}(\bar{\pi}) \\
& & \\
& = & u_{\bar{l}}^T(\bar{\pi}) \cdot A_i  \cdot 
C_{\bar{\pi}} u_{\bar{l}}(\bar{\pi}) + 
u_{\bar{l}}^T(\bar{\pi})  C_{\bar{\pi}} \cdot  A_i \cdot 
u_{\bar{l}}(\bar{\pi})  \\
& & \\
& = & u_{\bar{l}}^T(\bar{\pi}) \cdot  A_i \cdot \lambda_{\bar{l}}
(C_{\bar{\pi}})u_{\bar{l}}(\bar{\pi}) + 
 \lambda_{\bar{l}}(C_{\bar{\pi}})u_{\bar{l}}^T(\bar{\pi})
\cdot A_i \cdot  u_{\bar{l}}(\bar{\pi}) \\
& & \\
& = & 
- \lambda_{\bar{l}}(C_{\bar{\pi}}) \bsum{j \in N} a_{ij} u_{\bar{l}j}^2(\bar{\pi}) ~.
\end{eqnarray*} 
\end{proof}


\begin{lemma}\label{GR3}
{\it For $i \in M$, $\bar{\pi} \in \Re^M$, and
$1 \leq l \leq t$, if each of the $t$ greatest eigenvalues of
$C_{\bar{\pi}}$ is simple, then}
\[ \bfrac{\partial \lambda_{\bar{l}}(C_{\pi})}{\partial \pi_i} (\bar{\pi}) =
- \lambda_{\bar{l}}(C_{\bar{\pi}}) \bsum{j \in N} a_{ij} 
\bar{x}_j(\bar{\pi})
~. \]
\end{lemma}
\begin{proof}
One can show that
\[ \bfrac{\partial \lambda_{\bar{l}}(C_{\pi})}{\partial \pi_i} (\bar{\pi}) = 
u_{\bar{l}}^T
( \bar{\pi}) \cdot \bfrac{\partial C_{\pi}}{\partial \pi_i} (\bar{\pi})
\cdot u_{\bar{l}} (\bar{\pi}) \]
(see Tsing, Fan, and Verriest (1994), for example).
The result follows from Lemma \ref{GR2}.
\end{proof}

\begin{lemma}\label{GR4}
{\it For $i \in M$ and $\bar{\pi} \in \Re^M_+$,
if each of the $t$ greatest eigenvalues of
$C_{\bar{\pi}}$ is simple, then}
\[ \bfrac{\partial w(\pi)}{\partial \pi_i} (\bar{\pi}) =  -
\bsum{j \in N} a_{ij} \bar{x}_j(\bar{\pi}). \]
\end{lemma}

\begin{proof}
If $\lambda_{\bar{l}} (C_{\bar{\pi}})$ is simple for $1 \leq l \leq t$,
we have
 \begin{eqnarray*}
\bfrac{\partial w(\pi)}{\partial \pi_i} & = & \bfrac{\partial}{\partial \pi_i}
\left( \ln \Bprod{l=1}{t} \lambda_{\bar{\l}}(C_{\pi}) \right) \\
& & \\
& = &  \bfrac{\partial}{\partial \pi_i} \Bsum{l=1}{t} \ln \lambda_{\bar{l}}
(C_{\pi}) \\
& & \\
& = & \Bsum{l=1}{t}  \bfrac{\partial}{\partial \pi_i} \ln \lambda_{\bar{l}}
(C_{\pi}) \\
& & \\
& = & \Bsum{l=1}{t} \bfrac{1}{\lambda_{\bar{l}}(C_{\pi})} \bfrac{\partial
\lambda_{\bar{l}}(C_{\pi})}{\partial \pi_i} ,
\end{eqnarray*} 
and the result follows.
\end{proof}

To compute the gradient of $w$ at a point $\bar{\pi}$ under the
weaker assumption that $\lambda_{\bar{t}} (C_{\bar{\pi}})
> \lambda_{\overline{t+1}} (C_{\bar{\pi}})$, we appeal to a result
of Tsing, Fan, and Verriest.
%\cite{TSING}.

\begin{theorem} \label{tsing}
(Tsing, Fan, and Verriest (1994))
Let $A(z)$ be an $n \times n$ matrix whose elements depend analytically
on $z \in \Re^m$. Let
$P=\{ s_1, \ldots, s_q \}$ be a partition of  $ \{ 1, \ldots,
n \}$, and
let $f: \Re^n \rightarrow \Re$ be analytic and be invariant under
permutations of its arguments within the blocks $s_1, \ldots, s_q$.
(A function $f$ that satisfies this latter condition is said
to be ``symmetric with respect to $P$''). Let $P_{\lambda(a)}$
be the partition that groups the indices $i$ for which $\lambda_i(a)$
have the same value. Define a partial ordering $\prec$ on the set of
all partitions on $\{1, \ldots, n \}$ by $P' \prec P$ if and only if $P'$ is
formed by a possible further partitioning of the elements of $P$. 
\begin{enumerate}
\item
For any $a \in \Re^m$, if $P_{\lambda(a)} \prec P$, then the
composite function $g(x) = f(\lambda(x))$ is analytic at $a$.
\item 
Furthermore, suppose that $P_{\lambda(a)} \prec P$ for some $a \in \Re^n$. Then
for $i=1, \ldots, m$,
\[ \frac{\partial g(a)}{ \partial x_i} = \sum_{k=1}^{n} \frac{\partial
 f(\lambda(a))}{ \partial \lambda_k}  \cdot u_k^T(a) \frac{\partial
A(a)}{\partial x_i} u_k(a). \]
\end{enumerate}
\end{theorem}
Armed with the above theorem, we can now prove our result.

\begin{theorem}
{\it For $i \in M$ and $\bar{\pi} \in \Re^M_+$,
if $\lambda_{\bar{t}} (C_{\bar{\pi}}) >
\lambda_{\overline{t+1}} (C_{\bar{\pi}})$, then}
\[ \bfrac{\partial w(\pi)}{\partial \pi_i} (\bar{\pi}) =  -
\bsum{j \in N} a_{ij} \bar{x}_j(\bar{\pi}). \]
\end{theorem}

\begin{proof}
 Let $x \in \Re^n_+$, and consider 
the function
\[ f(x) := \ln \Bprod{l=1}{t} x_{l}.  \]
Then $f$ is analytic at all $x \in \Re^n_+$,
 and is symmetric with respect to the partition
$P = \{ \{1, \ldots, t \}, \{ t+1, \ldots, n \} \}$.
Since each entry of $C_{\pi}$ is analytic at all $\pi \in \Re^M$, it
follows that $C_{\pi}$ has continuous eigenvalues (see 
%\cite{TSING}
Tsing et al., p. 160).
Let $\{ \lambda_i (C_{\pi}) \}$ be the family of eigenvalues of
$C_{\pi}$. 
Since the eigenvalues of $C_{\pi}$ are continuous, there
exists a permutation
$\phi$ of $\{1, \ldots, n \}$ such that for some sufficiently
small neighborhood $U$ of $\bar{\pi}$,
\[ \lambda_{\phi(1)}(C_{\pi}) \geq
 \lambda_{\phi(2)}(C_{\pi}) \geq \ldots \geq
 \lambda_{\phi(n)}(C_{\pi}), \hspace{.2in} \forall ~\pi \in U. \]
 Let $\lambda (C_{\pi}) = 
( \lambda_{\phi(1)}(C_{\pi}),
 \lambda_{\phi(2)}(C_{\pi}), \ldots,
 \lambda_{\phi(n)}(C_{\pi}) ).$
It then follows that
\[ w(\pi) = f(\lambda(C_{\pi})), \hspace{.2in} \forall ~\pi \in U. \]
It follows from Theorem \ref{tsing}  that $w$ is analytic at $\bar{\pi}$
(under the assumption that
 $\lambda_{\bar{t}} (C_{\bar{\pi}}) > 
\lambda_{\overline{t+1}} (C_{\bar{\pi}})$).
Moreover, by the same theorem,
the first order partial derivatives of $w$ are given by:
\[ \bfrac{\partial w(\pi)}{\partial \pi_i} (\bar{\pi}) =
\bsum{l \in N} \bfrac{\partial f(\lambda(\bar{\pi}))}{\partial \lambda_l}
\cdot u_l^T(\bar{\pi}) \bfrac{\partial C_{\pi}}{\partial \pi_i}
(\bar{\pi}) u_l (\bar{\pi}). \]
The result follows from Lemma \ref{GR2}.
\end{proof}


Now we turn our attention to $w_1$. While it may
be that
$w_1$ is not differentiable
at a given point $\bar{\pi}$, it
will have a subgradient.

\begin{lemma}\label{GR5}
For $\bar{\pi} \in \Re^M_+$, let
\[ K_1(\bar{\pi}):= \{ K: K \subseteq N, |K| = s-t, w_1(\bar{\pi}) = 
\bsum{j \in K} \bsum{i \in M} \bar{\pi}_i a_{ij} \}. \]
If $K$ is in $K_1(\bar{\pi})$,
then $g(\bar{\pi})$ defined by $g_i(\bar{\pi}) := \bsum{j \in K}
a_{ij}$ is a subgradient of $w_1$ at $\bar{\pi}$.
\end{lemma}
\begin{proof}
Let  $K \in K_1(\bar{\pi})$.
Since $w_1$ is concave, we must show that
\[  (\pi - \bar{\pi})^T g(\bar{\pi}) 
\geq  w_1(\pi) - w_1(\bar{\pi}) \]
for all $\pi \in \Re^M$. Well,
 \begin{eqnarray*}
  (\pi - \bar{\pi})^T g(\bar{\pi})  
& = & \bsum{i \in M} \bsum{j \in K} \pi_i a_{ij} - 
 \bsum{i \in M} \bsum{j \in K} \bar{\pi}_i a_{ij} \\
& & \\
& = & \bsum{i \in M} \bsum{j \in K} \pi_i a_{ij} - 
 w_1(\bar{\pi}) \\
& & \\
& \geq & w_1(\pi) - w_1(\bar{\pi}).
\end{eqnarray*} 
\end{proof}

\begin{theorem} \label{big}
 Let $\bar{\pi} \in \Re^M_+$, and let
\[ K_1(\bar{\pi}):= \{ K: K \subseteq N, |K| = s-t, w_1(\bar{\pi}) = 
\bsum{j \in K} \bsum{i \in M} \bar{\pi}_i a_{ij} \}. \]
Assume that $\lambda_{\bar{t}}(C_{\bar{\pi}}) > 
\lambda_{\overline{t+1}}(C_{\bar{\pi}}).$
Then if $K$ is in $K_1(\bar{\pi})$, a subgradient of $v_1$
at $(\bar{\pi})$ is given by
\[ h(\bar{\pi}) + b - g(\bar{\pi}) \]
where $h(\bar{\pi})$ is defined by 
\[ h_i(\bar{\pi}) =
- \bsum{j \in N} a_{ij} \bar{x}_j(\bar{\pi}), \hspace{.2in} \forall
~i \in M, \]
and $g(\bar{\pi})$ is defined by
\[ g_i(\bar{\pi}) = \bsum{j \in K} a_{ij}, \hspace{.2in} \forall
~i \in M. \]
\end{theorem}
\begin{proof}
This follows from Lemmas \ref{GR4} and \ref{GR5}.
\end{proof}

Notice that in the case where $s=t$, we obtain an expression
for the {\it gradient}
of $v_1(\pi)$.

The only question remaining is how to proceed in the case
that, while searching for $\min_{\pi \in \Re^M_+} v_1(\pi)$,
we generate a $\bar{\pi}$ with
$\lambda_{\bar{t}}(C_{\bar{\pi}}) =
\lambda_{\overline{t+1}}(C_{\bar{\pi}}).$
In this case, we could terminate our search, and use $v_1 (\bar{\pi})$
as our upper bound.  In practice, however, we do not test
for this, and just blithely apply our descent method.

For our descent method, we use
the Broyden-Fletcher-Goldfarb-Shanno (BFGS) Quasi-Newton
method (see Peressini, Sullivan, and Uhl (1988));
this method takes an initial point $\pi^0$
and initial 
symmetric positive-definite matrix $D_0$,
then generates a sequence of  updated
symmetric positive-definite matrices $D_k$, and a sequence
of points
$\pi^k$ that, under certain conditions, converge
to the minimum.  The formula for
the $(k+1)^{\mbox{th}}$ iterate $\pi^{k+1}$ is given by:
\[ \pi^{k+1} = \pi^k - t_k ~D_k^{-1}~ \nabla v_1(\pi^k), \]
for an appropriate ``step-size'' $t_k$ (as determined by several
criteria). 
The FORTRAN routine (Zhu, Byrd, Lu, and Nocedal (1994))
that implements this descent
method requires on input
an initial point $\pi^0$ and an initial symmetric positive-definite
matrix $D_0$,
as well as expressions for
$v_1$ and its gradient.
In our
current computer implementation, we take $\pi^0 =$ {\bf 0}
and $D_0 = I$. However,
our descent method might perform more efficiently were we to let
$D_0$ be the hessian of $v_1$ at $\pi^0$.
We therefore derive below, an expression
for the hessian of $v_1$.

To compute the hessian of $v_1$, we appeal to a result of
Meyer and Stewart (1988) (also see Tsing et al.).
For
$\bar{\pi} \in 
\Re^M_+$ and $l=1, \ldots, t$, let
$\lambda_{\bar{l}}(C_{\bar{\pi}})$ denote the $l^{\mbox{th}}$ greatest
eigenvalue of $C_{\bar{\pi}}$, with
corresponding eigenvector $u_{\bar{l}}(\bar{\pi})$. We assume
that $\lambda_{\bar{l}}(C_{\bar{\pi}})$ is simple, and
that 
$u_{\bar{l}}(\bar{\pi})$ has Euclidean length 1.
Let $(U_{\bar{l}}(\bar{\pi}))_{n \times n-1}$
be a matrix whose columns form an orthonormal
basis for the range of $C_{\bar{\pi}} - (\lambda_{\bar{l}}(C_{\bar{\pi}}))I$.
Let $P_{\bar{l}}(\bar{\pi}) := (u_{\bar{l}}(\bar{\pi}) |
 U_{\bar{l}}(\bar{\pi}))$. Then 
$P_{\bar{l}}(\bar{\pi})$ is nonsingular, with
\[ (P_{\bar{l}}(\bar{\pi}))^{-1} = \left( 
\begin{array}{c}
(u_{\bar{l}}(\bar{\pi}))^T \\
------------- \\
(U_{\bar{l}}(\bar{\pi}))^T(I - 
(u_{\bar{l}}(\bar{\pi}) (z_{\bar{l}}(\bar{\pi}))^T) 
\end{array} \right) \]
(see Meyer and Stewart).
Define 
\[ R_{\bar{l}}(\bar{\pi}) :=
(U_{\bar{l}}(\bar{\pi}))^T C_{\bar{\pi}}
 U_{\bar{l}}(\bar{\pi})) - (\lambda_{\bar{l}}(C_{\bar{\pi}}))I . \]
It can be shown that $R_{\bar{l}}(\bar{\pi})$ is nonsingular (see
Meyer and Stewart). Finally, define
\[ Q_{\bar{l}}(\bar{\pi}) := P_{\bar{l}}(\bar{\pi}) \left( \begin{array}{ll}
    0 & \bf{0}  \\
    \bf{0} & (R_{\bar{l}}(\bar{\pi}))^{-1}
\end{array} \right) (P_{\bar{l}}(\bar{\pi}))^{-1}. \]
\begin{lemma} \label{MS}  (Meyer and Stewart (1988))
Let $A(\pi)$ be an $n \times n$  symmetric
matrix whose entries are real-valued
functions of $\pi \in \Re^M$.
Let $\lambda(\pi)$
be an eigenvalue of $A$ with associated eigenvector
$u(\pi)$, normalized to have Euclidean length 1.
Assume that $A(\pi), \lambda(\pi),$ and $u(\pi)$
are all defined on some domain $D$.
Let $\bar{\pi}$
be a point in $D$ such that $\lambda(\bar{\pi})$ is simple and  such that
each of $\frac{\partial A(\pi)}{\partial \pi_i} (\bar{\pi})$,
$\frac{\partial \lambda(\pi)}{\partial \pi_i} (\bar{\pi})$, and
$\frac{\partial u(\pi)}{\partial \pi_i} (\bar{\pi})$ exists, for
all $i \in M$. Then for all $i \in M$,
\[ \frac{\partial u(\pi)}{\partial \pi_i}(\bar{\pi}) = 
-Q(\bar{\pi}) \left( \frac{\partial A(\pi)}{\partial \pi_i}(\bar{\pi})
\right) u(\bar{\pi}), \]
where $Q(\bar{\pi})$ is defined in the natural way according to the
above discussion.
\end{lemma}
\begin{corollary}
Let $i,k \in M$, and  let $\bar{\pi} \in \Re^M_+$.
If each of the $t$
greatest eigenvalues of $C_{\bar{\pi}}$ is simple, then
\[ \frac{\partial^2 v_1 (\pi)}{\partial \pi_k ~\partial \pi_i}
(\bar{\pi}) = - 2 \bsum{j \in N} a_{ij} 
\Bsum{l=1}{t} u_{\bar{l}j} (\bar{\pi}) \nu_{\bar{l}j}(\bar{\pi}), \]
where $\nu_{\bar{l}j}$ is the $j^{\mbox{th}}$ component of the vector
\[ - Q_{\bar{l}}(\bar{\pi}) \left(\frac{\partial C(\pi)}{\partial \pi_k}
(\bar{\pi}) \right) u_{\bar{l}}(\bar{\pi}). \]
\end{corollary}
\begin{proof}
This follows directly from  Lemma \ref{MS} and Theorem \ref{big}.
\end{proof}

Since we now have an expression for the hessian of $v_1$
(at least in the case where the $t$ greatest eigenvalues
are distinct),
we could, as an alternative to the BFGS descent
method, 
use Newton's Method for minimizing $v_1$. In the case where
the hypothesis of Corollary 2.2.22 fails, we could 
still apply the
result to obtain an approximate hessian, taking the eigenvector $u$
from an orthonormal basis for the associated eigenspace.
Deriving the hessian
is relatively expensive, however; it requires computing
a matrix inverse and performing matrix multiplication.
Since we have not yet incorporated Newton's Method into our
computer implementation, it remains questionable
as to whether one descent method would outperform the other.
\section{An Algorithm for Solving the GCMESP}

Ko, Lee, and Queyranne 
%\cite{KLQ}
show that the CMESP is NP-Hard.
It follows that the GCMESP is NP-Hard as well, since the CMESP is
a special case of the GCMESP.

In this section, we describe an algorithm for solving the GCMESP.
Before we describe the algorithm in detail, we address the
situation where an $f$-set $F \subseteq N$, with $f < s$,
is assumed to be fixed into the solution from the onset.
Such fixing can be achieved by incorporating 
constraints that force each index in
$F$ to the value 1. When our GCMESP is a CMESP, there exists an alternate
technique, which relies on the observation that if $f$
elements $F$ are fixed into $S$, then one needs to choose $s':=s-f$ 
elements $S'$ from the $n'=n-f$ elements
$N'$, so as to maximize the {\it conditional} entropy of $S'$. 
In the case where $N$ is joint Gaussian, the
conditional distribution of $N'$ is joint Gaussian, with
covariance matrix
\[ C_{N'|F}:= C[N',N'] - C[N',F] (C[F,F])^{-1} C[F,N'] \]
(see Ko, Lee, Queyranne (1995)).
Assuming this distribution,
the conditional entropy of $S'$ (conditioned on
$F$) is, up to constants, equal to
\[ \ln \det C_{N'|F}[S',S'], \]
so we instead consider
the CMESP defined in terms of
$N'$, $s'$, and $C_{N'|F}$.  In particular, the ``preprocessed''
version of our CMESP is: 
\[ \begin{array}{llllll}
z & := &  \ln \det C[F,F] & + &  \max &
 \ln \det C_{N'|F}[S',S']  \\
& & & & & \\
& & &  & \mbox{s.t.} &
\bsum{j \in S'} a_{ij} \leq b_i - \bsum{j \in F} a_{ij},
\hspace{.1in} \forall ~i \in M; \\
& & & & & \\
& &  & & & S' \subseteq N'; \\
& & & & &  \\
& &  & & &  |S'| = s'.
\end{array} \]


Having dealt with the initial fixing of indices, whether by
introducing additional constraints in the general case, or 
by ``preprocessing'' the data in the case of the CMESP,
we are now ready to discuss the algorithm for solving the GCMESP.
The algorithm is of a branch-and-bound
variety, and incorporates the upper-bounding
techniques of the previous section. We use the BFGS descent method
to seek a minimum of our upper-bounding function.
In addition to the upper-bounding routine, the algorithm also
incorporates a procedure for computing lower bounds. This procedure
involves solving an integer program whose vector of objective function
coefficients equals the continuous $t$-design $\bar{x}(\bar{\pi})$
(see Section 2.2)
associated with $\bar{\pi}$,
the minimizer of $v_1({\pi})$. In the case where $t=s$, we
consider the following
integer program:
\[ \begin{array}{llll}
  & & \max & \bsum{j \in N} \bar{x}_j(\bar{\pi}) ~ x_j \\
& & & \\
& & \mbox{s.t.} & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &
 \bsum{j \in N} x_j = s; \\
& & & \\
& & &   x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N.
\end{array} \]
If $\bar{x}( \bar{\pi})$ is integer-valued and satisfies the
constraints $\sum_{j \in N} a_{ij} x_j \leq b_i$ $(i \in M)$,
then $x=\bar{x}(\bar{\pi})$
solves the above program.
If the continuous $t$-design is not integer-valued, then 
any feasible solution $x$ to the above program can be used to calculate
the lower bound $\ln \det C[\underline{x}, \underline{x}]$ for the CMESP.

In the general case, (where $t$ and $s$ are not necessarily equal),
we instead consider the integer program
\[ \mbox{(LB)} \hspace{.5in} \begin{array}{llll}
  & & \max & \bsum{j \in N} \bar{x}_j(\bar{\pi}) ~ y_j \\
& & & \\
 & & \mbox{s.t.} & y_j \leq x_j, \hspace{.2in} \forall ~j \in N; \\
 & & &   \\
 &  & & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in}
\forall ~i \in M; \\
 & & & \\
 & & &
  \bsum{j \in N} x_j = s; \\
 & & &  \\
 & & &   \bsum{j \in N} y_j = t; \\
 & & &  \\
 & & &   x_j, y_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N.
\end{array} \]
In this general case,
if $\bar{x}( \bar{\pi})$ is integer-valued and is such that there
exists a feasible vector $x$ so that 
 $\bar{x}_j(\bar{\pi}) \leq x_j$ for all $j \in N$,
then this vector $x$ together with the vector $y$, with
$y_j = \bar{x}_j(\bar{\pi})$, solve the above program.
Again, 
if the continuous $t$-design is not integer-valued, then
any integer solution to $\mbox{LB}$ can be used to calculate
a lower bound for the GCMESP.

In Section 2.4, with respect to the computer implementation,
we discuss a
set of options
that can be changed from one problem instance to the next;
one such option is the choice of whether to solve the above integer program
LB to generate a lower bound, or to give up if the solution of its
linear programming relaxation is not integer-valued.

One additional feature of the algorithm that warrants a discussion here
is a test for the ``size'' of the feasible region of the GCMESP.
The test we use is a natural extension of the following result.
\begin{theorem} \label{FEAS} (see Freund, Roundy, and Todd (1985))
Let $M_1, M_2$ be pairwise disjoint indexing sets.
Given the following set of constraints:
\[ \mbox{(P)  }  \hspace{.3in} \left \{
\begin{array}{l}
\bsum{j \in N}a_{ij} x_j \leq b_i, \hspace{.2in}
\forall ~i \in M_1;  \\
\\
\bsum{j \in N}a_{ij} x_j = b_i, \hspace{.2in}
\forall ~i \in M_2,  \\
\end{array}  \right.\]
consider the program
\[ (Q) \hspace{.3in}
 \begin{array}{lll}
    & {\rm max} & \displaystyle \bsum{i \in M_1, M_2}
                                        u_i\\
& & \\
                 & {\rm s.t.  } & \displaystyle \bsum{j \in N} a_{ij}x_j
                  +u_i - b_i w \leq 0, \hspace{.2in} \forall ~i \in M_1; \\
& & \\
                 &  & \displaystyle \bsum{j \in N} a_{ij}x_j
                  +u_i - b_i w = 0, \hspace{.2in} \forall ~i \in M_2; \\
& & \\
& &      0 \leq u_i \leq 1, \hspace{.2in} \forall ~i \in M_1, M_2; \\
& & \\
& & w \leq 1.
  \end{array} \]
Suppose $\bar{x}, \bar{u}, \bar{w}$ solves (Q). Then for $i \in M_1,
M_2$,
\[ \bar{u}_i = \left \{ \begin{array}{l}
        \mbox{1 if there exist solutions to $P$ that satisfy
inequality i with strict inequality;} \\
\\
        \mbox{0 if all solutions to $P$ satisfy inequality $i$ 
as an equation,}
   \end{array}
   \right. \]
and $\bar{x}/  \bar{w}$ is a relative interior solution of (P). 
\end{theorem}


Consider the GCMESP expressed as an integer program.
\[\mbox{(GCMESP)} \hspace{.3in}
\begin{array}{llllll}
& z & := &  \max & 
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},
\underline{x}]) \right) &\\
& & & & & \\
(1) &  & & \mbox{s.t.} & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M;  &
\\
& & & & & \\
(2)  & & & &
 \bsum{j \in N} x_j = s;  & \\
& & & & & \\
& & & &    x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N. &
\end{array} \]
As the first step of our algorithm,
we take $P$ to be the set (1) of constraints indexed by $M$,
together with the additional cardinality constraint (2), 
and the
lower and upper-bounding constraints
\[ -x_j \leq 0, ~x_j \leq 1, \hspace{.2in} \forall ~j \in N. \]
We then solve the
corresponding linear program $Q$. If the program $Q$ is infeasible,
then so is the GCMESP. If the $\bar{u}$ vector
indicates that
every variable is always equal to one of
its upper or lower bound, then the GCMESP has a unique
feasible point. If the relative
interior solution $\bar{x} / \bar{w}$ is fractional, we
test for uniqueness. In particular, we construct the matrix $A$
of tight-constraint coefficients (using information from the $\bar{u}$-vector)
and compute its singular value decomposition (using LAPACK).
The number of nonzero singular values is the rank of $A$. If the
rank of $A$ equals the number of variables $n$, then we have found
$n$ linearly-independent tight constraints, so the dimension
of the relative interior is zero. This  indicates that the fractional
relative interior solution is unique, and hence the original integer
program is infeasible.
In each of these cases,
we are finished. Otherwise, our
next step is to compute an
upper bound and a lower bound, 
using the techniques described previously.
We are now ready to begin the branching process. To understand
how subproblems are generated, we first need to define for 
the original problem (and later for subproblems) two associated
lists of indices. In particular, we define for each
problem $P_i$ in the
branching queue, an associated list of (fixed) indices $F_i$,
such that in $P_i$, $x_j=1$ for all $j \in F_i$; and an associated list of
(eligible) indices $E_i$,
such that in $P_i$, $x_j=0$ for all $j \in N \backslash (F_i \cup E_i)$.
 Thus, letting
$P_0$ denote the original problem, we have
\[ F_{0} = \emptyset, \hspace{.2in} E_{0} = N. \]
(Recall that if we preprocessed our data in the case of a CMESP, our
problem has been re-expressed
in terms of a new
set of variables. The set $N$ described here refers to this new
set of variables, obtained after preprocessing. Hence,
we always have $F_{0} = \emptyset.$)

>From the original problem, we create two new subproblems, $P_1$ and $P_2$,
by selecting an eligible variable, say $\hat{\j}$,  and assigning
it the value 1  in one new subproblem ($P_1$), and the value  0 in the
second new subproblem ($P_2$). That is, we define $P_1$ and $P_2$ by:
\[
(P_1) \hspace{.3in}
\begin{array}{llll}
& & \max &  \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},
\underline{x}]) \right)\\
& & & \\
& & \mbox{s.t.} & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &
 \bsum{j \in N} x_j = s; \\
\end{array} \]
\[ \begin{array}{llll}
& & & x_{\hat{\j}} \geq 1; \\
& & & \\
& & &   x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N; \\
& & & \\
& & & \\
\end{array}  \]
\[ (P_2) \hspace{.3in}
\begin{array}{llll}
& & \max &  \ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},
\underline{x}]) \right)\\
& & & \\
& & \mbox{s.t.} & \bsum{j \in N} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &
 \bsum{j \in N} x_j = s; \\
& & & \\
& & & x_{\hat{\j}} \leq 0; \\
& & & \\
& & &   x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N.
\end{array} \]
Letting $F_i$ ($E_i$) denote the set of fixed (eligible)
indices associated with problem $P_i$, we have 
\[ \begin{array}{ll}
F_1 = F_0 \cup \{ \hat{\j} \}, \hspace{.2in} & E_1 = E_0 \backslash \{
\hat{\j} \},  \\
& \\
F_2 = F_0,  \hspace{.2in} & E_2 = E_0 \backslash \{
\hat{\j} \} .
\end{array} \]
Note that a solution to the original GCMESP
will be a solution to exactly one of these subproblems.
Also note that the subproblems $P_1$ and $P_2$ are identical
to the original problem $P_0$ except for the addition of a 
single constraint.
We can therefore test for feasibility and  compute upper and lower bounds
using the methods described for $P_0$. When dealing with the CMESP,
there is an alternative way of handling the additional constraints.
Let us concentrate first on $P_1$. The subproblem
$P_1$ is identical to $P_0$ except that we assume in $P_1$
that variable $\hat{\j}$ is fixed into the solution. Hence,
we may view  $P_1$ as a CMESP in its own right, and therefore employ
the method previously described to ``preprocess'' the associated
set of variables, covariance matrix,
and constraints.  As for $P_2$, we adjust
its set of variables and covariance matrix as well. Since
in $P_2$, we are removing the index $\hat{\j}$ from consideration,
our problem reduces to that of
choosing $s$ indices from $N \backslash \{ \hat{\j} \}$, where our
updated $(n-1) \times (n-1)$ covariance matrix is simply the
original covariance matrix with row $\hat{\j}$ and column $\hat{\j}$
deleted.  Letting $C\backslash \{\hat{\j} \}$ denote this matrix
obtained by deleting row $\hat{\j}$ and column $\hat{\j}$ from
$C$, we can write the updated versions of $P_1$ and $P_2$
(in the case of the CMESP) as follows:
\[
\mbox{adjusted} \hspace{.1in} (P_1) 
\begin{array}{llll}
  & \ln \det C[ \{ \hat{\j} \}, \{ \hat{\j} \}] ~+ & \max & \ln \det 
C_{N'| \{\hat{\j} \}}[\underline{x},
\underline{x}] \\
& & & \\
& & \mbox{s.t.} & \bsum{j \in N'} a_{ij} x_{j} \leq b_i- 
a_{i \hat{\j}},  \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &
 \bsum{j \in N'} x_j = s-1; \\
& & & \\
& & &   x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N'; \\
& & & 
\end{array}  \]
\[
\mbox{adjusted} \hspace{.1in} (P_2) 
\begin{array}{llll}
& & \max &  \ln \det (C \backslash \{ \hat{\j}  \})
[\underline{x},
\underline{x}] \\
& & & \\
& & \mbox{s.t.} & \bsum{j \in N'} a_{ij} x_{j} \leq b_i,  \hspace{.2in} 
\forall ~i \in M; \\
& & & \\
& & &
 \bsum{j \in N'} x_j = s; \\
& & & \\
& & &   x_j \in \{0,1 \},  \hspace{.2in}  \forall ~j \in N'.
\end{array} \]

Now for each of $P_1$ and $P_2$ (which, in the case of the CMESP
have been adjusted in the way described above), the entire
process is repeated:
for each
subproblem, we solve an auxiliary linear program providing us with
feasibility information,
and compute an upper and lower bound. A subproblem
deemed infeasible is {\it fathomed by infeasibility}, and it
creates no new subproblems of its own. A subproblem having 
a unique feasible point is {\it fathomed by integrality}. Such a 
feasible point is not necessarily optimal to the original GCMESP,
but it is certainly a candidate, and we store the point in 
a global variable, say {\it bestpoint}, along with
its corresponding objective value in a global variable, say {\it bestvalue.}
That is, at any given time during the branch-and-bound process, {\it 
bestvalue}
contains the greatest (since we are maximizing) among
all objective values corresponding to points known to be feasible,
and {\it bestpoint} contains a feasible point with objective value
equal to  {\it bestvalue}.
If the subproblem has not been fathomed by either infeasibility or
integrality, its upper bound {\it ub} is computed,  using the technique
described in the previous section. If the value of
 {\it ub} is less than that of {\it bestvalue},
 the subproblem is {\it fathomed by bounds}. Since {\it ub}
is, by definition, an upper bound for that particular subproblem, 
all points feasible to that subproblem (and hence to any of its
subproblems) are guaranteed to have an objective value no greater than
{\it ub.} Since
we have already found a feasible point
yielding the larger value {\it bestvalue},
there is no reason to explore this subproblem further. Assuming
the subproblem is not fathomed by any of these three methods, a lower
bound is computed, {\it bestvalue} is possibly updated, and
two new subproblems are created. The entire process is then repeated
for each new subproblem.
Variations
in the algorithm occur by altering the order of branching; one can
explore subproblems depth-first, breadth-first, or according to the
values of their upper bounds. Branching continues until all subproblems
have been fathomed, in which case the current {\it bestpoint} is a 
solution, and the current
value of {\it bestvalue} is
the value of the original GCMESP.

The correctness of this algorithm does not depend on the method for
generating upper bounds; however, the 
efficiency of this algorithm does. Using a sufficiently large number,
say $s \ln \lambda_{\mbox{max}}(C)$,
for an upper bound at each
step of the branching process would not cause the above algorithm
to fail. However, poor upper bounds essentially eliminate the possibility
of ever fathoming by bounds, thereby causing many additional
subproblems to be generated. In fact, in this case, the above algorithm
would not perform any better than would the 
impractical strategy of enumerating all possible
feasible points, computing their objective values, and choosing the
best point.

Recall that
at any given stage of the branching process, each (feasible)
subproblem $P_i$ in the
branching tree has an associated list of (fixed) indices $F_i$,
such that in $P_i$, $x_j=1$ for all $j \in F_i$; and an associated list of
(eligible) indices $E_i$,
such that in $P_i$, $x_j=0$ for all $j \in N \backslash (F_i \cup E_i)$.
The questions arise as to
(1) whether we achieve a better upper bound 
by including in the constraint set for $P_i$, $|N \backslash (F_i \cup E_i)|$
separate
constraints, each of the form $x_j \leq 0$,
where $j \in N \backslash (F_i \cup E_i)$, or
by including a single constraint of the form
$ \bsum{j \in N \backslash (F_i \cup E_i)} x_j \leq 0$; and
(2) whether we achieve a better upper bound 
by including in the constraint set for $P_i$, $|F_i|$ separate
constraints, each of the form $x_j \geq 1$,
where $j \in F_i$, or by including a single constraint of the form
$ \bsum{j \in F_i} x_j \geq |F_i|$.

We show that when $M = \emptyset$, the upper bounds are the same in each
case. We require the following lemmas to prove this assertion.

\begin{lemma}\label{SD} (see Lancaster and Tismenetsky (1985), p.218)
If $A,B$ are positive semi-definite matrices, then the
matrix $AB$ is positive semi-definite.
\end{lemma}

\begin{lemma}\label{SD2}
Let $A,B_1,B_2$ be symmetric positive semi-definite matrices. If
$B_2 \succeq B_1$, then $B_2 A B_2 \succeq B_1 A B_1$.
\end{lemma}
\begin{proof}
Since each of $A, B_1, B_2 - B_1$ 
is positive semi-definite, then by Lemma \ref{SD},
the matrix $B_1 A (B_2-B_1) = B_1 A B_2 - B_1 A B_1$ is positive semi-definite.
Similarly,
since each of $A, B_2, B_2 - B_1$ 
is positive semi-definite, 
the matrix $B_2 A (B_2-B_1) = B_2 A B_2 - B_2 A B_1$ is positive semi-definite.
Hence,
\[ \begin{array}{lll}
B_1 A B_2 \succeq B_1 A B_1 & \Rightarrow &
(B_1 A B_2)^T \succeq (B_1 A B_1)^T \\
& & \\
& \Rightarrow & B_2 A B_1 \succeq B_1 A B_1.
\end{array} \]
Since $B_2 A B_2 \succeq B_2 A B_1$, we have that $B_2 A B_2 \succeq 
B_1 A B_1.$
\end{proof}

\begin{lemma}
Let $C$ be a symmetric positive-definite matrix, with rows and columns
indexed by $N = \{ 1, \ldots, n \}$. Let $0 < t \leq s \leq n$.
Let $E,F \subseteq N$, and let $L = N \backslash (E \cup F)$.
Consider the following two problems $P_1$ and $P_2$,
which have the same solution:
\[ (P_1) \hspace{.3in}
 \begin{array}{lllll}
& z & := &  {\rm max}  &
 \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},\underline{x}])
\right) \\
& & & & \\
& & & {\rm s.t.} &
x_j \leq 0, \hspace{.1in} \forall ~j \in L; \\
& & & & \\
& & & &  \bsum{j \in N} x_j = s; \\
& & & & \\
& & & & x_j \in \{ 0,1 \},
\hspace{.1in} \forall  ~j \in N ,
\end{array} \]
and
\[ (P_2)   \hspace{.3in}
 \begin{array}{lllll}
& z & := & {\rm max} &
 \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},\underline{x}])
\right) \\
& & & &  \\
& & & {\rm s.t.} &  \bsum{j \in L} x_j \leq 0; \\
& & & &  \\
& & &  &  \bsum{j \in N} x_j = s; \\
& & & &  \\
& & & &  x_j \in \{ 0,1 \},
\hspace{.1in} \forall ~j \in N . 
\end{array} \]
For $\pi \in \Re^L_+$, let $v_1'(\pi)$ be the upper bound for $z$
as derived from $P_1$,
and for $\pi \in \Re_+$, let $v_1''(\pi)$ be the upper bound for 
$z$ as derived from $P_2$.
Then, if $P_1$ ($P_2$) is feasible,
\[ \min_{\pi \in \Re^L_+} v_1'(\pi) = 
 \min_{\pi \in \Re_+} v_1''(\pi)  .\]
\end{lemma}
\begin{proof}
We first show that 
\[ \min_{\pi \in \Re^L_+} v_1'(\pi) \leq
 \min_{\pi \in \Re_+} v_1''(\pi)  . \]
Let $\pi'' = \mbox{argmin} \{ v_1''(\pi) : \pi \in \Re_+ \}.$
Then defining $\pi' \in \Re^L_+$ by $\pi_i' = \pi''$, we have
that  \[v_1'(\pi') = v_1''(\pi'') = 
 \min_{\pi \in \Re_+} v_1''(\pi) \]
thus establishing the inequality.
Next, we show that
\[  \min_{\pi \in \Re_+} v_1''(\pi)  \leq
 \min_{\pi \in \Re^L_+} v_1'(\pi)  . \]
Let $\pi' = \mbox{argmin} \{v_1'(\pi) : \pi \in \Re^L_+ \}.$
Let $\pi'' = \max_{i \in L} \pi_i'$.
Then $0 \preceq 
D_{\pi''} \preceq D_{\pi'}$, hence $D_{\pi''} C D_{\pi''} \preceq
D_{\pi'} C D_{\pi'}$ (by Lemma \ref{SD2}), hence $\ln ( \Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi''})) \leq
\ln ( \Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi'}))$ (by Lemma \ref{H774}).
%\cite{HORN}).
Let $ A_1 = \{ a'_{ij} \} $ be the coefficient matrix of the 
system of constraints in $P_1$, and
let $ A_2 = \{ a''_{ij} \} $ be the coefficient matrix of the 
system of constraints in $P_2$.
Since the problem is feasible, $|N \backslash L| \geq s \geq s-t$, hence
\[ \min_{K \subset N, |K| = s-t} \bsum{j \in K} \bsum{i \in L} \pi_i'
a'_{ij} = 
 \min_{K \subset N, |K| = s-t} \bsum{j \in K} \pi'' a''_{ij} = 0 \]
so
\[ v_1''(\pi'') =
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''}) \right) \leq 
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'})\right) = v_1'(\pi') \]
and the second inequality is established.
\end{proof}

We would like to verify a similar result for indices fixed into
the solution (i.e. variables set equal to 1).
We might hope that the proof would mimic that of the above theorem.
Attempting such a proof, a logical choice for $\pi''$ in the second
(or reverse) inequality is
$\pi'' = \min_{i \in F} \pi_i'$
(where $F$ is the set of fixed indices), since we took
$\pi'' = \max_{i \in L} \pi_i'$ above.
When $C$ is diagonal, this is a valid choice.

\begin{lemma}\label{DIAG}
Let $C$ be a symmetric positive-definite diagonal matrix, with rows and columns
indexed by $N= \{ 1, \ldots, n \}$. Let $0 < t \leq s \leq n$.
Let $F \subseteq N$.
Consider the following two problems $P_1$ and $P_2$,
which have the same solution:
\[ (P_1) \hspace{.3in}
 \begin{array}{lllll}
& z & := & \max  &
 \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},\underline{x}])
\right) \\
& & & & \\
& & & {\rm s.t.} &  x_j \geq 1, \hspace{.1in} \forall ~j \in F; \\
& & & & \\
& & & &  \bsum{j \in N} x_j=s; \\
& & & & \\
& & & &  x_j \in \{0,1 \},
\hspace{.1in} j \in N;
\end{array} \]
\[ (P_2)  \hspace{.3in}
 \begin{array}{lllll}
& z& := & \max &
 \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},\underline{x}])
\right) \\
& & & & \\
& & & {\rm s.t.} &  \bsum{j \in F} x_j \geq |F|; \\
& & & & \\
& & & & \bsum{j \in N} x_j=s; \\
& & & & \\
& & & &  x_j \in \{0,1 \},
\hspace{.1in} \forall ~j \in N. 
\end{array} \]
For $\pi \in \Re^F_+$, let $v_1'(\pi)$ be the upper bound for $z$
as derived from $P_1$,
and for $\pi \in \Re_+$, let $v_1''(\pi)$ be the upper bound for 
$z$ as derived from $P_2$. Then if $P_1$ ($P_2$) is feasible,
\[ \min_{\pi \in \Re^F_+} v_1'(\pi) =
 \min_{\pi \in \Re_+} v_1''(\pi)  .\]
\end{lemma}
\begin{proof}
We first show that
\[ \min_{\pi \in \Re^F_+} v_1'(\pi) \leq
 \min_{\pi \in \Re_+} v_1''(\pi)  .\]
Let $\pi'' = \mbox{argmin} \{ v_1''(\pi) : \pi \in \Re_+ \}.$
Then defining $\pi' \in \Re^F_+$ by $\pi_i' = \pi''$, we have
that  \[v_1'(\pi') = v_1''(\pi'') =
 \min_{\pi \in \Re_+} v_1''(\pi) \]
thus establishing the inequality.

In showing the reverse inequality, we must consider two cases.
 The first case is where $|F|-(s-t) \leq 0.$
Let $\pi' = \mbox{argmin} \{v_1'(\pi) : \pi \in \Re^F_+ \}.$
Let $\pi'' = \min_{i \in F} \pi_i'$.
Then $0 \preceq
D_{\pi''} \preceq D_{\pi'}$, hence $D_{\pi''} C D_{\pi''} \preceq
D_{\pi'} C D_{\pi'}$ (by Lemma \ref{SD2}), hence $\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l
}}
(C_{\pi''})\right) \leq
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi'})\right)$ (by Lemma \ref{H774}).
%\cite{HORN}).
Let $ A_1 = \{ a'_{ij} \} $ be the coefficient matrix of the
system of constraints in $P_1$, and
let $ A_2 = \{ a''_{ij} \} $ be the coefficient matrix of the
system of constraints in $P_2$. Then
\[ \bsum{i \in F} (-1)\pi_i' -  \min_{K \subset N, |K| = s-t} 
\bsum{j \in K} 
\bsum{i \in F} \pi_i'
a'_{ij} = -|F| \pi'' -
 \min_{K \subset N, |K| = s-t} \bsum{j \in K} \pi'' a''_{ij} = 0 \]
so
\[ v_1''(\pi'') =
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''})\right) \leq
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'}) \right) = v_1'(\pi') \]
and the second inequality is established. 

We now consider the case where $|F|-(s-t) \geq 0$.
Let $\pi' = \mbox{argmin} \{ v_1'(\pi) : \pi \in \Re^F_+ \}$.
Let $\pi'' = \min_{i \in F} \pi_i'$. 
Let $\sigma$ be a permutation of $\{ 1, \ldots, n \}$ so that
$\pi'_{\sigma_{(1)}} \leq \ldots \leq \pi'_{\sigma_{(n)}}$.
Note that
\[ v_1'(\pi') = \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'}) \right)
-\pi'_{\sigma_{(1)}} - \ldots -\pi'_{\sigma_{(|F|-(s-t))}} \]
and
\[ v_1''(\pi'') = \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''}) 
\right)
- (|F| - (s-t)) \pi''. \]
The claim is that
$v_1''(\pi'') \leq v_1'(\pi').$  Let $T' \subseteq N$ be such that
\[ v_1'(\pi') = \bsum{i \in T' \backslash F} \ln c_i + \bsum{i \in T' \cap F}
(\ln c_i + \pi_i') - 
\pi'_{\sigma_{(1)}} - \ldots -\pi'_{\sigma_{(|F|-(s-t))}}. \]
Let $T'' \subseteq N$ be such that
\[ v_1''(\pi'') = \bsum{i \in T'' \backslash F} \ln c_i +
\bsum{i \in T'' \cap F} (\ln c_i + \pi'') - 
(|F| - (s-t)) \pi''. \]
Let $L = T'' \cap F$. 
Note that we can assume $|L| \geq |F|-(s-t)$.
For suppose that $|L| < |F|-(s-t)$. Then there is some $i \in T'' \backslash
F$ and $j \in (N \backslash T'') \cap F$ such that $\ln c_i 
\geq \ln c_j + \pi''$.
Among all such possible pairs $c_i$ and $c_j$, choose one so that
the difference $\ln c_i - (\ln c_j + \pi'')$ is as small as possible.
Assume the difference is attained by $c_{\tilde{\i}}$ and $c_{\tilde{\j}}$.
Now, let $\pi''' = \pi'' + (\ln c_{\tilde{\i}} - (\ln c_{\tilde{\j}} + \pi''))$.
Let $T''' \subseteq N$ be such that
\[ v_1''(\pi''') = \bsum{i \in T''' \backslash F} \ln c_i +
\bsum{i \in T''' \cap F} (\ln c_i + \pi''') - 
(|F| - (s-t)) \pi'''. \]
Then $v_1''(\pi''') \leq  v_1''(\pi'')$ and $|T''' \cap F| > |T'' \cap F|$,
so can take $\pi' = \pi'''$.
Let $L'$ be any cardinality $|F| - (s-t)$ subset of $L$.
Then,
 \begin{eqnarray*}
v_1'(\pi') & = & 
\ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'}) \right) -  
\Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & \\
& = & \bsum{i \in T' \backslash F} \ln c_i + \bsum{i \in T' \cap F}
(\ln c_i + \pi_i') - 
\Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & \\
& \geq & 
\bsum{i \in T'' \backslash F} \ln c_i + \bsum{i \in T'' \cap F}
(\ln c_i + \pi_i') - 
\Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & \\
& \geq & 
\bsum{i \in T'' \backslash F} \ln c_i + \bsum{i \in T'' \cap F}
(\ln c_i + \pi_i') - \bsum{i \in L'} \pi_i' \\
& & \\
& = & \bsum{i \in T''} (\ln c_i) + \bsum{i \in T'' \backslash L'} \pi_i' \\
& & \\
& \geq &
 \bsum{i \in T''} (\ln c_i) + \bsum{i \in T'' \backslash L'} \pi'' \\
& & \\
& = & v_1''(\pi''). 
\end{eqnarray*} 
\end{proof}

Although true for the diagonal case,
it is not true in general that given 
$\pi' = \mbox{argmin} \{v_1'(\pi) : \pi \in \Re^F_+ \}$, we have that
$v_1''(\pi'') \leq v_1'(\pi')$ where $\pi'' = \min_{i \in F} \pi_i'$.
To see this, consider any example where $s=t$. Then using the notation
from Lemma \ref{DIAG}, and defining the diagonal matrix $\tilde{D}$ by
\[ (\tilde{D})_{ii} = \left \{ \begin{array}{ll}
          \exp \{ \frac{1}{2}(\pi'_i - \pi'') \}, & \mbox{if}
\hspace{.1in} i \in F; \\
& \\
          1, & \mbox{otherwise},
\end{array} \right. \]
we have that
\[  \begin{array}{llll}
& v_1'(\pi') & =  & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'})
\right)
-\Bsum{i=1}{|F|} \pi'_{\sigma_{(i)}} \\
& & & \\
&  & =  & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (D_{\pi'}C D_{\pi'})
\right)
-\Bsum{i=1}{|F|} \pi'_{\sigma_{(i)}} \\
\end{array} \]
\[  \begin{array}{llll}
&  & =  & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (\tilde{D}D_{\pi''}C 
D_{\pi''} \tilde{D}) \right)
-\Bsum{i=1}{|F|} \pi'_{\sigma_{(i)}} \\
& & & \\
(1) & & \leq  & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (\tilde{D})^2
\lambda_{\bar{l}} (D_{\pi''}C 
D_{\pi''}) \right)
-\Bsum{i=1}{|F|} \pi'_{\sigma_{(i)}} \\
& & & \\
(2) & & = & \bsum{i \in F}(\pi_i' -\pi'') 
 + \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''}) \right )
-\Bsum{i=1}{|F|} \pi'_{\sigma_{(i)}} \\
& & & \\
& & = &  \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''}) \right) - 
|F| \pi'' \\
& & & \\
& & = & v_1''(\pi''),
\end{array} \]
where equality $(2)$ follows from the fact that $t \geq |F|$.
In general, $(1)$ will not be an equality, so we need only find
an example where the inequality is strict.

\begin{example}
Let $N = \{ 1,2,3,4 \}$, $F = \{ 1,2 \}$, $s=t=3$, and
\[ C:= \left[ \begin{array}{rrrr}
  5.0 & 1.0 & -1.0 & 2.0 \\
& & & \\
  1.0 & 4.0 & 0.5 & 1.5 \\
& & & \\
  -1.0 & 0.5 & 3.5 & -2.0 \\
& & & \\
  2.0 & 1.5 & -2.0 & 6.0
\end{array} \right]. \]
\end{example}
Here, $C$ is positive definite since all of its eigenvalues are positive.
The first claim is that  $v_1''(\pi)$ is decreasing. To see this,
let $\pi' > \pi''$.
Define the diagonal matrix $\tilde{D}$ by $(\tilde{D})_{ii} =
\exp(\frac{1}{2}(\pi'-\pi''))$. Then
\[ \begin{array}{llll}
& \ln \left(\Bprod{l=1}{t} (\lambda_{\bar{l}} (C_{\pi'}) \right) -
\ln \left(\Bprod{l=1}{t} (\lambda_{\bar{l}} (C_{\pi''}) \right) & = &
\ln \left(\Bprod{l=1}{t} (\lambda_{\bar{l}} (C_{\pi'}) 
(\lambda^{-1}_{\bar{l}}(C_{\pi''})
) \right) \\
& & & \\
& & \leq &
\ln \left(\Bprod{l=1}{t} (\lambda_{\bar{l}} (\tilde{D}D_{\pi''}
C D_{\pi''} \tilde{D})(\lambda^{-1}_{\bar{l}}(C_{\pi''}) \right) \\
& & & \\
(1) & & \leq &
\ln \left(\Bprod{l=1}{t} (\lambda_{\bar{l}} (\tilde{D})^2
\lambda_{\bar{l}} (C_{\pi''})
(\lambda^{-1}_{\bar{l}}(C_{\pi''}) \right) \\
& & & \\
& & = &
\ln \left(\Bprod{l=1}{t} (\lambda_{\bar{l}} (\tilde{D})^2 \right)  \\
\end{array}\]
\[ \begin{array}{llll}
& & & \\
& & = & \min(t, |F|) \cdot (\pi'-\pi'') \\
& & & \\
(2) &  &  =  &|F|(\pi'-\pi'') \\
& & & \\
& & = & (|F| - (s-t)) (\pi'-\pi'').
\end{array}  \]
(Inequality (1) follows from Corollary \ref{HJ}; equation (2) follows
from the fact that $t=s \geq |F|$.)
Hence, $v_1''(\pi)$ is decreasing. 
Moreover, since $v_1''(5.0) = v_1''(6.0) = 4.74$, and since $v_1''$ is convex,
$\min_{\pi \in \Re} v_1''(\pi) = 4.74$ and $v_1''(\pi) = 4.74$ for
all $\pi \geq 5.0$. \\
\begin{figure}[h]
\hspace{1.20in}
\psfig{figure=down.ps,height=3.5in,width=3.5in,angle=270}  
\caption{Graph of $v_1''(\pi)$ } \label{fig3}
\end{figure} 

Moreover, by convexity of $v_1'$, we have that
for any $\pi' \in \Re^F_+$ such that $\pi'_i \geq
5$ for all $i$, we will have $v_1'(\pi') = v_1''(5) = 4.74$. Again, since
$v_1'$ is convex, it must be that 
\[ \min_{\pi \in \Re^F_+} v_1'(\pi') = 4.74. \]
Defining $\pi' \in \Re^F_+$ by $\pi_1' = 1.0$, $\pi_2' = 9.0$,
one can verify that $v_1'(\pi') = 4.74$. Hence
\[  [ 1.0, 9.0]^T = \mbox{argmin} \{v_1'(\pi) : \pi \in \Re^F_+ \}. \]
However, defining $\pi'':= \min_{i \in F} \pi_i' = 1.0$, we have that
$v_1''(\pi'') =  4.78 > 4.74$.

To prove that $\min_{\pi \in \Re_+^F} v_1'(\pi) =
\min_{\pi \in \Re_+} v_1''(\pi)$ in the more general case,
we resort to a different strategy.
\begin{lemma}
Let $C$ be a symmetric positive-definite matrix, with rows and columns
indexed by $N= \{ 1, \ldots, n \}$. Let $0 < t \leq s \leq n$.
Let $F \subseteq N$.
Consider the following two problems $P_1$ and $P_2$,
which have the same solution:
\[ (P_1) \hspace{.3in}
 \begin{array}{lllll}
& z& := & \max &
 \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},\underline{x}])
\right) \\
& & & & \\
& & & {\rm s.t.} &  x_j \geq 1, \hspace{.1in} \forall ~j \in F;  \\
& & & & \\
& & & & \bsum{j \in N} x_j=s; \\
& & & & \\
& & & &  x_j \in \{0,1 \},
\hspace{.1in} \forall  ~j \in N,
\end{array} \]
\[ (P_2)  \hspace{.3in}
 \begin{array}{lllll}
& z & := & \max  &
 \ln  \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C[\underline{x},\underline{x}])
\right) \\
& & & & \\
& & & {\rm s.t.} &  \bsum{j \in F} x_j \geq |F|;\\
& & & & \\
& & & &  \bsum{j \in N} x_j=s; \\
& & & & \\
& & & &  x_j \in \{0,1 \},
\hspace{.1in} j \in N. 
\end{array} \]
For $\pi \in \Re^F_+$, let $v_1'(\pi)$ be the upper bound for $z$
as derived from $P_1$,
and for $\pi \in \Re_+$, let $v_1''(\pi)$ be the upper bound for 
$z$ as derived from $P_2$.
Then, if $P_1$ ($P_2$) is feasible, then
\[ \min_{\pi \in \Re^F_+} v_1'(\pi) =
 \min_{\pi \in \Re_+} v_1''(\pi)  .\]
\end{lemma}
\begin{proof}
By Lemma \ref{DIAG}, we have that
\[ \min_{\pi \in \Re^F_+} v_1'(\pi) \leq
 \min_{\pi \in \Re_+} v_1''(\pi)  .\]
In showing the reverse inequality, we must consider two cases.
In the first case, we assume $|F|-(s-t) \leq 0$.
Let $\pi' = \mbox{argmin} \{v_1'(\pi) : \pi \in \Re^F_+ \}.$
Let $\pi'' = \min_{i \in F} \pi_i'$.
Then $0 \preceq
D_{\pi''} \preceq D_{\pi'}$, hence $D_{\pi''} C D_{\pi''} \preceq
D_{\pi'} C D_{\pi'}$ (by Lemma \ref{SD2}), hence $\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l
}}
(C_{\pi''}) \right) \leq
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}}
(C_{\pi'}) \right)$ (by Lemma \ref{H774}).
%\cite{HORN}).
Let $ A_1 = \{ a'_{ij} \} $ be the coefficient matrix of the
system of constraints in $P_1$, and
let $ A_2 = \{ a''_{ij} \} $ be the coefficient matrix of the
system of constraints in $P_2$. Then
\[ \bsum{i \in F} (-1)\pi_i' -  \min_{K \subset N, |K| = s-t} \bsum{j \in K} 
\bsum{i \in F} \pi_i'
a'_{ij} = -|F| \pi'' -
 \min_{K \subset N, |K| = s-t} \bsum{j \in K} \pi'' a''_{ij} = 0 \]
so
\[ v_1''(\pi'') =
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''})\right) \leq
\ln \left( \Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'})\right) = v_1'(\pi') \]
and the second inequality is established. 

Now, we consider the case where $|F|-(s-t) \geq 0$.
Let $\pi' = \mbox{argmin} \{v_1'(\pi) : v \in \Re^F_+ \}$.
Let $\sigma$ be a permutation of $\{ 1, \ldots, n \}$ so that
$\pi'_{\sigma_{(1)}} \leq \ldots \leq \pi'_{\sigma_{(n)}}$.
Note that
\[ v_1'(\pi') = \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'}) \right)
-\pi'_{\sigma_{(1)}} - \ldots -\pi'_{\sigma_{(|F|-(s-t))}}. \]
Let $\pi'' = \pi'_{\sigma_{(|F|-(s-t))}}.$
Then
\[ v_1''(\pi'') = \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi''})
\right)
- (|F| - (s-t)) \pi''. \]
The claim is that
$v_1''(\pi'') \leq v_1'(\pi').$ 
\[ \begin{array}{llll} 
& v_1'(\pi') & = & 
\ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (C_{\pi'}) \right)
- \Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & & \\
& & = & \ln \left(\Bprod{l=1}{t} \lambda_{\bar{l}} (D_{\pi'} C D_{\pi'})
\right)
- \Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & & \\
& & = & 
 \ln \left(\Bprod{l=1}{t} 
\lambda_{\bar{l}}^2
(D_{\pi''}D_{\pi'}^{-1})
\lambda_{\bar{l}}^{-2}
(D_{\pi''}D_{\pi'}^{-1})
\lambda_{\bar{l}}
(D_{\pi'} C D_{\pi'})\right)
- \Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & & \\
& & \geq & 
 \ln \left(\Bprod{l=1}{t} 
\lambda_{\bar{l}}
(D_{\pi''} C D_{\pi''})
\lambda_{\bar{l}}^{-2}
(D_{\pi''}D_{\pi'}^{-1}) \right)
- \Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & & \\
& & = & 
 \ln \left(\Bprod{l=1}{t} 
\lambda_{\bar{l}}(C_{\pi''}) \lambda_{\underline{l}}^2
(D_{\pi'}D_{\pi''}^{-1}) \right)
- \Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
& & & \\
(1) &  & = & 
 \ln \left(\Bprod{l=1}{t} 
\lambda_{\bar{l}}(C_{\pi''}) \right) + \Bsum{i=1}{|F|-(s-t)}
(\pi_{\sigma_{(i)}}' - \pi'') 
- \Bsum{i=1}{|F|-(s-t)} \pi_{\sigma_{(i)}}' \\
\end{array} \]
\[ \begin{array}{llll}
& & & \\
&  & = & 
 \ln \left(\Bprod{l=1}{t} 
\lambda_{\bar{l}}(C_{\pi''}) \right)  - (|F|-(s-t)) \pi'' \\
& & & \\
& & = & v_1''(\pi''),
\end{array}  \]
where $(1)$ follows from the facts that
 \[ t \geq |F|-(s-t) \]
and
\[ n-|F| \geq t-(|F|-(s-t)). \]
\end{proof}

Thus, we have shown that 
whether we force a set of variables to 1 (or 0)
using one or several constraints, we obtain the same
upper bound in both cases.

\section{Computer Implementation}

We coded the branch-and-bound algorithm  for solving the CMESP,
a special case of the GCMESP,
in C.  We first describe the serial version; we discuss
the parallel implementation later in this section.

For a given problem PROB, we start with the  preprocessing
stage described in Section 2.3, if in fact there is a set
of indices assumed to be initially fixed into the solution. Several files
are read during the preprocessing stage. The first such
file, PROB.OH, contains the original values of $n$ (the number
of eligible indices),
$s$ (the number of indices to select),
and $m$ (the number of constraints).
We assume that $N = \{ 0, \ldots, n-1 \}$.
Next, the original $n \times n$
covariance matrix is read from the file PROB.OC,
and the set of $m$ constraints is read from the file
PROB.OA.  Finally,
the number $f$ of fixed indices is read, as is the
list $F$ itself, from the file PROB.F. At this point, 
the numbers $n-f$, $s$, and $m$ are written to a new
header file PROB.H.
The adjusted  covariance matrix and constraints are
written to the files PROB.C and PROB.A respectively, 
thereby completing the preprocessing
stage. Note that upon termination of the preprocessing stage,
PROB represents a completely different CMESP; the numbers
of eligible indices, indices to select, and constraints
are written in the file
PROB.H, its covariance matrix is that contained in the
file PROB.C, and its constraint set is that contained in the
file PROB.A.
>From the program's perspective,
there is no record that any preprocessing was ever
performed on this problem; it is treated as any other CMESP.

We now turn our attention to the main program, which
actually implements the branch-and-bound algorithm. One of
the key features of the program is the data
structure used to store the active subproblems;
the structure we use is a {\it queue.}
That is, at any given stage of the process, there is a  ``master
queue'' containing all of the active subproblems, where
a subproblem is deemed active if it has had its upper bound
computed, has not been fathomed, and  has not yet
created subproblems of its own. A subproblem in the
queue is uniquely determined by three pieces of information:
its associated list of eligible indices, its associated
list of fixed indices, and its upper bound.

Understanding how subproblems are stored, we can now outline
the main program.
We begin by reading a header file PROB.H, containing
the values of $n$, $s$, and $m$.
Next, the constraints are read in from the file PROB.A,
followed by the covariance matrix from the file PROB.C. 
The next input file is an options file, PROB.O, containing
nine options that affect the execution of the program.
We describe each option below.
\begin{enumerate}
\item The CPLEX option. Its syntax is:
\[
\mbox{CPLEX IP/LP/OFF.} \]
Our code
makes use of the linear programming library CPLEX 4.0.
We use CPLEX routines to determine relative interior
solutions; we also use them 
to solve 
lower-bounding programs (see Section 2.3).
The CPLEX option regulates the extent to which we use CPLEX
to attempt to solve the lower-bounding programs, if at all.
Selecting IP indicates to CPLEX to use integer
programming methods;
selecting LP indicates to CPLEX to use
linear programming methods on the relaxation of the
lower-bounding integer program.
Selecting OFF 
omits the lower-bounding procedure completely. If we opt for IP,
then we have a greater chance of finding an (integer) feasible
point, which can then be used to possibly update the global
lower bound (i.e. the objective value corresponding to the
best feasible point currently known).
Recall that good lower bounds often lead to increased instances
of fathoming.
However, the time required for CPLEX to solve an
integer program 
usually exceeds the time required to solve its linear programming
relaxation.
Each of these two competing factors affect the overall
run time;
it is conceivable that IP will be the more efficient
choice for some problem instances, whereas LP will be the more
efficient choice for others.


\item The UPPER option. Its syntax is:
\[
\mbox{UPPER ON/OFF}. \]
Selecting ON causes the program to calculate the
maximum upper bound of the active subproblems at each iteration. 
The calculation of this upper bound necessitates a search through the entire
queue of subproblems. 
To understand why this could be a useful procedure,
note that
as we conduct the search, we compare each
upper bound with the global lower bound, 
and then fathom by bounds if possible. Therefore,
this procedure potentially saves memory, by reducing the 
length of the master queue. There is one case where the maximum
upper bound is computed by default, regardless of the choice of parameter
for this option. This is explained in the description of the BRANCH
option (see below).
Barring this exception, selecting OFF
causes the program not to calculate the maximum upper bound
of the active subproblems at each iteration.

\item The BRANCH option. Its syntax is:
\[ \mbox{BRANCH DFS/BFS/MAX}. \]
Selecting DFS uses a depth-first search strategy to access
subproblems on the master queue. That is, after work is
completed on one subproblem, the next one considered is the
one located at the tail of the list (i.e. the one most recently
placed on the queue).
Selecting BFS uses a breadth-first search strategy to access
subproblems on the master queue. That is, after work is completed
on one subproblem, the next one considered is the one located
at the head of the list.
Selecting MAX causes branching to be done on the subproblem with the
maximum upper bound. If in fact the BRANCH option is set to MAX,
the maximum upper bound of all active subproblems is computed after
each iteration (regardless of the choice of parameter for the UPPER option).
Upon completion of the search for this maximum upper bound,
a pointer is directed at a subproblem yielding that upper bound.
In the event that UPPER is set to OFF,
it is computationally more expensive to
locate and remove from the queue a subproblem with maximum upper bound
than it is to remove
a subproblem located either at the head or at the tail
of the queue, due to the  search procedure required in the former case.
We maintain a global variable that points to the head
of the current queue, and it is not difficult to redirect this
pointer to the next active subproblem. We maintain a similar
pointer to the tail of the queue, and again, we can through a simple
adjustment, redirect
this pointer to the preceding subproblem. 
However, in the event that we
set UPPER to ON, thereby mandating the computation of the maximum upper
bound regardless of the choice of the BRANCH parameter,
no time is saved by opting for the DFS or BFS strategies.
To understand the effect of the various parameters on the overall
running of the program, we need to examine for each removal strategy,
the relationship between
the subproblem that is being removed from the queue, and the
subproblems that remain on the queue.
If we opt for DFS, then  the subproblem we remove is the one
most recently added; it is therefore very likely that the number of
indices in its eligible set will be less than that of the subproblems
remaining on the queue. Hence, it seems more likely that a child
of this subproblem will be infeasible or will yield a unique feasible point.
If we opt for BFS, then
the subproblem we remove is the one that has been on the queue
the greatest amount of time. Hence, the sizes of its eligible set and 
feasible region are
more likely to exceed those of the remaining subproblems on the queue.
It therefore seems more likely that a child of this subproblem will
produce a good feasible point.
If we opt for MAX, then the subproblem
we remove has the maximum upper bound. Therefore (depending on
the quality of the upper bounds), the global upper bound may
decrease, which will decrease the gap between the global upper
and lower bounds.
The above discussion explains some of the trends
that might appear upon selection of the various parameters.

\item The ORDER option. Its syntax is

\[ \mbox{ORDER ON/OFF}. \]
Selecting ON causes the program to open the file PROB.R
and read in a particular branching order for the eligible indices.  If
ORDER is set to OFF, no ordering for the eligible indices is read in,
and the default ordering, $0,1, \ldots, n-1$, is assumed.

\item The INITLO option. Its syntax is:
\[ \mbox{INITLO ON/OFF}. \]
The option INITLO indicates whether we will calculate an
initial lower bound for the original subproblem. This lower bound
is computed by solving the lower-bounding integer program described
in Section 2.3.
Regardless of the choice of parameter for the CPLEX option,
integer (rather than linear) programming techniques are used on
this initial subproblem.
Although solving this integer program does take some time,
it will be well worth it if a good feasible point is found. In fact,
since this is the first subproblem, any feasible point found
during this process will yield a global lower bound, which
can then lead to fathoming of later subproblems.

\item The INITFEAS option. Its syntax is:
\[ \mbox{INITFEAS ON/OFF}. \]
The option INITFEAS indicates whether we will read in an initial
lower bound from a file. 
If INITFEAS is set to
ON, a file PROB.FEAS is opened, and the initial lower bound is read.
Such a feasible point might arise from some heuristic.

\item 
In addition to the spectral upper bounds described in this thesis,
the program also allows for the possibility of computing upper bounds
using other techniques. Such techniques involve the
use of nonlinear programming relaxations of the CMESP;
for a complete description,
see Anstreicher et al. (1996).
%\cite{CMESP2}.
The next two options, NLP and EIG,  influence
how the upper bounds are calculated. NLP has the following syntax:
\[ \mbox{NLP TRACE/DIAG/IDENT/OFF,} \]
and EIG has the syntax:
\[ \mbox{EIG ON/OFF.} \]
If NLP is set either to TRACE, DIAG, or IDENT, upper bounding
techniques described in Anstreicher et al. (1996)
%\cite{CMESP2}
are used.
If NLP is set to OFF, none of these additional techniques is
used.
If EIG is set to ON, the spectral bounding techniques of Section 2.2 are
used; if EIG is set to OFF, they are not.
The following choice of  parameters is invalid:

\[ \begin{array}{l}
\mbox{NLP OFF} \\
\mbox{EIG OFF}.
\end{array} \]
       

\item The OUTPUT option. 
This option controls how much
output is to be written to the output file, PROB.OUT.  Its possible parameters
are 0,1,2,3, and 4, where the level of output
increases as the parameter increases.
\end{enumerate}
\begin{table}
\hspace{2.2in}
\begin{tabular}{l}
CPLEX LP \\
UPPER ON \\
BRANCH MAX \\
ORDER OFF \\
INITLO ON \\
INITFEAS OFF \\
NLP OFF \\
EIG ON \\
OUTPUT 1
\end{tabular}
\caption{Choice of Parameters for Option Set} \label{options}
\end{table}

Having read all of the above files, feasibility of the
initial subproblem
is determined by setting
up and solving the linear program  described in Theorem \ref{FEAS}.
In particular, if no relative interior solution is found, the
problem is deemed infeasible, and the program is exited.
Otherwise, we use the ``$u$-vector'' (see Theorem \ref{FEAS}) to 
determine which, if any, variables obtain
one of their upper or lower bound in every feasible solution.
The former are fixed into the solution, and the latter are
deleted from the variable set; this is achieved by the techniques
discussed earlier. If after this step, every variable has
been removed from the eligible set (either by fixing 
or by deletion),
there is a unique feasible
point; this solves the original problem, and the program
terminates. If the relative interior solution is fractional,
we test for uniqueness; if it is unique, the original
integer program is infeasible, and we exit the program. Otherwise,
upper and lower bounds are computed, 
the initial problem
is placed on the master queue, and the branch-and-bound process
described in Section 2.3 begins. In the code, the branch-and-bound
process is contained in a large ``while'' loop;
the loop is exited
when the queue of subproblems is empty.

In addition to the linear programming library CPLEX 4.0 (1994), which
is written in  C,
our code uses the
matrix library LAPACK 2.0 (see Anderson, Bai, Bischof, Demmel, Dongarra,
Du Croz, Greenbaum, Hammarling, McKenney, Ostrouchov, and Sorensen (1992)),
which is written in FORTRAN 77, and L-BFGS-B subroutines (see
Zhu et al. (1994)), also written
in FORTRAN 77.
We had access to an order-63 covariance matrix (see Guttorp et al. (1992))
derived from sulfate
data obtained from a 
network of 63 environmental monitoring stations in the United States.
We applied the algorithm to 
ten different covariance matrices (C0-C9), and five sets of
constraints (A0/A2/A5/A10/A15). 
Each covariance matrix is of order 30,
and was derived from the order-63 matrix mentioned above.
We worked under the assumption that 33 stations had been fixed
into the solution from the onset. We achieved such fixing
by first preprocessing the set of variables and the covariance
matrix, 
using the method described in the previous section. Each of our problems
therefore uses an order-30 conditional covariance matrix arising
from fixing a particular choice
of 33 stations. In each problem, we  took
$s=15$, so our problems
are network expansion problems in which we seek to augment a network
of 33 stations with the optimal choice of 15 more stations from a set
of 30 potential stations. Each constraint set A$m$ contains $m$
constraints. Using all combinations of the ten matrices and five 
sets of constraints, we worked with fifty distinct problems. We used
the same set of options for each problem, namely those in Table
\ref{options}.
We ran
our code on a 50MHZ HP 9000/715, a very modest workstation. In
Tables \ref{tab1} and  \ref{tab2},
we report for each problem, the number of subproblems
generated
and the total solution time.
\begin{table}
\hspace{.25in}
\begin{tabular}{r||r|r|r|r|r|r|r|r|r|r|r}
& C0 & C1 & C2 & C3 & C4 & C5 & C6 & C7 & C8 & C9 & Mean   \\
\hline  
\hline 
A0 & 241 & 266 & 782 & 1161 & 2178 & 328 & 1161 & 956 & 4837 & 565 & 1247.5  \\
\hline 
A2 & 227 & 246 & 960 & 4344 & 3667 & 319 & 903 & 4835 & 3182 & 513 & 1919.6 
   \\
\hline 
A5 & 197 & 553 & 907 & 1618 & 3833 & 144 & 1236 & 728 & 5632 & 505 & 1535.3 
   \\
\hline 
A10 & 258 & 643 & 919 & 1023 & 4769 & 244 & 574 & 1819 & 2790 & 477 & 1351.6  
   \\
\hline 
A15 & 37 & 147 & 85 & 85 & 289 & 217 & 67 & 143 & 347 &  47 & 146.4 
\end{tabular}
\caption{Number of Subproblems} \label{tab1}
\end{table}
\begin{table}
\hspace{.25in}
\begin{tabular}{r||r|r|r|r|r|r|r|r|r|r|r}
& C0 & C1 & C2 & C3 & C4 & C5 & C6 & C7 & C8 & C9 & Mean   \\
\hline  
\hline 
A0 & 112 & 116 & 335 & 502 & 942 & 139 & 503 & 417 & 2092 & 245 & 540.4  \\
\hline
A2 & 137 & 148 & 511 & 2766 & 2399 & 221 & 490 & 2887 & 1881 & 314 & 1175.3  \\
\hline
A5 & 149 & 367 & 537 & 992 & 2414 & 110 & 762 & 479 & 3622 & 326 & 975.6  \\
\hline
A10 & 225 & 572 & 659 & 897 & 4273 & 226 & 411 & 1330 & 1924 & 441 & 1096.0  \\
\hline
A15 & 76 & 214 & 157 & 142 & 401 & 236 & 148 & 225 & 477 & 86 & 216.3  
\end{tabular}
\caption{Wall Seconds} \label{tab2}
\end{table}

Once we had the code running in serial, it became evident
that our run times would decrease if we could efficiently
use multiple processors to share the work. 
We were noticing that with many
data sets, 
large numbers of subproblems would  accumulate on the queue. Having access to
only a single processor meant that work could only be done on one
subproblem at a time.
Using multiple processors, work could be done
on several subproblems simultaneously; also, subproblems would
conceivably be fathomed sooner due to earlier access to good
lower bounds. For these reasons, we set out to parallelize our code.
We ultimately developed a parallel code for the
Convex Exemplar, a multiprocessor
with shared memory. The University
of Kentucky's Exemplar  currently has  32 processors,
each of which is essentially an HP 9000/735 running
at 125MHZ with a fast cache. 
Processors are grouped to form hypernodes,
where each hypernode contains 8 processors. 
Computations are confined to subcomplexes which can be configured to be
comprised of an arbitrary set of processors. Communication is fastest
between processors in the same hypernode. At the time
we were working on the parallel code, there were a total
of six subcomplexes; each consisted of processors from
a single hypernode.

There are several options for parallel programming on the Exemplar.
At the one extreme are platform-independent languages like PVM
or MPI. Such
approaches have the advantage of easy portability to other platforms.
To take more direct advantage of the architecture of the Exemplar,
it is possible to use compiler options (with C or FORTRAN) and hope that
the compiler is smart enough to allow several processors
to share the work. The next option is to use commands to specifically
identify loops that should be parallelized.
Finally, essentially complete control of the parallelization can be 
accomplished through use of the Compiler Parallel Support (CPS) Library,
a set of thread management and synchronization routines. After
initial experimentation with all four methods, we decided to use
the CPS Library.

Of the approximately 45
functions that comprise the CPS Library, we ultimately
incorporated four into our code. They are given in Table \ref{tab20},
and we briefly describe them here.
\begin{table}
\hspace{.3in}
\begin{tabular}{|l|}
\hline \hline
typedef struct \{ \\
\hspace{.2in} int node;  /* node from which processors will come */ \\
\hspace{.2in} int min;  /* minimum number of processors to use */ \\
\hspace{.2in} int max; /* maximum number of processors to use */ \\
\hspace{.2in} int threadscope; /* thread scope attributes */ \\
\} spawn\_sym\_t; \\
\\
int cps\_ppcalln(spawn\_sym\_t *params, void(*funct)(void *), const int *n, \\
void *arg1,..void *argn); \\
\hline
int c\_init32(cache\_sema\_t *cs, const int *value); \\
\hline
int c\_lock(cache\_sema\_t *cs); \\
\hline
int c\_unlock(cache\_sema\_t *cs); \\
\hline
\end{tabular} 
\caption{CPS Functions (from the CONVEX Exemplar Programming Guide)}  \label{tab20}
\end{table}

The first, cps\_ppcalln, designates the number of processors
we wish to use, then sends to each
processor a copy
of the function ``funct'' with the arguments ``arg1,''... ``argn''.
What had been our main program in the serial version becomes
two separate subroutines in the parallel version. In the first of these,
which we call the 
``master'' program,
all of the initialization takes place; work is done on the
original problem which is then placed on the master queue.
The branch-and-bound process is completely contained within
the second, ``worker'' subroutine. This worker subroutine
is the ``funct'' appearing as a parameter in the above
CPS function.
The cps\_ppcalln calling statement actually appears
as part of the code within the master program;
this single function call essentially begins the
parallelization process. Available processors are given
copies of the worker subroutine, and each performs the work
contained within. In particular, each processor 
removes problems from the queue, creates subproblems, checks for feasibility,
computes upper bounds (using LAPACK and L-BFGS-B),
fathoms if possible, computes lower bounds (using CPLEX), and
places the new subproblems back on the queue. 
Note, however,
that there is only one master queue, to which all processors
have access. It is therefore conceivable for two or more
processors to simultaneously attempt to access  the queue in an effort
to remove subproblems. This is a situation that must be avoided to
preserve the integrity of the queue.
This scenario prompts the use of the remaining
three CPS functions appearing in Table \ref{tab20}. Before
a processor can remove a subproblem from the queue, it
must first acquire a ``lock'' for the queue. This is achieved
through the use of the function c\_lock, which operates in
the following way. Upon a c\_lock call, one of two events
occurs. If the lock is free (i.e. no other processor possesses
it), the current processor acquires the lock, and proceeds to
remove the subproblem. If another processor is
currently accessing the queue, in which case the lock is not
free, the processor waits until the lock is released, then
acquires it, and removes the subproblem. Having finished
removing the subproblem from the queue, the processor releases
the lock (using the function c\_unlock),
which now becomes available to remaining processors.
Note that processors must go through the same procedure
to {\it add} subproblems to the queue; to access the queue
for any reason, a processor must acquire a lock. In addition
to the lock for the master queue, there are several other
locks allocated (using the function c\_init32) during the
master subroutine. In particular, every global variable
that appears in the code must have with it an associated lock.
Since global variables are available to all processors, 
there is always a  chance that two or more will try to
access the same variable simultaneously. 

There are many other potential pitfalls in a parallel
code that one must be aware of, and take measures to avoid.
One such example involves the use of nested locks, where a 
single processor that has already  acquired one lock, acquires
a second lock before releasing the first. Consider
the following situation: processor A has acquired lock Y,
and is waiting to acquire a second lock Z. Meanwhile, processor
B has acquired lock Z, and is waiting to acquire lock Y.
In this case, processors A and B essentially become idle
and can perform no work, because neither will release its lock.
Meanwhile, other processors, continuing their
work, will inevitably require at
least one of locks Y and Z, and they too are forced to wait.
Ultimately, the entire program  becomes suspended and never terminates.
In our program, 
we were careful to avoid nested locks as much as possible,
and
this required some restructuring of the code. Note that there do 
remain some nested locks in our code, but
there are none that could lead to a situation
like that described above.

There is yet another potential problem concerning the master queue.
As with the serial version, the entire branch-and-bound
process as coded in parallel is completely contained within a ``while'' loop.
Recall that
in the serial version, the stopping criterion is an empty queue.
In the parallel version, we impose an additional requirement,
namely that there are no ``busy'' processors.  The while loop (in fact
the entire worker subroutine) is
exited when 
the number of subproblems (numsub)
on the queue and the number of busy workers (numbusy) are both zero.
If, however, numsub is at least one,
we lock the queue, and prepare to remove a subproblem.
Consider the situation, however,
where the value of numsub is very small, and between the time
this processor checked the value of numsub, and then locked the queue,
some other processors were able to remove all of the remaining subproblems
from the queue, leaving it now empty. This processor, unaware
of what has happened, attempts to remove a subproblem from the
empty queue,
causing a catastrophic error. To avoid this, we perform a second
check, {\it after} the processor has acquired the queue lock.
While the queue is locked, we recheck
the number of subproblems on the queue.
If the number of subproblems is still positive,
the processor can successfully remove one;
if not, the processor immediately releases the queue lock.

Once all processors have exited
their respective ``copies'' of the worker subroutine (which occurs
when both numsub and numbusy are found to equal zero),
a single
processor 
finishes the work remaining in the master program (prints the
solution to an output file, closes remaining files, and
frees arrays).

A final note we make is that the parallel code, like the
serial code, uses CPLEX linear programming software. We made
use of CPLEX 4.0,
%\cite{CPLEX},
which is ``thread safe.''
Earlier versions are not.

We made extensive runs with the full order-63 covariance matrix of
Guttorp et al. Such runs all had $n=63$, and here we report results
choosing $s=31$ indices. Problems of this size were found to be
intractable using the spectral upper bounds.
To obtain run times for these problems, we used the NLP option DIAG
and the EIG option OFF.
We made 10 runs each using 1,2,4, and 8 processors, all on the same
hypernode, for sets of 0,2,5,10, and 15
constraints. With the exception of the upper-bounding options NLP and EIG,
we used the same option set as we did for our earlier (serial) runs.
Our results are reported in Table \ref{tab3}. Under the column
labeled ``1'' (processor), we list
the average run time over the 10 runs when a single processor is
used. The columns labeled ``2'', ``4'', and ``8'' (processors) indicate
the speed-up factor using that number of processors, where
\[ \mbox{speed-up factor for $n$ processors} = 
\frac{\mbox{average run time for 1 processor}}{\mbox{average run time
for $n$ processors}}. \]
Our results
clearly indicate approximately linear speed-up on a set of difficult
problems.

In order to run a program, 
it is necessary to first submit
a request to use a particular
subcomplex. Such a request is placed on a queue until
enough memory is available to accommodate the program.
Often, multiple programs by multiple users will run
simultaneously on the same processors, in which case
it is not possible to obtain ``clean'' run times. That is,
under these conditions, any run times obtained during
the execution of a given  program will be tainted because
of competition with other programs running concurrently.
Our runs were conducted on a subcomplex to which no other users
had access.

\begin{table}
\hspace{1.35in}
\begin{tabular}{l|l|lll}
Number of & \multicolumn{4}{c}{Number of processors} \\
constraints & 1 & 2 &  4 & 8  \\
           &  (seconds) & \multicolumn{3}{c}{(speed-up factor)} \\
\hline
0 & 62615 & 1.99 & 4.04 & 6.97 \\
2 & 34619 & 2.03 & 4.10 & 8.04 \\
5 & 5551 & 2.02 & 4.00 & 7.56 \\
10 & 14815 & 1.95 & 4.00 & 7.15 \\
15 & 12153 & 1.97 & 3.97 & 7.81 
\end{tabular}
\caption{Results of Parallel Implementation} \label{tab3}
\end{table}
\section{The
Constrained Maximum-Entropy Sampling Problem with
Fixed Costs (CMESPF)}

Recall that the original CMESP serves as a  model for 
the sampling problem whereby we select, from a network
of sites, a most informative subset
of specified cardinality. In particular, we addressed
the environmental monitoring problem, where our network consists
of sites
at which we are interested in the concentration
 of a particular ion in wet deposition, and where we seek to observe
a subset that provides as much information as possible 
about the entire network. We now consider a variation of
this problem. Assume that we
are interested in a {\it set} of ions, and that there is the opportunity
to measure just a subset of those ions at each monitoring station.
Let {\it I} be the set of ions, and let {\it L} be the set of
potential monitoring locations. Let $C$ be  a covariance
matrix with rows and columns  indexed by the ion-location
pairs $(i,l) \in I \times  L$. We assume a
Gaussian distribution on the set of ion-location pairs.
The observation of at
least one ion at location $l$ incurs a fixed cost $f_l$.
The observation of ion $i$ at location $l$ incurs an additional cost
$d_{il}$. We have a budget limit $\beta$, and we wish
to observe a total of $p$ ions at a total of $t$ locations, so as
to maximize the entropy. We next show how this cost-constrained
problem can be modeled as an integer nonlinear program.

We start by defining at each location a ``pseudo-ion'' $o$.
For each $l \in {\it L}$, the pair $(o,l)$ is assumed to have
unit variance, and to be independent
of all other ion-location pairs. We therefore have the following
extended covariance matrix:

\[
\widehat{C} = \bordermatrix{
  &  \{ o \} \times {\it L} & {\it I} \times {\it L}  \cr 
& I & 0 \cr
&  0 & C  \cr 
}\ .
\]

We then formulate our problem  as the following integer
nonlinear program.

\[ \begin{array}{llrll}
\max &  & \ln \det \widehat{C}[\underline{x},\underline{x}] & & \\
& & & & \\
\mbox{s.t.} & (1) &  \bsum{(i,l) \in {\it I} \times
{\it L}} x_{il} & = & p \\
& & & & \\
& (2)  & \bsum{i \in {\it L}} x_{ol} & = & t; \\
& & & & \\
& (3) &
\bsum{l \in {\it L}} f_l x_{ol} + \bsum{(i,l) \in {\it I} \times {\it L}}
d_{il} x_{il} & \leq & \beta; \\
& & & & \\
& (4)  & x_{il} - x_{ol} & \leq & 0, ~ \forall ~(i,l) \in {\it I} \times {\it L}; \\
& & & & \\
& & x_{ol} & \in &  \{ 0,1 \}, ~ \forall ~l \in {\it L}; \\
& & & & \\
& &  x_{il} & \in &  \{ 0,1 \}, ~ \forall ~(i,l) \in {\it I} \times {\it L}.
\end{array} \]

Constraint (1) ensures that we observe a total of $p$ ions;
constraint (2) ensures that the observations are made at a total of $t$
locations.
Constraints (4) force $x_{ol}=1$ whenever 
$x_{il}=1$ for some $(i,l) \in {\it I} \times {\it L}$.
These constraints, together with the constraints (3), ensure that
we pay the fixed cost $f_l$ whenever we make any observation at location
$l$.

Next, we verify
that inclusion of pseudo-ions does not
affect the entropy of a solution.
Recall that the entropy of a $p$-subset $S$ of original
ion-location pairs $(i,l)$ ($i \neq o$) is, up to constants, equal
to $\ln \det C[S,S]$. Note, however, that every 
principle submatrix  $\widehat{C}[\underline{x},\underline{x}]$
corresponding to a feasible point $x$ of the above program is of the form
\[ \left( \begin{array}{ll}
  I_t & 0 \\
  0 & C[S,S]
\end{array} \right) \]
for some subset $S$ with $|S|=p$. Also, every principle
submatrix $C[S,S]$ of $C$ (with $|S|=p$) can be
extended to a $(p+t) \times (p+t)$ submatrix of ${\widehat C}$
of the above form. Since
\[ \det C[S,S] = \det
 \left( \begin{array}{ll}
  I_t & 0 \\
  0 & C[S,S]
\end{array} \right),\]
the result follows.

We now verify that the above program can be re-expressed
as a CMESP. To see this, 
note that constraints (1) and (2) above can be equivalently
expressed as
\[ \begin{array}{lrll}
(1') &  \bsum{j \in {\it L}} x_{ol} +
 \bsum{(j,l) \in {\it I} \times
{\it L}} x_{jl} & = & t + p, \\
& & & \\
(2') &  \bsum{l \in {\it L}} x_{ol} & = & t .
\end{array} \]

Therefore, we get the following formulation of the above program as a
CMESP, where the ``sample size'' $s$ is $p+t$.
\[ \begin{array}{llrll}
 \max & & \ln \det \widehat{C}[\underline{x},\underline{x}] & & \\
& & & & \\
 \mbox{s.t.} & (1) & 
   \bsum{l \in {\it L}} x_{ol} +
 \bsum{(j,l) \in {\it I} \times
{\it L}} x_{jl} & = &  p+t; \\
& & & & \\
&  & \bsum{l \in {\it L}} x_{ol} & \leq  & t; \\
& & & & \\
&  & -\bsum{l \in {\it L}} x_{ol} & \leq  & - t; \\
& & & & \\
&  &
\bsum{l \in {\it L}} f_l x_{ol} + \bsum{(j,l) \in {\it I} \times {\it L}}
d_{jl} x_{jl} & \leq & \beta; \\
& & & & \\
&  & x_{jl} - x_{ol} & \leq & 0, ~ \forall ~(j,l) \in {\it I} \times {\it L}; \\
& & & & \\
&  & x_{ol} & \in &  \{ 0,1 \}, ~ \forall ~l \in {\it L}; \\
& & & & \\
&  &  x_{jl} & \in &  \{ 0,1 \}, ~ \forall ~(j,l) \in {\it I} \times {\it L}.
\end{array} \]
Note that $(1)$ is the ``cardinality constraint.''

We call this cost-constrained version of the CMESP the ``constrained
maximum-entropy sampling problem with fixed costs'' (CMESPF); it can be solved
using the branch-and-bound algorithm outlined in Section 2.3.

We ran the computer program from Section 2.4 on several
cost-constrained problems. We had access to an order-92 covariance
matrix derived from 
$\mbox{SO}_4$ and $\mbox{O}_3$ data obtained from a network of
46 environmental monitoring stations in the the United States.
Originally, zero, one, or both ions could be observed at any
given station. In all of the problems we considered,  however,
we worked under the assumption that a total of 29 sites and
31 ions had been fixed into the solution from the onset.
Such fixing was achieved using the methods in Section 2.3.
After the initial fixing, there remained 27 stations
at which exactly one ion was being observed, and 17 stations
at which neither ion was being observed. 

We then applied the algorithm to two distinct sets of problems.
In the first set of problems, we used the costs given in 
Zidek et al. (1997).
In particular, observing a new ion
(either $\mbox{SO}_4$ or $\mbox{O}_3$) at any station, regardless
of whether the other ion was already being observed there,
cost \$291.667 per month.
Adding in a new site (that is, observing at least one ion at
a site that had previously not been monitored) incurred a cost
of \$1041.667 per month. That is,
we took $d_{il} = 291.667$ for all ions $i \in I$
and all locations $l \in L$, and
$f_l=1041.667$ for all locations $l \in L$. There is only one value
of $\beta_{p,t}$ that makes sense when using such uniform costs, namely
$\beta_{p,t} = 291.667p + 1041.667t$. Since the corresponding budget
constraint is implied by the other constraints, we deleted it from
our constraint set.
We then ran the program for various combinations of $p$ and $t$,
using the spectral upper bounds
of Section 2.2 and/or the nonlinear
programming upper bounds of Anstreicher et al. (1997a).
Note that the solutions do not
depend on the costs, owing to their uniformity. 
In Table \ref{fixed1},
we give, for 15 choices of $p$ and $t$, the  corresponding  total
monitoring costs (i.e. the values of $291.667p+1041.667t$),
as well as the corresponding
entropies.
We found these problems to be, in some sense, much harder than
the ones we examined in Section 2.4. Even for small values
of $p$ and $t$, the run times and
number of subproblems generated are very large.
As an example, taking $(p,t)=(3,2)$ (so that $s=5$) and using the spectral
upper bounds, a total of 152790 subproblems are generated, and the total
number of wall seconds is 70630. 
The value of ${n \choose s}$ for this CMESPF, namely
${78 \choose 5}$, is less than double that  for the CMESP in Section 2.4,
namely
${30 \choose 15}$, and yet the run times and numbers of subproblems
exceed those of Section 2.4 by much greater factors.
The bounds of Anstreicher et al. (1997a)
improved these numbers to some extent, but they still were
extremely large.

Recall
that the entropy function depends on $s$ through an additive constant,
that until now we have  referred to only as $k_s$ (see Chapter 1, page 3).
In order to compare the entropies of the solutions to the problems,
we used the precise formula for the entropy of a set of Gaussian
random variables (see
Shannon and Weaver (1963)):
\[ H(\underline{x}) := \frac{p}{2} \ln(2 \pi e) + 
\frac{1}{2} \ln \det \widehat{C}[\underline{x}, \underline{x}]. \]
We have listed the problems in order of increasing monitoring cost;
note that in general, the entropy of a solution 
corresponding to $(p,t) = (a,z)$ (for $z=0,1$) is significantly higher than the
entropy of a solution corresponding to $(p,t) = (b,z+1)$,
when
the total monitoring costs of the two associated problems are
comparable. Recall that our objective is to maximize entropy,
so assuming that we have only limited funds to spend on
monitoring,
it makes more sense to 
observe a larger number of ions but
restrict the number of stations we activate, than it does to
activate new stations and observe fewer ions.

\begin{table}
\hspace{1.8in}
\begin{tabular}{|l|l|l|l|}
\hline
\hline
$p$ & $t$ & total cost & entropy \\
\hline
\hline
4 & 0 & 1166.668 & 12.198
\\
\hline
1 & 1 & 1333.334 & 3.965
\\
\hline
5 & 0 & 1458.335 & 14.967
\\
\hline
2 & 1 & 1625.001 & 7.424
\\
\hline
6 & 0 & 1750.002 & 17.721
\\
\hline
3 & 1 & 1916.668 &  10.418
\\
\hline
7 & 0 & 2041.669 & 20.422
\\
\hline
4 & 1 & 2208.335 &   13.283
\\
\hline
8 & 0 & 2333.336 &  23.121
\\
\hline
5 & 1 & 2500.002 & 16.144 
\\
\hline
9 & 0 & 2625.003 & 25.802
\\
\hline
2 & 2 & 2666.668 & 7.849
\\
\hline
6 & 1 & 2791.669 & 18.913
 \\
\hline
10 & 0 & 2916.670 & 28.450
\\
\hline 
3 & 2 & 2958.335 & 11.296
\\
\hline
\hline
\end{tabular} 
\caption{Costs and Entropies for CMESPFs (Uniform Costs)} 
\label{fixed1}
\end{table}
%\newpage
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,0) & (2,0) & (3,0) & (4,0) & (5,0)  \\
%\hline
%& 0.54 & 1.24 & 8.04 & 25.72 & 66.46 \\
%\hline 
%\end{tabular} 
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) &  (6,0) & (7,0) & 
%(8,0) & (9,0) & (10,0) \\
%\hline
%&  126.06 & 220.07 & 286.72 & 300.57 &
%287.05  \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,1) & (2,1) & (3,1) & (4,1) & (5,1)  \\
%\hline
%& 155.44 & 273.59 & 2454.45 & 10728.83 & 15529.38 \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (6,1) & (7,1) & 
%(8,1) & (9,1) & (10,1) \\
%\hline
%&  72727.56
%& 115400.31 & 474834.47 & & \\
%\hline
%\end{tabular}
%\caption{Wall Seconds: Spectral Bounds} 
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,0) & (2,0) & (3,0) & (4,0) & (5,0)  \\
%\hline
%& 77 & 77,73 & 77,73,71 & 77,73,71,57 & 77,73,71,57,70 \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (6,0) & (7,0) & (8,0) & (9,0) & (10,0)  \\
%\hline
%&  77,73,71,57,70, & 
%  77,73,71,57,70, & 
%  77,73,71,57,70, & 
%  77,73,71,57,70, & 
%  77,73,71,57,70,  \\
%& 53 & 53,51 & 53,51,69 & 53,51,69,72 & 53,51,69,72,66 \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,1) & (2,1) & (3,1) & (4,1) & (5,1)  \\
%\hline
%& 6,29 & 6,29,77 & 6,29,77,73 & 6,29,77,73,71 & 6,29,77,73,71,57  \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (6,1) & (7,1) & (8,1) & (9,1) & (10,1)  \\
%\hline
%\hline
%&  6,29,77,73,71,57, & 
%  6,29,77,73,71,57, & 
%  6,29,77,73,71,57, & 
%  6,29,77,73,71,57, & 
%  6,29,77,73,71,57,  \\
%& 70 & 70,53 & 70,53,51 & 70,53,51,69 & 70,53,51,69,72  \\
%\hline
%\end{tabular}
%\caption{Optimal Subsets $S$}
%\end{table}

In the second set of problems, we varied the costs associated with
the ions and locations.  We considered 15 distinct problems,
corresponding to the same choices of $p$ and $t$ given
in Table \ref{fixed1}. 
In a preprocessing stage, we
used a random number generator to produce
values for $f_l$ ($l \in L$) 
between 700 and 1350, as well as values for
$d_{il}$ between 200 and 400.
(Note that
the average of 700 and 1350  is approximately 1041.667, and
that the average of 200 and 400 is approximately 291.667.)
Having specified the costs during the preprocessing stage, their values
remained the same for the 15 problems. We then grouped these 15 problems
into subsets of size three, and used the same value of $\beta$
for each problem in a given subset. Values of $\beta$ were chosen large
enough to ensure feasibility of the problems,  but small
enough to affect the feasible regions. After completing the 15 runs,
we randomized for a second time, producing a completely new set
of ion and location costs (but within the same ranges as previously).
The entire procedure was
then repeated for a second group of 15 problems; their values
of $p$, $t$, and $\beta$ matched the first, but the associated costs
differed. 
In Table \ref{fixed2}, we give the entropies
of the solutions to all 30
problems. (E$i$ is the entropy of a solution to a problem using costs
generated during the $i^{\mbox{th}}$ randomization. Note that
after the second randomization, one of the problems became
infeasible.) 
Even with a budget constraint, it still is significantly more cost-effective
to observe more ions at a fewer number of stations.
Noting this, we ran four additional problems, for larger values
of $t$, and we provide entropies corresponding to those problems as well.
We used the nonlinear programming upper bounds
of Anstreicher et al. (1997a) in each case; we found that
run times and numbers of subproblems generated were slightly less
than those for the problems with uniform costs, but they were still
quite large.

%\begin{table}
%\hspace{1.75in}
%\begin{tabular}{|l|l|l|l|l|}
%\hline
%\hline
%$p$ & $t$ & $\beta$ & total cost & entropy \\
%\hline
%\hline
%4 & 0 & 1500 & 987.455 & 12.198
%\\
%\hline
%1 & 1 & 1500 & 1055.420 & 3.965
%\\
%\hline
%5 & 0 & 1500 & 1311.470 & 14.967
%\\
%\hline
%\hline
%2 & 1 & 1750 & 1327.815 &  7.424
%\\
%\hline
%6 & 0 & 1750 & 1575.240 &  17.721
%\\
%\hline 
%3 & 1 & 1750 & 1534.925 &  10.418
%\\
%\hline
%\hline
%7 & 0 & 2000 & 1899.495 & 20.422
%\\
%\hline
%4 & 1 & 2000 & 1774.735 & 13.283
%\\
%\hline
%8 & 0 & 2000 & 1994.515 & 22.830
%\\
%\hline
%\hline
%5 & 1  & 2250 & 2042.875  & 16.144
%\\
%\hline
%9 & 0 &  2250 & 2248.050 &  25.451
%\\
%\hline
%2 & 2 &  2250 & 2160.165 &   7.791
%\\
%\hline
%\hline
%6 & 1 & 2500  & 2366.890  & 18.913
% \\
%\hline
%10 & 0 &  2500 & 2491.170 &  27.951
%\\
%\hline 
%3 & 2 &  2500 & 2432.560 &  11.230
%\\
%\hline
%\hline
%\end{tabular} \caption{Entropies for CMESPFs (Non-Uniform Costs)}
%\label{fixed2}
%\end{table}
\begin{table}
\hspace{1.75in}
\begin{tabular}{|l|l|l|l|l|}
\hline
\hline
$p$ & $t$ & $\beta$ & E1 & E2 \\
\hline
\hline
4 & 0 & 1500 & 12.198 & 12.198 
\\
\hline
1 & 1 & 1500 & 3.965 & 3.960 
\\
\hline
5 & 0 & 1500 &  14.967 & infeas. 
\\
\hline
\hline
2 & 1 & 1750 &   7.424 & 7.403 
\\
\hline
6 & 0 & 1750 &  17.721 & 17.419 
\\
\hline 
3 & 1 & 1750 &  10.418 & 10.235
\\
\hline
\hline
7 & 0 & 2000 &  20.422 & 20.006
\\
\hline
4 & 1 & 2000 &  13.283 & 12.961 
\\
\hline
8 & 0 & 2000 &  22.830 & 22.103 
\\
\hline
\hline
5 & 1  & 2250 & 16.144 & 15.694 
\\
\hline
9 & 0 &  2250 &   25.451 & 24.609 
\\
\hline
2 & 2 &  2250 &   7.791 & 7.593 
\\
\hline
\hline
6 & 1 & 2500  &  18.913 & 18.479 
 \\
\hline
10 & 0 &  2500 &  27.951 & 26.867 
\\
\hline 
3 & 2 &  2500 &   11.230 & 11.007
\\
\hline
\hline
3 & 3 & 3800 & 11.662 & 11.423 \\
\hline
4 & 4 & 5100 & 15.367 & 15.154 \\
\hline
5 & 5 & 6500 & 18.970 & 18.674 \\
\hline
6 & 6 & 7600 & 22.379 & 22.023 \\
\hline
\hline
\end{tabular} \caption{Entropies for CMESPFs (Non-Uniform Costs)}
\label{fixed2}
\end{table}
\newpage

\chapter*{\vskip -1in Chapter 3}
\setcounter{chapter}{3}
{\Huge {\bf The Constrained Maximum-Entropy Remote Sampling Problem \\
(CMERSP)}} \\ 

\setcounter{section}{0}
\setcounter{table}{0}
\section{Formulation}

In this chapter, we consider the Constrained 
Maximum-Entropy {\it Remote} Sampling Problem
(CMERSP) as a
variation of
the Constrained Maximum-Entropy Sampling Problem. 
We again start with an $n$-set $N$
of indices, but now we introduce an additional set $T$, with $N \cap T = 
\emptyset$. Let the $m$-set $M$ index the set $\sum_{j \in S} a_{ij}
\leq b_i$ ($i \in M$) of constraints.
Let $C$ be a symmetric matrix with rows and columns indexed by
$N \cup T$, and assume that $C$ is positive definite.
For a subset $S$ of $N$,  let
\[ C_S[T,T] := C[T,T] - C[T,S] (C[S,S])^{-1} C[S,T]. \]
We define the CMERSP as:
\[\mbox{(CMERSP)} \hspace{.3in}
\begin{array}{llll}
z & := &  \min &  \ln \det C_S[T,T] \\
& & & \\
& & \mbox{s.t.} &
\bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} \forall ~i \in M; \\
& & & \\
& &  &  S \subseteq N; \\
& & & \\
& & &  |S|=s.
\end{array} \]

Recall from Chapter 1 that the CMERSP models
a problem closely related to the sampling problem modeled by the CMESP,
where we seek
a most informative subset $S$ from a
set $N$ of points in a network. In particular, we have
in remote sampling,
a set of random variables $N$, called
observing points, that we
can monitor, and a set of random variables $T$, called
target points, that we want
information about. We have no inherent interest in the points $N$,
and we are unable to directly observe the points $T$. Observations
are expensive, and so 
we cannot observe all of $N$, but only
a subset, of predetermined size $s$. Each subset $S$ of $N$
(with $|S|=s$) will provide a varying amount of information about
$T$, and among all possible such subsets, we would like to choose
the subset $S$ that minimizes the uncertainty remaining in $T$ after
observing $S$. That is, we want to choose the subset $S$ that
minimizes the entropy of $T$, conditioned on $S$.

Let $C$ be the $|N \cup T| \times |N \cup T|$ covariance matrix
for $N \cup T$; then the matrix $C_S[T,T]$ is precisely the 
covariance matrix for $T$ conditioned on $S$. (See Shewry and
Wynn, for example.)
%\cite{SHEWRY}.
In the case where $N \cup T$ is joint Gaussian,
the {\it conditional entropy}  of $T$ (conditioned on $S$)
is, up to constants, equal to
\[ H_S(T) := \ln \det C_S[T,T]. \]
So, our problem of finding the $s$-subset
of $N$ that minimizes the conditional entropy of $T$ is exactly the
CMERSP.

We claim that
that minimizing $H_S(T)$ is equivalent to maximizing
$H(S) - H_T(S)$, where $H(S)$ is the (unconditional) entropy of
$S$. To see this, note that
\[ \begin{array}{lll}
H_S(T) & = & \ln \det (C_S[T,T]) \\
& & \\
 & = & \ln \det (C[T,T] - C[T,S] (C[S,S])^{-1} C[S,T]) \\
& & \\
 & = & \ln \left( \bfrac{\det C[T \cup S, T \cup S]}{ \det C[S,S]}
\right) \\
& & \\
 & = & \ln \det C[T \cup S, T \cup S] - \ln \det C[S,S] \\
& & \\
 & = & H(T \cup S) - H(S),
\end{array} \]
and hence
\[ H_T(S) = H(T \cup S) - H(T) \]
so
\[ H_S(T) = H_T(S) + H(T) - H(S) . \]
Since $H(T)$ is a constant, we have that
\[ \displaystyle \min_{S \subseteq N, |S|=s} H_S(T) = 
H(T) - 
\displaystyle \max_{S \subseteq N, |S|=s} (H(S) - H_T(S)). \]
Recall that $H(S)$ is just the entropy of $S$, so in the Gaussian
case, we solve

\[\mbox{(CMERSP)} \hspace{.3in}
\begin{array}{llll}
z & := &  \max &  \ln \det C[S,S] - \ln \det C_T[S,S] \\
& & & \\
& & \mbox{s.t.} &
\bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} \forall ~i \in M; \\
& & & \\
& &  &  S \subseteq N; \\
& & & \\
& & &  |S|=s.
\end{array} \]
In the next section, we will see precisely how the CMESP is a special
case of the CMERSP.

\section{Complexity}

In this section, we demonstrate
an efficient reduction of the CMESP to the CMERSP. As 
the CMESP is NP-Hard (see Ko, Lee, and Queyranne),
%\cite{KLQ}), 
we will have established
the same for the CMERSP.

Suppose that we have an instance of the CMESP, specified by positive-definite
matrix $C[N,N]$ and $s$. We can assume that all eigenvalues of $C[N,N]$ are 
greater than unity, by scaling the matrix. Consider the
matrix
\[
\widehat{C}=\widehat{C}[N\cup T,N\cup T]= \bordermatrix{   
      & N & T \cr
N  & C[N,N] & I_n \cr
T & I_n & (C[N,N]-I)^{-1}\cr
}\ .
\]
The set $T$ that we have constructed has the same cardinality as $N$.
Moreover, the form of $\widehat{C}$ indicates a pairing between
the $i^{\hbox{th}}$ element of $N$ and the $i^{\hbox{th}}$ element of $T$.
Consider an $s$-subset $S$ of $N$, and let $S'$ be the subset of
$T$ that corresponds to $S$. Then,
\[
\widehat{C}[S,T]=\bordermatrix{
 & S' & T\setminus S'\cr
 S & I_s & 0}
=\widehat{C}^t[T,S]\ .
\]
Now,
\begin{eqnarray*}
H(S)-H_T(S)&=
        & \ln \det(\widehat{C}[S,S])-\ln \det\left(\widehat{C}[S,S]-
         \widehat{C}[S,T]\left(\widehat{C}[T,T]\right)^{-1}\widehat{C}[T,S]
         \right)\\
       & = & \ln \det(C[S,S])-\ln \det\left(C[S,S]-
         [I_s|0] \left( C[N,N]-I_n \right) \left[ 
\begin{array}{l}
I_s \\
0
\end{array} \right]
\right)\\
       & = & \ln \det(C[S,S])-\ln \det\left(C[S,S]-\left(C[S,S]-I_s\right)\right)\\
       & = & \ln \det(C[S,S])-\ln \det\left(I_s\right)\\
       & = & \ln \det(C[S,S])\\
       & = & H(S)\ .
\end{eqnarray*}

Therefore, a solution of the CMERSP on $\widehat{C}$ is a solution
of the CMESP on $C$. The only detail to check is that $\widehat{C}$
is positive definite, so that we have a legitimate instance
of the CMERSP. 

\begin{lemma}\label{CPD}
Assume that positive-definite $C$ has been scaled so that all eigenvalues
are greater than unity. Then $\widehat{C}$ is positive definite.
\end{lemma}

\begin{proof}
It suffices to show that some nested-sequence of principal
submatrices of $\widehat{C}$,
of all orders, have positive determinants. 
Certainly this is true for $\widehat{C}[T,T]=(C[N,N]-I)^{-1}$,
which, by our scaling of $C$, has all positive eigenvalues. Now consider
an arbitrary subset $S$ of $N$. Since
\begin{eqnarray*}
\det(\widehat{C}[S\cup T,S\cup T]) & = &
\det (\widehat{C}[T,T])\cdot \det\left(\widehat{C}[S,S]-
\widehat{C}[S,T]\left(\widehat{C}[T,T]\right)^{-1}\widehat{C}[T,S]\right)\\
& = & \det (\widehat{C}[T,T]) ,
\end{eqnarray*}
we can conclude that $\det(\widehat{C}[S\cup T,S\cup T])$ is
positive for every subset $S$ of $N$. Therefore $\widehat{C}$
is positive definite.
\end{proof}

Our construction and Lemma \ref{CPD}, together with the result
of Ko, Lee, and Queyranne (1995),
allow us to establish
the following result.

\begin{theorem}\label{NP}
The CMERSP is NP-Hard.
\end{theorem}

\section{Upper Bounds}

The algorithm that we have developed for
the CMERSP is very similar to that developed for the GCMESP.
Just as with the GCMESP, the algorithm for the CMERSP is
of a branch-and-bound
variety.  Subproblems are described by the indices that are fixed
into the solution and those indices that remain eligible.
For each subproblem, the size of its feasibility region is determined,
by setting up and solving a linear program  like that in 
Theorem \ref{FEAS}.
If the subproblem is infeasible, it becomes
inactive, creating no new subproblems of its own.
If there is a unique feasible point,
its objective function value is computed,
and again it becomes inactive. Otherwise,
an upper bound is computed, and two new subproblems are created,
by selecting an eligible index and setting it equal to 1
in one subproblem and to 0 in the second subproblem.  Except for
the added constraints that force variables either to 0 or to 1,
successive subproblems are identical in structure to
previous ones, and in particular to the original CMERSP.
In this section, we develop a spectral
upper bounding function for $z$, the value of the  CMERSP. We show
that this function is convex, and we
derive an expression for its gradient. For a given subproblem
generated during our branch-and-bound process, we apply the BFGS
descent method to find the minimum of this function over its domain;
we then use this minimum as an upper bound for the subproblem.
We note that in the computer implementation, all of the options
described in Section 2.3 are still available, including that
of selecting alternative upper bounding techniques (see
Anstreicher et al. (1997b)). In the case where only the spectral
bounds are used, no lower bounds are computed for the subproblems
(with regard to Section 2.3, this is equivalent to setting CPLEX OFF).
In the case where the bounds of Anstreicher et al. (1997b) are
used, lower bounds are computed, by solving 
$\mbox{LB}$ (see Section 2.3) for an appropriate
choice of objective function coefficients (see Anstreicher et al. (1997b)).

Consider the following reformulation of CMERSP:
\[ \begin{array}{llll}
 z & := &  \max &  \ln \det C[S,S] - \ln \det C_T[\hat{S},\hat{S}] \\
& & & \\
& &  \mbox{s.t.} &
  \bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in}
\forall ~i \in M;   \\
& & & \\
& & &  \hat{S} = S \subseteq N; \\
& & & \\
& & &  |S| = s.
\end{array} \]

For $\gamma\in \Re^N$,
define a diagonal matrix $D^{\gamma}$,
with diagonal entries, indexed by $N\cup T$, given by
\[ 
D^{\gamma}_{jj} := \left\{ \begin{array}{ll}
                               \exp(\frac{1}{2} \gamma_j) & j\in N; \\
                               1 & j\in T.
                            \end{array} \right.
\]
Let $C^{\gamma}:= D^{\gamma} C D^{\gamma}$.

Also, for  $\gamma\in \Re^N$ and $\pi\in \Re_+^M$,
define a diagonal matrix $F^{\gamma,\pi}$,
with diagonal entries, indexed by $N\cup T$, given by
\[ 
F^{\gamma,\pi}_{jj} := \left \{ \begin{array}{ll}
                  \exp \left( \frac{1}{2} \gamma_j- \frac{1}{2}
            \sum_{i\in M}\pi_i
           a_{ij}  \right) & j\in N;\\
                               1 & j\in T.
                            \end{array}  \right. 
\]
Let $C^{\gamma,\pi}:= F^{\gamma,\pi} C F^{\gamma,\pi}$.

Finally, let
\[ 
v(\gamma,\pi) := 
\ln \left( \prod_{l=1}^s \lambda_{\bar{l}}\left( C^{\gamma,\pi}[N,N]\right)
\right)-
\ln \left( \prod_{l=1}^s \lambda_{\underline{l}}
\left(\left(C^{\gamma}\right)_T[N,N]\right) \right)
+ \sum_{i\in M} \pi_i b_i \ . 
\]
We next show that $v(\gamma, \pi)$ is an upper bound for $z$.

\begin{theorem}\label{SBound}
For all $\gamma\in \Re^N$ and $\pi\in \Re_+^M$,
$v(\gamma,\pi)\ge z$.
\end{theorem}

\begin{proof}

Let $S, \hat{S}$ solve the CMERSP.
Since
\[ \Bprod{l=1}{s} \lambda_{\bar{l}} (C^{\gamma,\pi}[N,N]) \geq
 \Bprod{l=1}{s} \lambda_{\bar{l}} (C^{\gamma,\pi}[S,S]) =
\det (C^{\gamma,\pi} [S,S]) > 0, \]
and
\[
\Bprod{l=1}{s} \lambda_{\underline{l}} ((C^{\gamma})_{T}[N,N]) \leq
\Bprod{l=1}{s} \lambda_{\underline{l}} ((C^{\gamma})_{T}[S,S]) =
\det ((C^{\gamma})_T[\hat{S},\hat{S}])> 0, \]
we have that
\[ \begin{array}{ll}
&  \bfrac{\Bprod{l=1}{s} \lambda_{\bar{l}} (C^{\gamma,\pi}[N,N])}{
\Bprod{l=1}{s} \lambda_{\underline{l}} ((C^{\gamma})_{T}[N,N])}
\geq \bfrac{\det (C^{\gamma,\pi} [S,S])}{\det ((C^{\gamma})_T[\hat{S},\hat{S}])}\\
& \\
\Rightarrow &
 v(\gamma,\pi) - \bsum{i \in M} \pi_i b_i \geq \ln \det (C^{\gamma,\pi}[S,S])
- \ln \det((C^{\gamma})_T[\hat{S},\hat{S}]) ~. 
\end{array} \]
Hence,
 \begin{eqnarray*}
v(\gamma,\pi) & \geq & \ln \det (C^{\gamma,\pi} [S,S]) - \ln \det((C^{\gamma})_T
[\hat{S}, \hat{S} ]) + 
 \bsum{i \in M} \pi_i b_i \\
& & \\
& = & \ln \left( \exp \left( \bsum{j \in S}
 \gamma_j - \bsum{i \in M} \bsum{j \in S} \pi_i
a_{ij} \right) \det C[S,S] \right) -  \\
& & \\
& &  \ln \left( \exp \left( \bsum{j \in \hat{S}} \gamma_j \right) \det
C_T[\hat{S},\hat{S}] \right) +  
 \bsum{i \in M} \pi_i b_i \\
& & \\
& = & \ln \det C[S,S] - \ln \det C_T[\hat{S},\hat{S}] +
\bsum{j \in S} \gamma_j - \bsum{j \in \hat{S}} \gamma_j 
- \bsum{i \in M} \bsum{j \in S} \pi_i a_{ij}
+ \bsum{i \in M}
\pi_ib_i
 \\
& & \\
& = & \ln \det C[S,S] - \ln \det C_T[\hat{S},\hat{S}] +
 \bsum{i \in M}
\pi_i \left( b_i
-  \bsum{j \in S}  a_{ij} \right) \\
& & \\
& \geq  & \ln \det C[S,S] - \ln \det C_T[\hat{S},\hat{S}] \\
& & \\
& = & z ~.
\end{eqnarray*} 
\end{proof}

Thus, we have just generated a {\it family} of upper bounds for
$z$. The best upper bound of this form is
\[ \min_{\gamma \in \Re^N, \pi \in \Re^M_+} v(\gamma,\pi). \]
We next show that $v$ is convex. Later in this section, we
derive an expression for the gradient of $v$.
We can therefore use a descent method to find a global minimum
of $v$.


For $\gamma \in \Re^N$, define 
$\tilde{D}^{\gamma}:= D^{\gamma}[N,N]$.
For $\gamma \in \Re^N$ and $\pi \in \Re^M_+$, define
$\tilde{F}^{\gamma, \pi}:= F^{\gamma, \pi}[N,N]$.
For an arbitrary
$n \times n$ matrix $B$, define $B^{\gamma} := \tilde{D}^{\gamma} B
\tilde{D}^{\gamma}$ and
define $B^{\gamma, \pi} := \tilde{F}^{\gamma,\pi} B
\tilde{F}^{\gamma,\pi}$.

\begin{lemma} \label{same_T}
{\it For $\gamma \in \Re^N$,}
$(C^{\gamma})_T[N,N] = ({C_T[N,N]})^{\gamma}.$ 
\end{lemma}

\begin{proof}
 \begin{eqnarray*}
   (C^{\gamma})_T[N,N] & = & C^{\gamma}[N,N] - C^{\gamma}[N,T]
(C^{\gamma}[T,T])^{-1}
 C^{\gamma}[T,N] \\
&  & \\
 & =  & (D^{\gamma} C D^{\gamma})[N,N] - (D^{\gamma}CD^{\gamma})[N,T] \cdot ((D^{\gamma}C
D^{\gamma})[T,T])^{-1} \cdot (D^{\gamma} C D^{\gamma})[T,N] \\
& & \\
 & =  & \tilde{D}^{\gamma} (C[N,N]) \tilde{D}^{\gamma} - \tilde{D}^{\gamma} (C[N,T]
(C[T,T])^{-1} C[T,N])\tilde{D}^{\gamma} \\
& & \\
 &  = &  \tilde{D}^{\gamma}(C[N,N] - C[N,T](C[T,T])^{-1} C[T,N]) \tilde{D}^{\gamma} \\
& & \\
& = & \tilde{D}^{\gamma} (C_T[N,N]) \tilde{D}^{\gamma} \\
& & \\
& = & (C_T[N,N])^{\gamma} ~.
\end{eqnarray*} 
\end{proof}

\begin{theorem}\label{SConvex}
The function $v$ is convex.
\end{theorem}

\begin{proof}
For $\gamma \in \Re^N$, and $\pi \in \Re^M_+$, define
\[ f(\gamma,\pi):= \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}} (C^{\gamma,\pi}[N,N]) \right)
\hspace{.2in} \mbox{and} \hspace{.2in}
g(\gamma):= \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}} ((C^{\gamma})_T[N,N]) \right) ~. \]
So $v(\gamma,\pi) = f(\gamma,\pi)-g(\gamma) +
\bsum{i \in M} \pi_{i} b_i$.
Since
$\bsum{i \in M} \pi_{i} b_i$ is linear in $\pi$, it is enough to show
convexity of $f$ and concavity of $g$.

\[ \begin{array}{llll} 
& & & 2g(\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2) \\
& & & \\
& & = &
 \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}^2
((C^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2})_T[N,N]) \right)  \\
& & &  \\
& & = &
 \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}^2
(C_T[N,N])^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2}  \right) \\
& & & \\
& & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}^2(
\tilde{D}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2} (C_T[N,N])
 \tilde{D}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2}) \right)  \\
& & & \\
& & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}(
\tilde{D}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2} (C_T[N,N])
 \tilde{D}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2} 
\tilde{D}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2} (C_T[N,N])
 \tilde{D}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2}) 
\right)  \\
& & & \\
& & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}(
\tilde{D}^{-\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2} \tilde{D}^{\gamma^1}
(C_T[N,N]) \tilde{D}^{\gamma^1} \tilde{D}^{\gamma^2} (C_T[N,N]) 
\tilde{D}^{\gamma^2}
 \tilde{D}^{\frac{1}{2} \gamma^1 - \frac{1}{2} \gamma^2})  
\right)  \\
\end{array} \]
\[ \begin{array}{llll}
& & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}(
\tilde{D}^{\gamma^1}
(C_T[N,N]) \tilde{D}^{\gamma^1} \tilde{D}^{\gamma^2} (C_T[N,N]) 
\tilde{D}^{\gamma^2})
\right)  \\
& & & \\
& (1) & \geq  & \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}(
\tilde{D}^{\gamma^1} (C_T[N,N]) \tilde{D}^{\gamma^1}) \lambda_{\underline{l}} (
\tilde{D}^{\gamma^2} (C_T[N,N]) \tilde{D}^{\gamma^2}) \right) \\
& & & \\
& & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}(
\tilde{D}^{\gamma^1} (C_T[N,N]) \tilde{D}^{\gamma^1}) \right) + \ln
\left( \Bprod{l=1}{s}
\lambda_{\underline{l}} (
\tilde{D}^{\gamma^2} (C_T[N,N]) \tilde{D}^{\gamma^2})  \right)\\
& & & \\
& & = & g(\gamma^1) + g(\gamma^2).
\end{array}  \]
(Inequality ($1$) follows from the second part
of Corollary \ref{HJ}.)
A similar argument shows that
$f$ is convex:

\[ \begin{array}{lll}
& & 2f(\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2, 
\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2) \\
& & \\
 & = &  \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}^2
(C^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2
}[N,N]) \right)  \\
 & & \\
 & = &
 \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}^2
(C[N,N])^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}
  \right) \\
 & & \\
 & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}^2(
\tilde{F}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}
 (C[N,N])
 \tilde{F}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
 \frac{1}{2} \pi^1 + \frac{1}{2} \pi^2})
 \right)  \\
 & & \\
 & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}(
\tilde{F}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}
 (C[N,N])
 \tilde{F}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
 \frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}
  \right.  \\
 & & \\
 & & \hspace{.1in}
  \left. \tilde{F}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2}
 (C[N,N])
 \tilde{F}^{\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
 \frac{1}{2} \pi^1 + \frac{1}{2} \pi^2})
 \right) \\
 & & \\
 & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}(
\tilde{F}^{-\frac{1}{2} \gamma^1 + \frac{1}{2} \gamma^2,
-\frac{1}{2} \pi^1 + \frac{1}{2} \pi^2} \tilde{F}^{\gamma^1, \pi^1}
 (C[N,N]) \tilde{F}^{\gamma^1, \pi^1}
  \tilde{F}^{\gamma^2, \pi^2} (C[N,N]) \right. \\
& & \\
& & \left. \hspace{.1in} \tilde{F}^{\gamma^2,\pi^2}
 \tilde{F}^{\frac{1}{2} \gamma^1 - \frac{1}{2} \gamma^2,
 \frac{1}{2} \pi^1 - \frac{1}{2} \pi^2})
 \right)   \\
 & &  \\
 & =  & \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}(
\tilde{F}^{\gamma^1, \pi^1}
 (C[N,N]) \tilde{F}^{\gamma^1, \pi^1}
  \tilde{F}^{\gamma^2, \pi^2} (C[N,N]) \tilde{F}^{\gamma^2,\pi^2})
 \right)    \\
 & & \\
(2)  &  \leq & 
\ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}(
\tilde{F}^{\gamma^1, \pi^1}
 (C[N,N]) \tilde{F}^{\gamma^1, \pi^1}) \lambda_{\bar{l}}
  (\tilde{F}^{\gamma^2, \pi^2} (C[N,N]) \tilde{F}^{\gamma^2,\pi^2})
 \right)   \\
& & \\
&  = &
\ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}(
\tilde{F}^{\gamma^1, \pi^1}
 (C[N,N]) \tilde{F}^{\gamma^1, \pi^1}) \right)  + \ln
\left( \Bprod{l=1}{s} \lambda_{\bar{l}}
  (\tilde{F}^{\gamma^2, \pi^2} (C[N,N]) \tilde{F}^{\gamma^2,\pi^2})
 \right)   \\
\end{array} \]
\[ \begin{array}{lll}
 & = & f(\gamma^1, \pi^1) + f(\gamma^2, \pi^2).
\end{array} \]
Inequality (2) 
follows from  part one of Corollary \ref{HJ}.
\end{proof}

We next derive expressions for 
$\bfrac{\partial v(\gamma,\pi)}{\partial \gamma_j}$ and
$\bfrac{\partial v(\gamma,\pi)}{\partial \pi_i}$.
For $j \in N$, let $H_{j}$ be the $n \times n$ diagonal matrix
with unique nonzero entry equal to $(H_j)_{jj} := \frac{1}{2}$. \\

\begin{lemma} \label{grad1}
 {\it Let $B$ be an $n \times n$ symmetric
positive-definite matrix. For $\gamma \in \Re^N$ and $j \in N$, }
\[ \frac{\partial B^{\gamma}}{\partial \gamma_j} (\bar{\gamma}) = 
H_{j} \cdot
B^{\bar{\gamma}} +
B^{\bar{\gamma}} \cdot
H_{j}  ~. \]
\end{lemma}

\begin{proof} Let $B = (b_{lk})_{l,k \in N}$. Then
\[ (B^{\gamma})_{lk} =
    b_{lk} \exp \left( \frac{\gamma_l + \gamma_k}{2} \right)  \]
So, for $j \in N$,
\[ \frac{\partial(B^{\gamma})_{lk}}{\partial \gamma_j} =
\left \{ \begin{array}{ll}
    b_{jj} \exp(\gamma_j) & \mbox{if} \hspace{.2in} j=l=k \\
& \\
    \frac{1}{2}b_{jk} \exp(\frac{\gamma_j + \gamma_k}{2}) &
\mbox{if} \hspace{.2in}
  l=j, l \neq k \\
& \\
    \frac{1}{2}b_{lj} \exp(\frac{\gamma_l + \gamma_j}{2}) & 
\mbox{if} \hspace{.2in}
  k=j, l \neq k  \\
& \\
 0 & \mbox{otherwise}.
\end{array} \right. \]
The result follows. 
\end{proof}

\begin{corollary} \label{grad2}
{\it For $\gamma \in \Re^N$} and $j 
\in N$,
\[ \frac{\partial ((C^{\gamma})_T[N,N])}{\partial \gamma_j} (\bar{\gamma}) = 
H_{j} \cdot
((C^{\bar{\gamma}})_T[N,N]) +
((C^{\bar{\gamma}})_T[N,N]) \cdot
H_{j}  ~.\]
\end{corollary}

\begin{proof}
Note that $(C^{\gamma})_T[N,N] = (C_T[N,N])^{\gamma}$
by Lemma \ref{same_T}, and that 
$(C^{\gamma})_T[N,N]$ is symmetric positive definite. Apply Lemma \ref{grad1}
taking $B=C_T[N,N]$. 
\end{proof}

For $i \in M$,
let $A_{i}$ be the $n \times n$
diagonal matrix 
\[ A_i := - \frac{1}{2} 
\mbox{diag}(a_{ij})_{j \in N} ~. \]
\begin{lemma} \label{grad3}
{\it Let $B$ be an $n \times n$ symmetric
positive-definite matrix. For $\gamma \in \Re^N$ and
$\pi \in \Re^M_+$,}
\[ \frac{\partial B^{\gamma,\pi}}{\partial \gamma_j} (\bar{\gamma},\bar{\pi}) = 
H_{j} \cdot
B^{\bar{\gamma},\bar{\pi}} +
B^{\bar{\gamma},\bar{\pi}} \cdot
H_{j}, \hspace{.2in} j \in N; \]
\[ \frac{\partial B^{\gamma,\pi}}{\partial \pi_i} (\bar{\gamma},\bar{\pi}) = 
A_{i} \cdot
B^{\bar{\gamma},\bar{\pi}} +
B^{\bar{\gamma},\bar{\pi}} \cdot
A_{i}, \hspace{.2in} i \in M  ~. \]
\end{lemma}
\begin{proof}
Let $B = (b_{lk})_{l,k \in N}$. Then
\[ (B^{\gamma,\pi})_{lk} =
    b_{lk} \exp
 \left( \frac{1}{2} (\gamma_l + \gamma_k)
 - \frac{1}{2}  \bsum{i \in M}
\pi_{i}(a_{il}+a_{ik})  \right) \]
So, for $j \in N$,
\[
 \frac{\partial(B^{\gamma,\pi})_{lk}}{\partial \gamma_j}
= \left \{ \begin{array}{ll}
    b_{jj} \exp
 \left( \gamma_j
 -  \bsum{i \in M}
\pi_{i}a_{ij})  \right)
 & \mbox{if} \hspace{.2in} j=l=k \\
& \\
    \frac{1}{2}b_{jk} \exp
 \left( \frac{1}{2} (\gamma_j + \gamma_k)
 - \frac{1}{2}  \bsum{i \in M}
\pi_{i}(a_{ij}+a_{ik})  \right)
 & \mbox{if} \hspace{.2in}
  l=j, l \neq k \\
& \\
    \frac{1}{2}b_{lj} \exp
 \left( \frac{1}{2} (\gamma_l + \gamma_j)
 - \frac{1}{2}  \bsum{i \in M}
\pi_{i}(a_{il}+a_{ij})  \right)
 & \mbox{if} \hspace{.2in}
  k=j, l \neq k  \\
& \\
 0 & \mbox{otherwise}
\end{array} \right. \]
and for $i \in M$,
\[  \frac{\partial(B^{\gamma,\pi})_{lk}}{\partial \pi_i} = -\frac{1}{2} b_{lk}
(a_{il}+a_{ik}) \exp
 \left( \frac{1}{2} (\gamma_l + \gamma_k)
 - \frac{1}{2}  \bsum{t \in M}
\pi_{t}(a_{tl}+a_{tk})  \right) \]

The result follows.
\end{proof}
\begin{corollary} \label{grad9}
{\it For all $\bar{\gamma} \in 
\Re^N$, $\bar{\pi} \in \Re^M_+$},
\[ \frac{\partial (C^{\gamma,\pi}[N,N])}{\partial \gamma_j} (\bar{\gamma},
\bar{\pi}) = 
H_{j} \cdot
C^{\bar{\gamma},\bar{\pi}}[N,N] +
C^{\bar{\gamma},\bar{\pi}}[N,N] \cdot
H_{j}, \hspace{.2in} j \in N; \]
\[ \frac{\partial (C^{\gamma,\pi}[N,N])}{\partial \pi_i} (\bar{\gamma},
\bar{\pi}) = 
A_{i} \cdot
C^{\bar{\gamma},\bar{\pi}}[N,N] +
C^{\bar{\gamma},\bar{\pi}}[N,N] \cdot
A_{i}, \hspace{.2in} i \in M ~. \]
\end{corollary}
\begin{proof}
 Note that
$C^{\gamma,\pi}[N,N] = (C[N,N])^{\gamma,\pi}$, and that
$C^{\gamma,\pi}[N,N]$ is symmetric positive definite. Apply Lemma 
\ref{grad3}
taking $B=C[N,N]$.
\end{proof}
\begin{lemma} \label{grad4}
{\it 
Let $B$ be an $n \times n$ 
symmetric positive-definite matrix.
Let $\bar{\gamma} \in \Re^N$, $\bar{\pi} \in \Re^M_+$.
Let $\{ \lambda_j(B^{\bar{\gamma}}) \}_{j \in N}$
be the set of eigenvalues of $B^{\bar{\gamma}}$, and for $ l \in N$, 
let $q_l(\bar{\gamma}) = (q_{lj}(\bar{\gamma}))_{j \in N}$
be the eigenvector of $B^{\bar{\gamma}}$
corresponding to the eigenvalue $\lambda_l(B^{\bar{\gamma}})$,
normalized to have Euclidean length 1.
Then for $j,l \in N$,
\[ \frac{\partial \lambda_l(B^{\gamma})}{\partial \gamma_j} (\bar{\gamma}) = 
\lambda_l(B^{\bar{\gamma}}) \cdot (q_{lj} (\bar{\gamma}))^2  \]
provided that $\lambda_l(B^{\bar{\gamma}})$ is simple (i.e. has 
multiplicity 1).} \\
\end{lemma}
\begin{proof}
Since 
$\lambda_l(B^{\bar{\gamma}})$
is simple, it follows that
$\lambda_l(B^{\gamma})$ is analytic at $\gamma = \hat{\gamma}$.
Using a standard result concerning eigenvalue perturbations,
\[ \frac{\partial \lambda_l(B^{\gamma})}{\partial \gamma_j} (\bar{\gamma}) = 
q_l^T(\bar{\gamma}) \cdot \frac{\partial B^{\gamma}}{\partial \gamma_j}
 (\bar{\gamma})
\cdot q_l(\bar{\gamma}) ~. \]
Applying Lemma \ref{grad1}  and letting $H = H_{j}$, we have that for
 $j \in N$,
\[ \begin{array}{lll}
 \bfrac{\partial \lambda_l(B^{\gamma})}{\partial \gamma_j} (\bar{\gamma}) & = &
q_l^T(\bar{\gamma})~(H \cdot B^{\bar{\gamma}} + B^{\bar{\gamma}} \cdot H)~ 
q_l(\bar{\gamma}) \\
& &  \\
& = & q_l^T(\bar{\gamma})~ H \cdot B^{\bar{\gamma}} ~q_l(\bar{\gamma}) + q_l^T(\bar{\gamma})~
B^{\bar{\gamma}}  \cdot H ~q_l(\bar{\gamma}) \\
& & \\
& = & q_l^T(\bar{\gamma}) ~H ~\lambda_l (B^{\bar{\gamma}})
~ q_l(\bar{\gamma}) +
\lambda_l (B^{\bar{\gamma}}) 
~ q_l^T(\bar{\gamma})
~H ~q_l(\bar{\gamma}) \\
& & \\
& = & \lambda_l(B^{\bar{\gamma}}) \cdot (q_{lj} (\bar{\gamma}) )^2 ~.
\end{array}  \]
\end{proof}
\begin{lemma} \label{grad5}
{\it 
Let $B$ be an $n \times n$ 
symmetric positive-definite matrix.
Let $\bar{\gamma} \in \Re^N$, $\bar{\pi} \in \Re^M_+$.
Let $\{ \lambda_j(B^{\bar{\gamma},\bar{\pi}}) \}_{j \in N}$
be the set of eigenvalues of $B^{\bar{\gamma},\bar{\pi}}$, and for
 $ j \in N$, 
let $u_l(\bar{\gamma},\bar{\pi})=
(u_{lj}(\bar{\gamma},\bar{\pi}))_{j \in N}$
 be the eigenvector of 
$B^{\bar{\gamma},\bar{\pi}}$
corresponding to the eigenvalue $\lambda_l(B^{\bar{\gamma},\bar{\pi}})$,
normalized to have Euclidean length 1.
Then for $l \in N$,
\[ \frac{\partial \lambda_l(B^{\gamma,\pi})}{\partial \gamma_j} 
(\bar{\gamma},\bar{\pi}) = 
\lambda_l(B^{\bar{\gamma},\bar{\pi}})
 \cdot (u_{lj} (\bar{\gamma},\bar{\pi}))^2, \hspace{.2in}  j \in N; \]
\[ \frac{\partial \lambda_l(B^{\gamma,\pi})}{\partial \pi_i} 
(\bar{\gamma},\bar{\pi}) = 
- \lambda_l(B^{\bar{\gamma},\bar{\pi}}) \bsum{k \in N}a_{ik} 
(u_{lk}(\bar{\gamma},\bar{\pi})^2), \hspace{.2in} i \in M \]
provided that $\lambda_l(B^{\bar{\gamma},\bar{\pi}})$ is simple (i.e. has 
multiplicity 1).} 
\end{lemma}
\begin{proof}
 Since 
$\lambda_l(B^{\bar{\gamma},\bar{\pi}})$
is simple, it follows that
$\lambda_l(B^{\gamma,\pi})$ is analytic at $\pi = \hat{\pi}$, $\gamma =
\hat{\gamma}$.
Using a standard result concerning  eigenvalue perturbations,
\[ \frac{\partial \lambda_l(B^{\gamma,\pi})}{\partial \gamma_j}
 (\bar{\gamma},\bar{\pi}) = 
u_l^T(\bar{\gamma},\bar{\pi}) \cdot \frac{\partial
 B^{\gamma,\pi}}{\partial \gamma_j} (\bar{\gamma},\bar{\pi})
\cdot u_l(\bar{\gamma},\bar{\pi}), \hspace{.2in} j \in N; \]
\[ \frac{\partial \lambda_l(B^{\gamma,\pi})}{\partial \pi_i}
 (\bar{\gamma},\bar{\pi}) = 
u_l^T(\bar{\gamma},\bar{\pi}) \cdot \frac{\partial
 B^{\gamma,\pi}}{\partial \pi_i} (\bar{\gamma},\bar{\pi})
\cdot u_l(\bar{\gamma},\bar{\pi}), \hspace{.2in} i \in M ~. \]
For $j \in N, i \in M$, let $H=H_j$, $A=A_i$.
Applying Lemma \ref{grad3}, we have that
 for $j \in N$,
 \begin{eqnarray*}
 \bfrac{\partial \lambda_l(B^{\gamma,\pi})}{\partial \gamma_j}
 (\bar{\gamma},\bar{\pi}) & =  &
u_l^T(\bar{\gamma},\bar{\pi})~(H \cdot B^{\bar{\gamma},\bar{\pi}} +
 B^{\bar{\gamma},\bar{\pi}} \cdot  H)~ u_l(\bar{\gamma},\bar{\pi}) \\
& &  \\
& =  & u_l^T(\bar{\gamma},\bar{\pi})~ H \cdot B^{\bar{\gamma},\bar{\pi}}
 ~u_l(
\bar{\gamma},\bar{\pi}) + u_l^T(\bar{\gamma},\bar{\pi})~
B^{\bar{\gamma},\bar{\pi}}  \cdot H ~u_l(\bar{\gamma},\bar{\pi}) \\
& & \\
& = & u_l^T(
\bar{\gamma},\bar{\pi})  H ~\lambda_l (B^{
\bar{\gamma},\bar{\pi}})
~ u_l(\bar{\gamma},\bar{\pi}) +
\lambda_l (B^{\bar{\gamma},\bar{\pi}}) 
~ u_l^T(\bar{\gamma},\bar{\pi})
~H ~u_l(\bar{\gamma},\bar{\pi}) \\
& & \\
& =  & \lambda_l(B^{\bar{\gamma},\bar{\pi}}) \cdot (u_{lj} 
(\bar{\gamma},\bar{\pi}) )^2 ~.
\end{eqnarray*} 
Also,
 \begin{eqnarray*}
 \bfrac{\partial \lambda_l(B^{\gamma,\pi})}{\partial \pi_i}
 (\bar{\gamma},\bar{\pi}) & =  &
u_l^T(\bar{\gamma},\bar{\pi})~(A  \cdot
 B^{\bar{\gamma},\bar{\pi}} +
 B^{\bar{\gamma},\bar{\pi}} \cdot A)~ u_l(
\bar{\gamma},\bar{\pi}) \\
& & \\
& = & u_l^T(\bar{\gamma},\bar{\pi})~ A \cdot B^{\bar{\gamma},\bar{\pi}}
 ~u_l(
\bar{\gamma},\bar{\pi}) + u_l^T(\bar{\gamma},\bar{\pi})~
B^{\bar{\gamma},\bar{\pi}} \cdot A ~u_l(\bar{\gamma},\bar{\pi}) \\
& & \\
& = & u_l^T(\bar{\gamma},\bar{\pi}) ~A ~\lambda_l (B^{\bar{\gamma},\bar{\pi}})
~ u_l(\bar{\gamma},\bar{\pi}) +
\lambda_l (B^{\bar{\gamma},\bar{\pi}}) 
~ u_l^T(\bar{\gamma},\bar{\pi})
~A ~u_l(\bar{\gamma},\bar{\pi}) \\
& & \\
& = & - \lambda_l(B^{\bar{\gamma},\bar{\pi}}) \bsum{k \in N}a_{ik} 
(u_{lk}(\bar{\gamma},\bar{\pi})^2) ~.
\end{eqnarray*} 
\end{proof}


For $\bar{\gamma} \in \Re^N$,
and $\bar{\pi} \in \Re^M_+$,
let $\{ \lambda_j(C^{\bar{\gamma},\bar{\pi}}
[N,N]) \}_{j \in N}$ be the set of
eigenvalues of $C^{\bar{\gamma},\bar{\pi}}[N,N]$,
and for $l \in N$,
let $u_l(\bar{\gamma},\bar{\pi}) =
(u_{lj}(\bar{\gamma},\bar{\pi}))_{j \in N}$
be the eigenvector of
 $C^{\bar{\gamma},\bar{\pi}}[N,N]$
corresponding to the eigenvalue $\lambda_l(C^{\bar{\gamma},\bar{\pi}}[N,N])$,
normalized to have Euclidean length 1.
Similarly, let
$\{ \lambda_j((C^{\bar{\gamma}})_T[N,N]) \}_{j \in N}$ be the set of
eigenvalues of $(C^{\bar{\gamma}})_T[N,N]$,
and for $l \in N$,
let $q_l(\bar{\gamma}) =
(q_{lj}(\bar{\gamma}))_{j \in N}$ 
be the eigenvector of $(C^{\bar{\gamma}})_T[N,N]$
corresponding to the eigenvalue $\lambda_l((C^{\bar{\gamma}})_T[N,N])$, normalized
to have Euclidean length 1.
\\
\begin{corollary} \label{grad6}
{\it For $\bar{\gamma} \in \Re^N$ and $j,l \in N$,
\[ \frac{\partial \lambda_l((C^{\gamma})_T[N,N])}{\partial \gamma_j}
 (\bar{\gamma}) = 
\lambda_l((C^{\bar{\gamma}})_T[N,N]) \cdot (q_{lj}(\bar{\gamma}))^2 \]
provided that $\lambda_l((C^{\bar{\gamma}})_T[N,N])$ is simple.} 
\end{corollary}
\begin{proof}
 Note that $(C^{\gamma})_T[N,N] = (C_T[N,N])^{\gamma}$
by Lemma \ref{same_T}, and that
$(C^{\gamma})_T[N,N]$ is symmetric positive definite. The result follows from
Lemma \ref{grad4}, taking $B = C_T[N,N]$. 
\end{proof}
\begin{corollary} \label{grad7}
{\it For $\bar{\gamma} \in \Re^N$,
 $\bar{\pi} \in \Re^M_+$, and $l \in N$,
\[ \frac{\partial \lambda_l(C^{\gamma,\pi}[N,N])}{\partial \gamma_j} 
(\bar{\gamma},\bar{\pi}) = 
\lambda_l(C^{\bar{\gamma},\bar{\pi}}[N,N])
 \cdot (u_{lj} (\bar{\gamma},\bar{\pi}))^2,  \hspace{.2in} j \in N; \]
\[ \frac{\partial \lambda_l(C^{\gamma,\pi}[N,N])}{\partial \pi_i} 
(\bar{\gamma},\bar{\pi}) = 
- \lambda_l(C^{\bar{\gamma},\bar{\pi}}[N,N]) \bsum{k \in N}a_{ik} 
(u_{lk}(\bar{\gamma},\bar{\pi})^2), \hspace{.2in} i \in M \]
provided that $\lambda_l (C^{\bar{\gamma},\bar{\pi}}[N,N])$ is simple.} \\
\end{corollary}
\begin{proof}
Note that $C^{\gamma,\pi}[N,N] = (C[N,N])^{\gamma,\pi}$, and that
$C^{\gamma,\pi}[N,N]$ is symmetric positive definite. The result follows from
Lemma \ref{grad5}, taking $B = C[N,N]$. 
\end{proof}

For  $l \in N$,
let $u_{\bar{l}}(\bar{\gamma},\bar{\pi}) =
(u_{\bar{l}j}(\bar{\gamma},\bar{\pi}))_{j \in N}$ denote
the eigenvector of length 1 corresponding to the
$l^{\mbox{th}}$
greatest eigenvalue of $C^{\bar{\gamma},\bar{\pi}}[N,N]$.
Let $q_{\underline{l}}(\bar{\gamma}) =
(q_{\underline{l}j}(\bar{\gamma}))_{j \in N}$
denote the eigenvector of length 1 corresponding to the
$l^{\mbox{th}}$ smallest eigenvalue of
$(C^{\bar{\gamma}})_T[N,N]$. 
\begin{theorem} \label{grad8}
{\it Suppose that $\bar{\gamma} \in \Re^N$, $\bar{\pi} \in \Re^M_+$. If
$\lambda_{\bar{s}} (C^{\bar{\gamma}, \bar{\pi}}[N,N]) >
\lambda_{\overline{s+1}}(C^{\bar{\gamma}, \bar{\pi}}[N,N])$, and
$\lambda_{\underline{s}} ((C^{\bar{\gamma}})_T[N,N]) <
\lambda_{\underline{s+1}}((C^{\bar{\gamma}})_T[N,N])$, then
\[ \frac{\partial v(\gamma,\pi)}{\partial \gamma_j} (\bar{\gamma},\bar{\pi}) =
 \Bsum{l=1}{s}
(u_{\bar{l}j}(\bar{\gamma},\bar{\pi}))^2 - \Bsum{l=1}{s}
(q_{\underline{l}j}(\bar{\gamma}))^2 \hspace{.2in} \]
for $j \in N$, and
\[ \frac{\partial v(\gamma,\pi)}{\partial \pi_i} (\bar{\gamma},\bar{\pi}) =
-\Bsum{l=1}{s} \bsum{k \in N} a_{ik} (u_{\bar{l}k}(\bar{\gamma},\bar{\pi})^2)
+b_i \]
for $i \in M$.}
\end{theorem}
\begin{proof}
 Under these conditions we can write, for $j  \in N$,
 \begin{eqnarray*}
 \bfrac{\partial v(\gamma,\pi)}{\partial \gamma_j} & = &
\bfrac{\partial}{\partial \gamma_j} \left( 
\ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}
(C^{\gamma,\pi}[N,N]) \right)
 - \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}((C^{\gamma})_T[N,N])
\right)
+ \bsum{i \in M} \pi_i b_i  \right) \\ 
& &  \\
& = &  \displaystyle \sum_{l=1}^{s} \bfrac{\partial}{\partial \gamma_j} 
\ln \lambda_{\bar{l}}
(C^{\gamma,\pi}[N,N])
- \displaystyle \sum_{l=1}^{s} \bfrac{\partial}{\partial \gamma_j} 
\ln \lambda_{\underline{l}}
((C^{\gamma})_T[N,N]) \\
& & \\
& = &  \displaystyle \sum_{l=1}^{s}
 \bfrac{1}{\lambda_{\bar{l}}(C^{\gamma,\pi}[N,N])} \cdot 
\bfrac{\partial \lambda_{\bar{l}}(C^{\gamma,\pi}[N,N])}{\partial \gamma_j} 
- \\
&  & \displaystyle \sum_{l=1}^{s} 
\bfrac{1}{\lambda_{\underline{l}}((C^{\gamma})_T[N,N])} \cdot 
\bfrac{\partial 
\lambda_{\underline{l}}((C^{\gamma})_T[N,N])}{\partial \gamma_j} 
~.
\end{eqnarray*} 
Similarly, we have that for $i \in M$,
 \begin{eqnarray*}
 \bfrac{\partial v(\gamma,\pi)}{\partial \pi_i} & =  &
\bfrac{\partial}{\partial \pi_i} \left( 
\ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}
(C^{\gamma,\pi}[N,N]) \right)
 - \ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}((C^{\gamma})_T[N,N])
\right)
+ \bsum{t \in M} \pi_t b_t  \right) \\ 
& &  \\
& = &  \displaystyle \sum_{l=1}^{s} \bfrac{\partial}{\partial \pi_i} 
\ln \lambda_{\bar{l}}
(C^{\gamma,\pi}[N,N])
- \displaystyle \sum_{l=1}^{s} \bfrac{\partial}{\partial \pi_i} 
\ln \lambda_{\underline{l}}
((C^{\gamma})_T[N,N]) \\
&  & \\
& = &
 \displaystyle \sum_{l=1}^{s}
 \bfrac{1}{\lambda_{\bar{l}}(C^{\gamma,\pi}[N,N])} \cdot 
\bfrac{\partial \lambda_{\bar{l}}(C^{\gamma,\pi}[N,N])}{\partial \pi_i}  \\
& & \\
 &  & - \displaystyle \sum_{l=1}^{s} 
\bfrac{1}{\lambda_{\underline{l}}((C^{\gamma})_T[N,N])} \cdot 
\bfrac{\partial \lambda_{\underline{l}}((C^{\gamma})_T[N,N])}{\partial \pi_i}
+ b_i .
\end{eqnarray*} 
The result follows from 
Corollaries \ref{grad6} and \ref{grad7}.
\end{proof}

The only question remaining is how to proceed in the case
that, while searching for $\min_{\gamma \in \Re^N,\pi \in \Re^M_+} 
v(\gamma,\pi)$,
we generate a $\bar{\gamma},\bar{\pi}$ with
$\lambda_{\bar{s}}(C^{\bar{\gamma},\bar{\pi}}[N,N]) =
\lambda_{\overline{s+1}}(C^{\bar{\gamma},\bar{\pi}}[N,N]),$ or
$\lambda_{\underline{s}}((C^{\bar{\gamma}})_T[N,N]) =
\lambda_{\underline{s+1}}((C^{\bar{\gamma}})_T[N,N]).$ 
In this case, we could terminate our search, and use 
$v(\bar{\gamma},\bar{\pi})$
as our upper bound.  In practice, however, we do not test
for this, and just apply our descent method. 

In the previous section, we demonstrated 
how the CMERSP can be modeled as a CMESP. 
Next, we demonstrate that our spectral bound, $\min_{\gamma,\pi}
 v(\gamma, \pi)$,
for the CMERSP, when applied to the
CMESP via the construction of the previous section,
yields the spectral bound, $\min_{\pi} v_0(\pi)$, of Lee (1994)
%\cite{L}
for the CMESP, where, following the notation of Lee (1994),
\[ v_0(\pi) := 
\ln  \left( \prod_{l=1}^{s} \lambda_{\bar{l}}(C_{\pi})  
\right)+ \sum_{i \in M}
\pi_i b_i ~. \]

\begin{theorem}\label{SReduce1}
{\it For any $\pi \in \Re^M_+$, there exist a $\hat{\gamma} \in \Re^N$,
and a $\hat{\pi} \in \Re^M_+$ such that $v(\hat{\gamma},\hat{\pi})=
v_0(\pi)$.}
\end{theorem}

\begin{proof}
Let $\pi \in \Re^M_+$. Define $\gamma_j = 0$ for $j \in  N$, and
define $\hat{\pi}_i = \pi_i$ for $i \in M$. Then
$D^{\hat{\gamma}} = I_{n + t}$, so
\[  (\hat{C}^{\hat{\gamma}})_{T}[N,N]
 =
\hat{C}_T[N,N]
 =
I_{n} ~ 
\Rightarrow ~
\ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}
 ((\hat{C}^{\hat{\gamma}})_{T}[N,N]) \right)
  = \ln 1
  = 0 ~. \]
Moreover,
  $\hat{C}^{\hat{\gamma},\hat{\pi}}[N,N]
=   C_{\pi}$
so
\[ \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}
(\hat{C}^{\hat{\gamma},\hat{\pi}}[N,N])\right)  =
\ln \left(\Bprod{l=1}{s} \lambda_{\bar{l}}
(C_{\pi}) \right) ~. \]
Since 
$\sum_{i \in M} \pi_i b_i =
 \sum_{i \in M} \hat{\pi}_{i} b_i$,
we have that $v(\hat{\gamma},\hat{\pi}) = v_0(\pi)$.
\end{proof}

\begin{theorem}\label{SReduce2}
{\it For any $\gamma \in \Re^N$, and 
$\pi \in \Re^M_+$, there exists a 
$\hat{\pi} \in \Re^M_+$ such that $v_0(\hat{\pi})  \leq
 v(\gamma,\pi)$.}
\end{theorem}

\begin{proof}
Let $\gamma \in \Re^N$ and $\pi \in \Re^M_+$. Define 
$\hat{\pi}_i = \pi_i$
for $i \in M$.
Since $\sum_{i \in M} \pi_i b_i = \sum_{i \in M} \hat{\pi}_{i} b_i$, it is
enough to show
\[ \ln \left( \prod_{l=1}^{s} \lambda_{\bar{l}}(C_{\hat{\pi}})
\right)\leq
 \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}
(\hat{C}^{\gamma,\pi}[N,N] ) \right) -
\ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}}
((\hat{C}^{\gamma})_{T}[N,N]) \right) ~.\]
However,
$ \hat{C}^{\gamma,\pi}[N,N] = \hat{D}^{\gamma} C_{\hat{\pi}} \hat{D}^{\gamma} $
and
$ (\hat{C}^{\gamma,\pi})_T[N,N] =
(\hat{D}^{\gamma})^2 $
so equivalently, we need to show
\[ \ln  \left( \prod_{l=1}^{s} \lambda_{\bar{l}}(C_{\hat{\pi}}) \right) \leq
 \ln \left( \Bprod{l=1}{s} \lambda_{\bar{l}}
(\hat{D}^{\gamma} C_{\hat{\pi}} \hat{D}^{\gamma}) \right) -
\ln \left(  \Bprod{l=1}{s} \lambda_{\underline{l}}
(\hat{D}^{\gamma})^2  \right). \]
Letting $D:= \hat{D}^{\gamma}$, we have that
\[ \begin{array}{lll}
 & & \Bprod{l=1}{s} \lambda_{\bar{l}}(C_{\hat{\pi}})
  =  \Bprod{l=1}{s} \lambda_{\bar{l}} (D^{-1}D C_{\hat{\pi}} D D^{-1})  \\
& & \\
\end{array}\]
\[ \begin{array}{lll}
 & &  \leq  \Bprod{l=1}{s} \lambda_{\bar{l}} (D^{-1})
 \lambda_{\bar{l}} (DC_{\hat{\pi}} D)
 \lambda_{\bar{l}} (D^{-1})
  =  \bfrac{1}{\prod_{l=1}^{s} \lambda_{\underline{l}}(D)}
\Bprod{l=1}{s} \lambda_{\bar{l}}(D C_{\hat{\pi}} D)
\bfrac{1}{\prod_{l=1}^{s} \lambda_{\underline{l}}(D)} \\
& & \\
& \Rightarrow
& \Bprod{l=1}{s} \lambda_{\bar{l}}(C_{\hat{\pi}})
\lambda_{\underline{l}} (D)^2
 \leq  \Bprod{l=1}{s} \lambda_{\bar{l}} (DC_{\hat{\pi}} D)  \\
& & \\
& \Rightarrow
& \ln \left(\Bprod{l=1}{s} \lambda_{\bar{l}}(C_{\hat{\pi}}) \right)
 +  \ln \left(\Bprod{l=1}{s} \lambda_{\underline{l}} (D)^2 \right)
 \leq  \ln  \left( \Bprod{l=1}{s} \lambda_{\bar{l}} (DC_{\hat{\pi}} D) 
\right)
\end{array} \]
where the first inequality follows from Corollary \ref{HJ}.
The result follows.
\end{proof}

Finally, we establish that a minimizer of the spectral bound that
we have introduced is sharp when the elements of $N$ 
are independent of one another and also independent of $T$.
We note that this is not the case with the weaker spectral bound
of Bueso et al. 
%\cite{BUESO} 
(see Lee (1998)).
%\cite {LEE2}).

\begin{theorem}\label{SSharp}
If $C[N,N]$ is diagonal and  $C[N,T]=0$ (hence, $C[T,N]=0$), then 
$\gamma_j= -\ln(c_{jj})$, $\pi_i = 0$ yields $z=v(\gamma,\pi)$ .
\end{theorem}

\begin{proof}
Under these conditions, $C_T[S,S] = C[S,S]$ for all $S \subseteq N$, so
$z=0$.
Note that $(C^{\gamma,\pi})_T[N,N] = C^{\gamma,\pi}[N,N]$
and that 
\[ (C^{\gamma,\pi})_{ij} = (C^{\gamma})_{ij} = \left \{ \begin{array}{ll}
                     1 & \mbox{if} \hspace{.1in} i=j \hspace{.1in}
                                 \mbox{and} \hspace{.1in}
                                      j \in N \\
& \\
                     c_{ij} & \mbox{if}
                             \hspace{.1in}
                                      i,j \in T \\
& \\
                     0 & \mbox{otherwise.}
                    \end{array} \right. \]
Therefore,
\[ v(\gamma,\pi)  = \ln  \left( \Bprod{l=1}{s} \lambda_{\bar{l}} 
(C^{\gamma,\pi}[N,N]) \right) -
\ln \left( \Bprod{l=1}{s} \lambda_{\underline{l}} ((C^{\gamma})_{T}[N,N])
\right)
 = \ln  \left( \Bprod{l=1}{s} 1 \right) - \ln  \left( \Bprod{l=1}{s} 1
\right)= 0 ~ . \]
\end{proof}

\section{Computational Results}

We coded the algorithm for solving the CMERSP in C on
a 50MHZ HP 9000/715. Again, our code uses the matrix library LAPACK 2.0, 
%\cite{BAI},
which is written in FORTRAN 77, 
L-BFGS-B
subroutines for constrained optimization, also written in FORTRAN,
and the linear programming library CPLEX 4.0,
%\cite{CPLEX}
which is
written in C. Using the same order-63 covariance matrix 
as we did for the CMESP application, we derived several different
covariance matrices $C$ and $C_T$ for
various choices of $N$ and $T$.  In particular, we solved five
distinct problems, by letting
$t$ range through the set $L:=\{ 5, 10, 20, 25, 30 \}$. We fixed $n=30$
and $s=15$,
but we varied $N$ for each choice of $T$. Also, the set $T$
of size $l$ was a subset of the set $T$ of size $l+5$ ($l \in L$).
In each case, we took $m=0$. We found that these problems were intractable
using the upper bounds described in this chapter. To obtain
run times for these problems, we
used three upper bounds
derived from  a nonlinear programming relaxation (see Anstreicher et
al. (1997b)).
In Table 3.1,
we report for each problem
the total solution time, based on one run, corresponding to each
of these three upper bounding methods. (Method 1 corresponds to the
method ``diagonal'' in Anstreicher et al. (1997b); method 2 corresponds to 
``identity;'' method 3 corresponds to ``sdp.'')
We use P$t$ to denote the
problem with  $t$ target points. Our results suggest that for fixed $n$,
the run time increases as we increase $t$. 

Recall that in Section 2.4, we were able to use the spectral upper bounds
to solve the CMESP for unconstrained and constrained problems
taking $n=30$, $s=15$. In the remote case, for the same values of $n$ and
$s$, and the same covariance matrix $C$, even unconstrained problems 
were intractable
using the spectral bounds.
These results suggest that the level of difficulty in solving
a CMERSP exceeds that in solving a CMESP. (Recall that both are NP-Hard.)

\begin{table}
\hspace{1.22in}
\begin{tabular}{r||r|r|r|r|r|}
 & P5 & P10 & P20 & P25 & P30    \\
\hline
\hline
Method 1 & 4.25 & 2.52 & 311.14 & 180.33 & 214.09 \\
Method 2 & 2.39 & 2.33 & 310.75 & 129.45 & 103.03 \\
Method 3 & 3.92 & 3.83 & 472.56 & 165.60 & 169.92
\end{tabular}
\caption{Total Solution Time (seconds)}  
\end{table}

As a future project, we intend to parallelize the code 
for the CMERSP. 

\end{document}
\newpage
\chapter*{\vskip -1in Chapter 4}
\setcounter{chapter}{4}
\setcounter{section}{0}
\setcounter{subsection}{0}
\setcounter{table}{0}
{\Huge {\bf The Constrained Maximum-Entropy Sampling Problem with
Fixed Costs (CMESPF)}} \\

\section{Formulation}

Recall that the original CMESP serves as a good model for 
the sampling problem whereby we select, from a network
of sites, a most informative subset
of specified cardinality. In particular, we addressed
the environmental monitoring problem, where our network consists
of sites
at which we are interested in the concentration
 of a particular ion in wet deposition, and where we seek to observe
a subset that provides as much information as possible 
about the entire network. We now consider a variation of
this problem. Assume that we
are interested in a {\it set} of ions, and that there is the opportunity
to measure just a subset of those ions at each monitoring station.
Let {\it I} be the set of ions, and let {\it L} be the set of
potential monitoring locations. Let $C$ be  a covariance
matrix with rows and columns  indexed by the ion-location
pairs $(i,l) \in I \times  L$. We assume a
Gaussian distribution on the set of ion-location pairs.
The observation of at
least one ion at location $l$ incurs a fixed cost $f_l$.
The observation of ion $i$ at location $l$ incurs an additional cost
$d_{il}$. We have a budget limit $\beta$, and we wish
to observe a total of $p$ ions at a total of $t$ locations, so as
to maximize the entropy. We next show how this cost-constrained
problem can be modeled as an integer nonlinear program.

We start by defining at each location a ``pseudo-ion'' $o$.
For each $l \in {\it L}$, the pair $(o,l)$ is assumed to have
unit variance, and to be independent
of all other ion-location pairs. We therefore have the following
extended covariance matrix:

\[
\widehat{C} = \bordermatrix{
  &  \{ o \} \times {\it L} & {\it I} \times {\it L}  \cr 
& I & 0 \cr
&  0 & C  \cr 
}\ .
\]

We then formulate our problem  as the following integer
nonlinear program.

\[ \begin{array}{llrll}
&  \max & \ln \det \widehat{C}[\underline{x},\underline{x}] & & \\
& & & & \\
(1) & \mbox{s.t.} &   \bsum{(i,l) \in {\it I} \times
{\it L}} x_{il} & = & p \\
& & & & \\
(2) & & \bsum{i \in {\it L}} x_{ol} & = & t; \\
& & & & \\
(3)& &
\bsum{l \in {\it L}} f_l x_{ol} + \bsum{(i,l) \in {\it I} \times {\it L}}
d_{il} x_{il} & \leq & \beta; \\
& & & & \\
(4) & & x_{il} - x_{ol} & \leq & 0, ~ \forall ~(i,l) \in {\it I} \times {\it L}; \\
& & & & \\
& & x_{ol} & \in &  \{ 0,1 \}, ~ \forall ~l \in {\it L}; \\
& & & & \\
& &  x_{il} & \in &  \{ 0,1 \}, ~ \forall ~(i,l) \in {\it I} \times {\it L}.
\end{array} \]

Constraint (1) ensures that we observe a total of $p$ ions;
constraint (2) ensures that the observations are made at a total of $t$
locations.
Constraints (4) force $x_{ol}=1$ whenever 
$x_{il}=1$ for some $(i,l) \in {\it I} \times {\it L}$.
These constraints, together with the constraints (3), ensure that
we pay the fixed cost $f_l$ whenever we make any observation at location
$l$.

Next, we verify
that inclusion of pseudo-ions does not
affect the entropy of a solution.
Recall that the entropy of a $p$-subset $S$ of original
ion-location pairs $(i,l)$ ($i \neq o$) is, up to a constant multiple, equal
to $\ln \det C[S,S]$. Note, however, that every 
principle submatrix  $\widehat{C}[\underline{x},\underline{x}]$
corresponding to a feasible point $x$ of the above program is of the form
\[ \left( \begin{array}{ll}
  I_t & 0 \\
  0 & C[S,S]
\end{array} \right) \]
for some subset $S$ with $|S|=p$. Also, every principle
submatrix $C[S,S]$ of $C$ (with $|S|=p$) can be
extended to a $(p+t) \times (p+t)$ submatrix of ${\widehat C}$
of the above form. Since
\[ \det C[S,S] = \det
 \left( \begin{array}{ll}
  I_t & 0 \\
  0 & C[S,S]
\end{array} \right),\]
the result follows.

We now verify that the above program can be re-expressed
as a CMESP. To see this, 
note that constraints (1) and (2) above can be equivalently
expressed by 
\[ \begin{array}{lrll}
(1') &  \bsum{j \in {\it L}} x_{ol} +
 \bsum{(j,l) \in {\it I} \times
{\it L}} x_{jl} & = & t + p, \\
& & & \\
(2') &  \bsum{l \in {\it L}} x_{ol} & = & t .
\end{array} \]

Therefore, we get the following formulation of the above program as a
CMESP, where the ``sample size'' $s$ is $p+t$.
\[ \begin{array}{llrll}
& \max & \ln \det \widehat{C}[\underline{x},\underline{x}] & & \\
& & & & \\
(1) &  \mbox{s.t.} & 
   \bsum{l \in {\it L}} x_{ol} +
 \bsum{(j,l) \in {\it I} \times
{\it L}} x_{jl} & = &  p+t; \\
& & & & \\
&  & \bsum{l \in {\it L}} x_{ol} & \leq  & t; \\
& & & & \\
&  & -\bsum{l \in {\it L}} x_{ol} & \leq  & - t; \\
& & & & \\
&  &
\bsum{l \in {\it L}} f_l x_{ol} + \bsum{(j,l) \in {\it I} \times {\it L}}
d_{jl} x_{jl} & \leq & \beta; \\
& & & & \\
&  & x_{jl} - x_{ol} & \leq & 0, ~ \forall ~(j,l) \in {\it I} \times {\it L}; \\
& & & & \\
&  & x_{ol} & \in &  \{ 0,1 \}, ~ \forall ~l \in {\it L}; \\
& & & & \\
&  &  x_{jl} & \in &  \{ 0,1 \}, ~ \forall ~(j,l) \in {\it I} \times {\it L}.
\end{array} \]
Note that $(1)$ is the ``cardinality constraint.''

We call this cost-constrained version of the CMESP the ``constrained
maximum-entropy sampling problem with fixed costs'' (CMESPF); it can be solved
using the branch-and-bound algorithm outlined in Section 2.3.

\section{Computational Results}
We ran the computer program from Section 2.4 on several
cost-constrained problems. We had access to an order-92 covariance
matrix derived from 
$\mbox{SO}_4$ and $\mbox{O}_3$ data obtained from a network of
46 environmental monitoring stations in the the United States.
Originally, zero, one, or both ions could be observed at any
given station. In all of the problems we considered,  however,
we worked under the assumption that a total of 29 sites and
31 ions had been fixed into the solution from the onset.
Such fixing was achieved using the methods in Section 2.3.
After the initial fixing, there remained 27 stations
at which exactly one ion was being observed, and 17 stations
at which neither ion was being observed. 

We then applied the algorithm to two distinct sets of problems.
In the first set of problems, we used the costs given in 
Zidek et al. (1997).
In particular, observing a new ion
(either $\mbox{SO}_4$ or $\mbox{O}_3$) at any station, regardless
of whether the other ion was already being observed there,
cost \$291.667 per month.
Adding in a new site (that is, observing at least one ion at
a site that had previously not been monitored) incurred a cost
of \$1041.667 per month. So, using the notation from the previous
section, we took $d_{il} = 291.667$ for all ions $i \in I$
and all locations $l \in L$, and
$f_l=1041.667$ for all locations $l \in L$. Imposing a budget constraint
does not make sense when using such uniform costs, so we deleted the
constraint from our constraint set.
(Equivalently, one could have taken $\beta_{p,t} = 291.667p + 1041.667t$.)
We then ran the program for various combinations of $p$ and $t$,
using the spectral upper bounds
of Section 2.2 and/or the nonlinear
programming upper bounds of Anstreicher et al. (1997a).
Note that the solutions do not
depend on the costs, owing to their uniformity. 
In Table \ref{fixed1},
we give, for several choices of $p$ and $t$, the  corresponding entropies.
Recall that the entropy function depends on $s$ through an additive constant,
that until now we have simply referred to as $k_s$ (see Chapter 1, page 3).
In order to compare the entropies of the solutions to the problems,
we had to use the precise formula for entropy,
as given by Shannon and Weaver (1963):
\[ H(\underline{x}) := \frac{p}{2} \ln(2 \pi e) + 
\frac{1}{2} \ln \det \widehat{C}[\underline{x}, \underline{x}]. \]
We have listed the problems in order of increasing monitoring cost;
note that in general, the entropy of a solution 
corresponding to $(p,t) = (a,0)$ is significantly higher than the
entropy of a solution corresponding to $(p,t) = (b,1)$, when
the total monitoring costs of the two associated problems are
comparable.

\begin{table}
\hspace{2in}
\begin{tabular}{|l|l|l|l|}
\hline
\hline
$p$ & $t$ & total cost & entropy \\
\hline
\hline
4 & 0 & 1166.668 & 47.751
\\
\hline
1 & 1 & 1333.334 & 39.518
\\
\hline
5 & 0 & 1458.335 & 50.520
\\
\hline
2 & 1 & 1625.001 & 42.977
\\
\hline
6 & 0 & 1750.002 & 53.274
\\
\hline
3 & 1 & 1916.668 & 45.971
\\
\hline
7 & 0 & 2041.669 & 55.975
\\
\hline
4 & 1 & 2208.335 &  48.836
\\
\hline
8 & 0 & 2333.336 & 58.674
\\
\hline
5 & 1 & 2500.002 & \\
\hline
9 & 0 & 2625.003 & 61.355
\\
%\hline
%2 & 2 & 2666.668 & 43.402
%\\
\hline
6 & 1 & 2791.669 & \\
\hline
10 & 0 & 2916.670 & 64.003
\\
\hline
\hline
\end{tabular} 
\caption{Costs and Entropies for CMESPFs (Uniform Costs)} 
\label{fixed1}
\end{table}
%\newpage
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,0) & (2,0) & (3,0) & (4,0) & (5,0)  \\
%\hline
%& 0.54 & 1.24 & 8.04 & 25.72 & 66.46 \\
%\hline 
%\end{tabular} 
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) &  (6,0) & (7,0) & 
%(8,0) & (9,0) & (10,0) \\
%\hline
%&  126.06 & 220.07 & 286.72 & 300.57 &
%287.05  \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,1) & (2,1) & (3,1) & (4,1) & (5,1)  \\
%\hline
%& 155.44 & 273.59 & 2454.45 & 10728.83 & 15529.38 \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (6,1) & (7,1) & 
%(8,1) & (9,1) & (10,1) \\
%\hline
%&  72727.56
%& 115400.31 & 474834.47 & & \\
%\hline
%\end{tabular}
%\caption{Wall Seconds: Spectral Bounds} 
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,0) & (2,0) & (3,0) & (4,0) & (5,0)  \\
%\hline
%& 77 & 77,73 & 77,73,71 & 77,73,71,57 & 77,73,71,57,70 \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (6,0) & (7,0) & (8,0) & (9,0) & (10,0)  \\
%\hline
%&  77,73,71,57,70, & 
%  77,73,71,57,70, & 
%  77,73,71,57,70, & 
%  77,73,71,57,70, & 
%  77,73,71,57,70,  \\
%& 53 & 53,51 & 53,51,69 & 53,51,69,72 & 53,51,69,72,66 \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (1,1) & (2,1) & (3,1) & (4,1) & (5,1)  \\
%\hline
%& 6,29 & 6,29,77 & 6,29,77,73 & 6,29,77,73,71 & 6,29,77,73,71,57  \\
%\hline
%\end{tabular}
%\end{table}
%\begin{table}
%\begin{tabular}{|l||l|l|l|l|l|}
%\hline
%($p,t$) & (6,1) & (7,1) & (8,1) & (9,1) & (10,1)  \\
%\hline
%\hline
%&  6,29,77,73,71,57, & 
%  6,29,77,73,71,57, & 
%  6,29,77,73,71,57, & 
%  6,29,77,73,71,57, & 
%  6,29,77,73,71,57,  \\
%& 70 & 70,53 & 70,53,51 & 70,53,51,69 & 70,53,51,69,72  \\
%\hline
%\end{tabular}
%\caption{Optimal Subsets $S$}
%\end{table}

In the second set of problems, we varied the costs associated with
the ions and locations.  In a preprocessing stage, we
used random number generators to produce
values for $f_l$ ($l \in L$) 
between 700 and 1350, as well as values for
$d_{il}$ between 200 and 400.
(Note that 1041.667 is approximately the midpoint of the interval [700,1350],
and 291.667 is approximately the midpoint of the interval [200,400].)
Having specified the costs during the preprocessing stage, their values
remained the same for all problem instances.
\end{document}

\newpage
\chapter{Appendices}
\section{Appendix A: The CMESP as a model for the problem of finding
a ``most informative subset''} 
Here we show
that the CMESP models the problem
of selecting among $n$ random variables $N$, an $s$-subset
to observe, so as to obtain as much information as possible
about all of $N$. 

For $S \subseteq N$, 
let $\bar{S}: = N \backslash S$. Also,
let $\bar{s}: = n-s$.
Assume that $S \subseteq N$ has distribution $p(S)$, and
assume that $\bar{S}$ has prior distribution
$p(\bar{S})$. 
Recall that the entropy $H(S)$ of a subset $S$ is defined by:
\[ H(S) := -E[\ln p(S)]
 = - \int_{\Re^s} p(S) \ln (p(S)) ~ dS. \]
Since entropy $H$
is a measure of the amount of uncertainty, it is often useful
to think of $-H$ as a measure of the amount of information.
The amount of information about $\bar{S}$, before observing
$S$, is therefore
\[ -H(\bar{S}) =
 \int_{\Re^{\bar{s}}} p(\bar{S}) \ln p(\bar{S}) ~d\bar{S}.\]
After observing $S$, the amount of information about $\bar{S}$ is
\[ -H(\bar{S}|S) =
 \int_{\Re^{\bar{s}}} p(\bar{S}|S) \ln p(\bar{S}|S) ~d\bar{S}\]
where $p(\bar{S}|S)$ is the density function of $\bar{S}$ conditioned on $S$,
and where $H(\bar{S}|S)$ is the {\it conditional entropy} of
$\bar{S}$ conditioned on $S$.
Our goal is to select an $s$-subset $S$
so as to maximize the expected amount of information
obtained on  $\bar{S}$. That is, we attempt to maximize
\[ -E[H(\bar{S}|S)] =
 E \left[\int_{\Re^{\bar{s}}} p(\bar{S}|S) \ln p(\bar{S}|S) ~d\bar{S} \right] =
 \int_{\Re^s} p(S) \int_{\Re^{\bar{s}}} p(\bar{S}|S) \ln p(\bar{S}|S) ~d\bar{S} ~dS  . \]

Let $p(S,\bar{S})$ be the joint distribution of $S$ and $\bar{S}$.
Then the {\it joint entropy} of $S$ and $\bar{S}$ is
\[ H(S,\bar{S}) :=
 - \int_{\Re^s} \int_{\Re^{\bar{s}}} p(S,\bar{S}) \ln p(S,\bar{S}) ~d\bar{S} ~dS. \]
(Note that $H(S,\bar{S})$ is just the entropy of $N$.)
So we have that
 \begin{eqnarray*}
 E[H(\bar{S}|S)] & = &
  - \displaystyle \int_{\Re^s} p(S) \int_{\Re^{\bar{s}}} p(\bar{S}|S)  \ln p(\bar{S}|S)
~d\bar{S} ~dS \\
& & \\
& = & - \displaystyle \int_{\Re^s} \int_{\Re^{\bar{s}}} p(S,\bar{S}) 
\ln \frac{p(S,\bar{S})}{p(S)} ~d\bar{S} ~dS \\
& & \\
& = & - \displaystyle 
\int_{\Re^s} \int_{\Re^{\bar{s}}} p(S,\bar{S}) \ln p(S,\bar{S})
~d\bar{S} ~dS + \int_{\Re^s} \int_{\Re^{\bar{s}}} p(S,\bar{S}) \ln p(S)
     ~d\bar{S}  ~dS\\
& & \\
& = & - \displaystyle \int_{\Re^s} \int_{\Re^{\bar{s}}} p(S, \bar{S}) \ln p(S,\bar{S}) 
~d\bar{S} ~dS +  \int_{\Re^s} \ln p(S) \left( \int_{\Re^{\bar{s}}} p(S,\bar{S})
~d\bar{S} \right) ~dS \\
& & \\
& = & - \displaystyle \int_{\Re^s} \int_{\Re^{\bar{s}}} p(S,\bar{S}) \ln p(S,\bar{S})
~d\bar{S} ~dS +  \int_{\Re^s} p(S) \ln p(S) ~dS \\
& & \\
& = & H(S,\bar{S}) - H(S),
\end{eqnarray*} 
so that
\[ H(S,\bar{S}) = H(S) + E[H(\bar{S}|S)] .\]

Recall that we attempt to maximize
$-E[H(\bar{S}|S)]$ (minimize
$E[H(\bar{S}|S)].$
Since the entropy $H(S,\bar{S})=H(N)$  is fixed,
minimizing $E[H(\bar{S}|S)]$ is equivalent to
maximizing $H(S)$. In other words, we solve
\[ \begin{array}{llll}
z & := &  \max  & H(S)  \\
& & & \\
& & \mbox{s.t.} & S \subseteq N; \\
& & & \\
& & &  |S| = s .
\end{array} \]

In the case where $N$ has a joint Gaussian distribution with covariance
matrix $C$, where $C$ is symmetric positive semidefinite,
one can show that
\[ H(N) = k_s + \alpha \ln \det C  \]
where $\alpha>0$ and $k_s$ are constants (see Shewry and Wynn, for
example).
%\cite{SHEWRY}.
For $S,T \subseteq N$, let $C[S,T]$ be the submatrix of $C$ with
rows indexed by $S$ and columns indexed by $T$.
Then, representing $C$ as
\[ C = \left[ \begin{array}{ll}
 C[S,S] & C[S, \bar{S}] \\
& \\
C[\bar{S},S] & C[\bar{S},\bar{S}]

\end{array}
\right] \]
we can use Schur complements to obtain
\[ \begin{array}{ll}
&  \det C = \det (C[S,S]) \det (C[S,S] - C[S,\bar{S}]
 (C[\bar{S},\bar{S}])^{-1} C[\bar{S},S]) \\
& \\
\Rightarrow
& \ln  \det C = \ln \det (C[S,S]) + \ln \det (C[S,S] - C[S,\bar{S}]
 (C[\bar{S},\bar{S}])^{-1} C[\bar{S},S]) .
\end{array} \]
The matrix
$C[S,S] - C[S,\bar{S}]
(C[\bar{S},\bar{S}])^{-1} C[\bar{S},S]$ is the conditional covariance
matrix for $\bar{S}|S$. (See Shewry and Wynn.)
%\cite{SHEWRY}.
Therefore, in the Gaussian case, our
problem reduces to that of solving

\[ \begin{array}{llll}
z & := &  \max &  \ln \det C[S,S]  \\
& & & \\
& & \mbox{s.t.} &  S \subseteq N; \\
& & & \\
& & &  |S|=s.
\end{array} \]

It might be necessary to impose
additional linear constraints on the
set $S$ we select. Such constraints
might arise from budgetary considerations, for example.
In particular, letting the $m$-set
$M$ index our constraint set, and imposing
the constraints
\[ \bsum{j \in S} a_{ij} \leq b_i, \hspace{.2in} \forall ~i \in M, \]
we obtain the CMESP.
\newpage
\section{Appendix B: The subroutine worker.c, as implemented in parallel
for the CMESP}
\singlespace
\begin{verbatim}
#include <stdio.h>
#include "/usr/local/apps/cplex4.0/cplex.h"
#include <math.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/times.h>
#include <cps.h>
#include <errno.h>
#include <string.h>
#include <search.h>
#include <stdlib.h>
#include "ErrorMessages.h"
#include "param.h"
#include "protos.h"

#define EPS2 0.000000001

extern FILE *out;  /* pointer to the output file   */
CPXENVptr CPXopenCPLEX(int *);
int CPXcloseCPLEX(CPXENVptr *);

extern double **c; /* covariance matrix */ 
extern double **a; /* constraint matrix */
extern double *b;  /* constraint right-hand-sides */

extern int Ne;     /* number of (originally) eligible indices */
extern int Ns;     /* number of indices to select */
extern int M;      /* number of inequalities */

extern int numsub;      /* current number of active subproblems */
extern double gupper;   /* global upper bound  */
extern double lambda;
extern int callfix;

extern char *s;         /* candidate solution indices  */
extern double glower;   /* global lower bound  (value of candidate) */
extern cache_sema_t q_lock,   /* lock for master queue */
       glower_lock,           /* lock used when updating glower */
       gupper_lock;           /* lock used when updating gupper */ 
extern int num_busy;    /* number of workers that are currently
                           busy  */
extern char *cplx,      /* indicator of whether cplex will be used 
                           and in what way */
     *tempcplx,   /* temporary indicator of which method to use, used    
                     at the beginning where we begin with IP  */
     *branch,     /* indicator of which method will be used in the    
                     branch and bound process  */
     *upper,      /* indicator of whether the new upper bound will be 
                     determined after each iteration */
     *ord,        /* indicator of whether a specific order of 
                     branching is requested  */
     *initlo,     /* indicator of whether to compute an initial lower
                     bound */
     *bail,       /* option for NLP bounding routines */
     *eig,        /* indicator of whether to compute spectral upper
                     bounds */
     *nlp,        /* indicator of whether NLP bounds will be
                     computed */
     temp[ORDER];  /* temporary character array */
extern int output; /* indicator of how much output will be printed */
extern p_prob *head,
              *tail; /* head and tail pointers of the linked list of 
                        active subproblems */

void worker(char *sense, int *mmatbeg, int *mmatcnt, int *mmatind, 
  double *mmatval)
{
/* INPUT: constraints for original subproblem are stored in mmatbeg,
          mmatcnt, mmatind, mmatval, sense */
  int okay,kount,go_on,stop,ubrepeat;    /* indicator variables */
  stid_t cps_stid();             
  int c_lock(cache_sema_t *cs);     /* cps function */
  int c_unlock(cache_sema_t *cs);   /* cps function */
  int num_ub;
  p_prob *maxpos; /* pointer to the position of the subproblem with 
                     the maximum upper bound */
  p_prob *pop;    /* a subproblem structure for removal from the 
                     queue */
  p_prob *tchild; /* used for temporary storage of a child 
                     subproblem */
  p_prob *Del;    /* used for temporary storage of a fathomed
                     subproblem */
  int temp_sub;   /* temporary variable to store number of 
                     subproblems in the queue */
  int temp_busy;  /* stores the number of busy workers */
  double newup;   /* new maximum upper bound */
  double tup;     /* temporary copy of the upper bound */
  char *fixed,    /* pointers to the fixed and eligible indices
     *elig;          arrays to be used in a special test case */
  double *relis;  /* stores a relative interior solution */
  double **tight; /* stores the tight constraints */
  double **basis; /* stores a basis for the nullspace of the tight
                     constraint matrix */
  double begcpu,endcpu;         /* used to compute cpu times */
  double cputime(double *t);
  double temptime=0.0;          /* used to compute run times */
  double temptime2 = 0.0;
  CPXENVptr env; 
  double *obj;
  double val;
  char *e,ic;
  int x,i,j,k,index,found,fathom,calllb,feasi,status,flag,status_p;
  int numtight = 0;
  int rank = 0;
  int nulldim = 0; 
  int append;
  int *u, *bounds;
  double czero;
  double objtive;

  /********************************************/
  /* Start cputime variable                   */
  /********************************************/
  
  czero = (double)0.0;
  begcpu = cputime(&czero);

    /********************************************/
    /* Allocate memory for obj                  */
    /********************************************/
    obj = (double *)calloc(Ne,sizeof(double));
    if (obj == NULL)
      ErrorMessage(VMemoryFail);

    /*******************************/
    /* Open cplex                  */
    /*******************************/
    status_p = 1;
    env = CPXopenCPLEX(&status_p);
    if (env == NULL)
    {
       printf("unable to open cplex\n");
       printf("status_p = %d\n", status_p);
       exit(1);
    }
    
    /********************/
    /* branch-and-bound */
    /********************/

    /********************************************************/
    /* while at least one worker is busy or there are still */
    /* subproblems on the master queue ...                  */
    /********************************************************/

    go_on = 1;
    while (go_on == 1)
    {

    /****************************************************/
    /* store num_busy and numsub in temporary variables */
    /****************************************************/
    okay = c_lock(&q_lock);
    temp_busy = num_busy;
    temp_sub = numsub;
    okay = c_unlock(&q_lock);

    if ((temp_busy == 0) && (temp_sub == 0))
       /***************/
       /* we're done! */
       /***************/
       go_on = 0;
    else if (temp_sub > 0)
      { 
        /*********************/
        /* lock master queue */
        /*********************/
        okay = c_lock(&q_lock);

        /*******************************/
        /* if numsub=0, skip over code */
        /*******************************/
        if (numsub == 0)
         {
           okay = c_unlock(&q_lock);
           printf("false alarm...queue empty \n");
         }
        else
        /*****************************************************/
        /* if numsub>0, remove a subproblem from the list of */
        /* active subproblems                                */
        /*****************************************************/
        {
           /****************************/
           /* this worker becomes busy */
           /****************************/
           num_busy = num_busy + 1;

           stop = 0;

           pop = ( p_prob *)malloc(sizeof(p_prob));
           if (pop == NULL)
              ErrorMessage(VMemoryFail);
           if (!strcmp(branch,"DFS"))
              pop = StackDelete(head);
           else if (!strcmp(branch,"BFS"))
              pop = QueueDelete(tail);
           else if (!strcmp(branch,"MAX"))
           {
              /************************************************/
              /* re-compute upper bound; determine new maxpos */
              /************************************************/
              maxpos = head->suc;
              newup = GetUpperBound(head,&maxpos);

              okay = c_lock(&gupper_lock);
              gupper = newup;  
              okay = c_unlock(&gupper_lock);

              /*************************************************/
              /* make sure numsub > 0 (in GUB, subproblems are */
              /* potentially deleted)                          */
              /*************************************************/
              if (numsub == 0)
              {
                if (num_busy == 1)
                  {
                     okay = c_unlock(&q_lock);
                     goto THEEND;
                  }
                else
                     okay = c_unlock(&q_lock);
                free(pop);
                printf("false alarm...queue empty \n");
                stop = 1;
              }
              else
                pop = MidDelete(maxpos);
           }

           okay = c_unlock(&q_lock);

           if (stop == 0)
           {
           tup = pop->upper;

           /***********************/
           /* fathom if necessary */
           /***********************/
           okay = c_lock(&glower_lock);
           if ((tup <=glower) && ( !strcmp(bail,"OFF")))
           {
             okay = c_unlock(&glower_lock);
             fprintf(out,"fathomed by bounds\n"); 
             fr_sub(pop);
           }
           else
           {
           okay = c_unlock(&glower_lock);

           u = (int *)calloc(M+1,sizeof(int));
           if (u == NULL)
             ErrorMessage(VMemoryFail);

           /**************************************************/
           /* create child subproblems; there are four cases */
           /**************************************************/

           if ( (1 < pop->ns) && (pop->ns < (pop->ne)-1) )
           /**********/
           /* CASE 1 */
           /**********/
           {
           /*********************************************/
           /* allocate space to temporarily store first */
           /* child                                     */
           /*********************************************/
           tchild = (p_prob *)malloc(sizeof(p_prob));
             if (tchild == NULL)
                ErrorMessage(VMemoryFail);
           tchild -> pred = NULL ;
           tchild -> suc = NULL;

           /***********************************************/
           /* create first child, but don't add to master */
           /* queue                                       */
           /***********************************************/
           temp_mk_in(pop,tchild);

           relis = (double *)calloc(Ne,sizeof(double));
           if (relis == NULL)
             ErrorMessage(VMemoryFail);

           ubrepeat = 1;
           while (ubrepeat == 1)
           {

           ubrepeat = 0;
           append = 0;

           bounds = (int *)calloc(Ne,sizeof(int));
           if (bounds == NULL)
             ErrorMessage(VMemoryFail);

           /********************************************/
           /* try to find a relative interior solution */
           /********************************************/
           status = Rel_Int(env,tchild->nf,tchild->ns,
             tchild->ne,tchild->f,
             tchild->e,sense,relis,u,bounds);
  
           /******************************************************/
           /* if no relative interior solution was found, fathom */
           /******************************************************/
           if (status == -1)
           {
             /* fprintf(out,"fathomed by infeasibility\n"); */
            fr_sub(tchild);
             free(bounds);
           }
           /*****************************************************/
           /* otherwise, a relative interior solution WAS found */
           /*****************************************************/
           else
           {

             /*************************************/
             /* update fixed and eligible indices */
             /*************************************/
             update_ef(tchild,bounds,relis);

             /******************************************************/
             /* flag indicates when we are finished with this child*/
             /******************************************************/
             flag = 1;

             /*****************************************************/
             /* see if all eligible variables had to attain their */
             /* upper or lower bounds                             */
             /*****************************************************/
             found = 0;
             index = 0;
             while ((found == 0) && (index < Ne))
             {
               if (bounds[index] == -1)
               found = 1;
               index++;
             }

             if (found == 0)
             /*************************************************/
             /* this relative interior solution is the unique */
             /* solution; moreover, it's INTEGER, so          */
             /* check its objective value, and                */
             /* update lower bound, if possible               */
             /*************************************************/
             {
               val = comp_obj(tchild->f, tchild->e,relis,
                tchild->nf, tchild->ne);

               /**************************************************/
               /* don't need to do any further work on this child*/
               /**************************************************/
               fr_sub(tchild);
               flag = 0;
             }

             if (flag == 1)
             {

             /****************************************/
             /* load tight constraints into a matrix */
             /****************************************/
             tight = (double **)calloc(M+1+Ne,
               sizeof(double *));
             if (tight == NULL)
               ErrorMessage(VMemoryFail);
             tight[0] = (double *)calloc((M+1+Ne)*Ne,
               sizeof(double));
             if (tight[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<M+1+Ne;++i)
               tight[i] = tight[i-1]+Ne;

             loadtight(tchild->ne,tchild->e,
               u,tight,&numtight);
             free(bounds);

             /************************************************/
             /* allocate memory to store basis for nullspace */
             /************************************************/
             basis = (double **)malloc(Ne*sizeof(double *));
             if (basis == NULL)
               ErrorMessage(VMemoryFail);
             basis[0] = (double *)malloc(Ne*Ne*sizeof(double));
             if (basis[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<Ne;++i)
               basis[i] = basis[i-1]+Ne;

             /*************************************/
             /* find rank and nullspace of matrix */
             /*  of tight constraints             */
             /*************************************/

             nullspace(tight,numtight,tchild->ne,&rank,basis);
             free(tight[0]);
             free(tight);

             /*********************************************/
             /* if matrix is of full rank, this fractional*/
             /*  relative interior solution is unique;    */
             /*  fathom by infeasibility                  */
             /* NOTE: this relative interior solution is  */
             /* not integer-valued; we already took care  */
             /* of that case!                             */
             /*********************************************/
             if (rank == tchild->ne)
             {
              fprintf(out,"fathomed by infeasibility\n");
              fr_sub(tchild);

              /**************************************************/
              /* don't need to do any further work on this child*/
              /**************************************************/
              flag = 0;
              free(basis[0]);
              free(basis);
             }

             if (flag == 1)
             /*******************************************/
             /* continue working on this child          */
             /*******************************************/
             {
               nulldim = (tchild->ne) - rank;

               /***********************/
               /* compute upper bound */
               /***********************/

               ub(tchild,obj,sense,
                mmatbeg,mmatcnt,mmatind,
                mmatval,basis,nulldim,relis,u,
                &fathom,&calllb, &ubrepeat);
               free(basis[0]);
               free(basis);

               /**************************************************/
               /* fathom by infeasibility or bounds, if possible */
               /**************************************************/
               if (fathom == 1)
               {
                 fprintf(out,"subproblem fathomed\n");  
                 fr_sub(tchild);
               }
               else
               {
                 if ((calllb == 1) && ((!strcmp(cplx,"IP")) ||
                    (!strcmp(cplx,"LP"))))
                   /*************************************************/
                   /* solve lower-bounding LP with obj equal to the */
                   /* continuous-solution that gave best bound      */
                   /*************************************************/
                   feasi=lb(env,tchild,sense,obj);
                 append = 1;

                 }
                }
               }
              }
             }

               if (append == 1)
               {
                 /*************************************************/
                 /* finally, add child to master queue            */
                 /*************************************************/
                 okay = c_lock(&q_lock);
                 Insert(head,tchild);
                 okay =c_unlock(&q_lock);
               }
                 
         ZeroOut(obj);
         free(relis);
                
         /*************************************/
         /* repeat procedure for second child */
         /*************************************/

           /****************************************************/
           /* allocate space to temporarily store second child */
           /****************************************************/
           tchild = (p_prob *)malloc(sizeof(p_prob));
           if (tchild == NULL)
             ErrorMessage(VMemoryFail);
           tchild -> pred = NULL ;
           tchild -> suc = NULL;

           /******************************************************/
           /* create second child, but don't add to master queue */
           /******************************************************/

           temp_mk_out(pop,tchild);

           relis = (double *)calloc(Ne,sizeof(double));
           if (relis == NULL)
             ErrorMessage(VMemoryFail);

           ubrepeat = 1;
           while (ubrepeat == 1)
           {

           ubrepeat = 0;
           append = 0;

           bounds = (int *)calloc(Ne,sizeof(int));
           if (bounds == NULL)
             ErrorMessage(VMemoryFail);

           /********************************************/
           /* try to find a relative interior solution */
           /********************************************/
           status = Rel_Int(env,tchild->nf,tchild->ns,
             tchild->ne,tchild->f,
             tchild->e,sense,relis,u,bounds);
   
           /******************************************************/
           /* if no relative interior solution was found, fathom */
           /******************************************************/
           if (status == -1)
           {
             /* fprintf(out,"fathomed by infeasibility\n"); */
             fr_sub(tchild);
             free(bounds);
           }
           /*****************************************************/
           /* otherwise, a relative interior solution WAS found */
           /*****************************************************/
           else
           {

             /*************************************/
             /* update fixed and eligible indices */
             /*************************************/
             update_ef(tchild,bounds,relis);

             flag = 1;

               /****************************************************/
               /* see if all eligible variables had to attain their*/
               /* upper or lower bounds                            */
               /****************************************************/
               found = 0;
               index = 0;
               while ((found == 0) && (index < Ne))
               {
                 if (bounds[index] == -1)
                 found = 1;
                 index++;
               }

               if (found == 0)    
               /*************************************************/
               /* this relative interior solution is the unique */
               /* solution; check its objective value           */
               /*************************************************/
               {
                 val = comp_obj(tchild->f,tchild->e, relis,
                   tchild->nf, tchild->ne);

                 /**************************************************/
                 /* don't need to do any further work on this child*/
                 /**************************************************/
                 fr_sub(tchild);
                 flag = 0;
               }

             if (flag == 1)
             {   
             /****************************************/
             /* load tight constraints into a matrix */
             /****************************************/
             tight = (double **)calloc((M+1+Ne),sizeof(double *));
             if (tight == NULL)
               ErrorMessage(VMemoryFail);
             tight[0] = (double *)calloc((M+1+Ne)*Ne,
               sizeof(double));
             if (tight[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<M+1+Ne;++i)
               tight[i] = tight[i-1]+Ne;

             loadtight(tchild->ne,tchild->e,
               u,tight,&numtight);
             free(bounds);

             /************************************************/
             /* allocate memory to store basis for nullspace */
             /************************************************/
             basis = (double **)malloc(Ne*sizeof(double *));
             if (basis == NULL)
                ErrorMessage(VMemoryFail);
             basis[0] = (double *)malloc(Ne*Ne*sizeof(double));
             if (basis[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<Ne;++i)
               basis[i] = basis[i-1]+Ne;

             /*************************************/
             /* find rank and nullspace of matrix */
             /* of tight constraints              */
             /*************************************/
             nullspace(tight,numtight,tchild->ne,&rank,basis);
             free(tight[0]);
             free(tight);

             /**********************************************/
             /* if matrix is of full rank, this fractional */
             /* relative interior solution is unique;      */
             /* fathom by infeasibility                    */
             /**********************************************/
             if (rank == tchild->ne)
             {
              /* fprintf(out,"fathomed by infeasibility\n"); */
              fr_sub(tchild);

              /**************************************************/
              /* don't need to do any further work on this child*/
              /**************************************************/
              flag = 0;
              free(basis[0]);
              free(basis);
             } 

             if (flag == 1)
             {
               nulldim = (tchild->ne)-rank;

               /***********************/
               /* compute upper bound */
               /***********************/
               ub(tchild,obj,sense,
                mmatbeg,mmatcnt,mmatind,
                mmatval,basis,nulldim,relis,u,
                &fathom,&calllb, &ubrepeat);

               free(basis[0]);
               free(basis);

               /**************************************************/
               /* fathom by infeasibility or bounds, if possible */
               /**************************************************/
               if (fathom == 1)
               {
                 fprintf(out,"subproblem fathomed\n");  
                 fr_sub(tchild);
               }
               else
               {
                 if ((calllb == 1) && ((!strcmp(cplx,"IP")) ||
                    (!strcmp(cplx,"LP"))))
                  /*************************************************/
                  /* solve lower-bounding LP with obj equal to the */
                  /* continuous-solution that gave best bound      */
                  /*************************************************/
                   feasi=lb(env,tchild,sense,obj);
                 append = 1;
               }
              }
             }
            }
           }

             if (append == 1)
              {
                 /*****************************/
                 /* add child to master queue */
                 /*****************************/
                 okay = c_lock(&q_lock);
                 Insert(head,tchild);
                 okay = c_unlock(&q_lock);
              }
           ZeroOut(obj);
           free(relis);
           }
           else if ( (1 < pop->ns) && (pop->ns == (pop->ne)-1) )
           {
           /************/
           /* CASE 2   */
           /************/
     
           /*******************************************/
           /* first child is created exactly as above */
           /*******************************************/

           /* allocate space to temporarily store first child */
           tchild = (p_prob *)malloc(sizeof(p_prob));
           if (tchild == NULL)
             ErrorMessage(VMemoryFail);
           tchild -> pred = NULL ;
           tchild -> suc = NULL;

           /* create first child, but don't add to master queue */
           temp_mk_in(pop,tchild);

           relis = (double *)calloc(Ne,sizeof(double));
           if (relis == NULL)
             ErrorMessage(VMemoryFail);

           ubrepeat = 1;
           while (ubrepeat == 1)
           {

           ubrepeat = 0;
           append = 0;

           bounds = (int *)calloc(Ne,sizeof(int));
           if (bounds == NULL)
             ErrorMessage(VMemoryFail);

           /* try to find a relative interior solution */
           status = Rel_Int(env,tchild->nf,tchild->ns,
             tchild->ne,tchild->f,
             tchild->e,sense,relis,u,bounds);
    
           /* if no relative interior solution was found, fathom */
           if (status == -1)
           {
             /* fprintf(out,"fathomed by infeasibility\n"); */
             fr_sub(tchild);
             free(bounds);
           }
           /* otherwise, a relative interior solution WAS found */
           else
           {
             update_ef(tchild,bounds,relis);

             flag = 1;

               /* see if all eligible variables had to attain their
               upper or lower bounds */
               found = 0;
               index = 0;
               while ((found == 0) && (index < Ne))
               {
                 if (bounds[index] == -1)
                 found = 1;
                 index++;
               }

               if (found == 0)
               /* this relative interior solution is the unique
                  solution; check its objective value */
               {
                 val = comp_obj(tchild->f, tchild->e,relis,
                   tchild->nf, tchild->ne);

                 /* don't need to do any further work on this child*/
                 fr_sub(tchild);
                 flag = 0;
               }

             if (flag == 1)
             {
             /* load tight constraints into a matrix */
             tight = (double **)calloc(M+1+Ne,sizeof(double *));
             if (tight == NULL)
               ErrorMessage(VMemoryFail);
             tight[0] = (double *)calloc((M+1+Ne)*Ne,
               sizeof(double));
             if (tight[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<M+1+Ne;++i)
               tight[i] = tight[i-1]+Ne;

             loadtight(tchild->ne,tchild->e,
               u,tight,&numtight);
             free(bounds);

             /* allocate memory to store basis for nullspace */
             basis = (double **)malloc(Ne*sizeof(double *));
             if (basis == NULL)
               ErrorMessage(VMemoryFail);
             basis[0] = (double *)malloc(Ne*Ne*sizeof(double));
             if (basis[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<Ne;++i)
               basis[i] = basis[i-1]+Ne;

             /* find rank and nullspace of matrix
                of tight constraints */
             nullspace(tight,numtight,tchild->ne,&rank,basis);
             free(tight[0]);
             free(tight);

             /* if matrix is of full rank, this fractional
                relative interior solution is unique;
                fathom by infeasibility */
             if (rank == tchild->ne)
             {
              /* fprintf(out,"fathomed by infeasibility\n"); */
              fr_sub(tchild);

              /* don't need to do any further work on this child*/
              flag = 0;
              free(basis[0]);
              free(basis);
             } 

             if (flag == 1)
             {
               nulldim = (tchild->ne)-rank;

               /* compute upper bound */
               ub(tchild,obj,sense,
                mmatbeg,mmatcnt,mmatind,mmatval,basis,
                nulldim,relis,u,&fathom,&calllb, &ubrepeat);

               free(basis[0]);
               free(basis);

               /* fathom by infeasibility or bounds, if possible */
               if (fathom == 1)
               {
                 /* fprintf(out,"subproblem fathomed\n");  */
                 fr_sub(tchild);
               }
               else
               {
                 if ((calllb == 1) && ((!strcmp(cplx,"IP")) ||
                    (!strcmp(cplx,"LP"))))
                   feasi=lb(env,tchild,sense,obj);
                 append = 1;
               }
              }
             }
            }
           }

             if (append == 1)
             {
                 okay = c_lock(&q_lock);
                 Insert(head,tchild);
                 okay = c_unlock(&q_lock);
             }
           ZeroOut(obj);
           free(relis);

           /**************************************************/
           /* second child has unique solution; compute its  */
           /* entropy and delete; second child is not added  */
           /* to queue                                       */
           /**************************************************/
           feas_out(pop,sense);
           }
           else if ( (1 == pop->ns) && (pop->ns < (pop->ne)-1) )
           {
           /**********/
           /* CASE 3 */
           /**********/

           /**************************************************/
           /* first child has unique solution; compute its   */
           /* and delete; first child is not added to queue  */
           /**************************************************/
           feas_in(pop,sense);

           /****************************************/
           /* second child is created as in Case 1 */
           /****************************************/
      
           tchild = (p_prob *)malloc(sizeof(p_prob));
           if (tchild == NULL)
             ErrorMessage(VMemoryFail);
           tchild->pred = NULL;
           tchild->suc = NULL;

           temp_mk_out(pop,tchild);

           relis = (double *)calloc(Ne,sizeof(double));
           if (relis == NULL)
             ErrorMessage(VMemoryFail);

           ubrepeat = 1;
           while (ubrepeat == 1)
           {

           ubrepeat = 0;
           append = 0;

           bounds = (int *)calloc(Ne,sizeof(int));
           if (bounds == NULL)
             ErrorMessage(VMemoryFail);

           /* try to find a relative interior solution */
           status = Rel_Int(env,tchild->nf,tchild->ns,
             tchild->ne,tchild->f,
             tchild->e,sense,relis,u,bounds);
    
           /* if no relative interior solution was found, fathom */
           if (status == -1)
           {
             /* fprintf(out,"fathomed by infeasibility\n"); */
             fr_sub(tchild);
             free(bounds);
           }
           /* otherwise, a relative interior solution WAS found */
           else
           {
             update_ef(tchild,bounds,relis);

             flag = 1;

             /* see if all eligible variables had to attain their
              upper or lower bounds */
             found = 0;
             index = 0;
             while ((found == 0) && (index < Ne))
               {
                 if (bounds[index] == -1)
                 found = 1;
                 index++;
               }

               if (found == 0)
               /* this relative interior solution is the unique
                  solution; check its objective value */
               {
                 val = comp_obj(tchild->f,tchild->e, relis,
                   tchild->nf, tchild->ne);

                 /* don't need to do any further work on this child*/
                 fr_sub(tchild);
                 flag = 0;
               }

             if (flag == 1)
             {
             /* load tight constraints into a matrix */
             tight = (double **)calloc(M+1+Ne,sizeof(double *));
             if (tight == NULL)
               ErrorMessage(VMemoryFail);
             tight[0] = (double *)calloc((M+1+Ne)*Ne,
               sizeof(double));
             if (tight[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<M+1+Ne;++i)
               tight[i] = tight[i-1]+Ne;

             loadtight(tchild->ne,tchild->e,
               u,tight,&numtight);
             free(bounds);

             /* allocate memory to store basis for nullspace */
             basis = (double **)malloc(Ne*sizeof(double *));
             if (basis == NULL)
               ErrorMessage(VMemoryFail);
             basis[0] = (double *)malloc(Ne*Ne*sizeof(double));
             if (basis[0] == NULL)
               ErrorMessage(VMemoryFail);
             for (i=1;i<Ne;++i)
               basis[i] = basis[i-1]+Ne;

             /* find rank and nullspace of matrix
                of tight constraints */
             nullspace(tight,numtight,tchild->ne,&rank,basis);
             free(tight[0]);
             free(tight);

             /* if matrix is of full rank, this fractional
                relative interior solution is unique;
                fathom by infeasibility */
             if (rank == tchild->ne)
             {
               /* fprintf(out,"fathomed by infeasibility\n"); */
               fr_sub(tchild);

               /* don't need to do any further work on this child*/
               flag = 0;
               free(basis[0]);
               free(basis);
             }

             if (flag == 1)
             {
               nulldim = (tchild->ne)-rank;

               /* compute upper bound */
               ub(tchild,obj,sense,
                mmatbeg,mmatcnt,mmatind,
                mmatval,basis,nulldim,relis,u,
                &fathom,&calllb, &ubrepeat);

               free(basis[0]);
               free(basis);

               /* fathom by infeasibility or bounds, if possible */
               if (fathom == 1)
               {
                 /* fprintf(out,"subproblem fathomed\n");  */
                 fr_sub(tchild);
               }
               else
               {
                 if ((calllb == 1) && ((!strcmp(cplx,"IP")) ||
                    (!strcmp(cplx,"LP"))))
                  /*solve lower-bounding LP with obj equal to the
                    continuous-solution that gave best bound */
                  feasi=lb(env,tchild,sense,obj);
                 append = 1;
               }
              }
             }
            }
           }

             if (append == 1)
             {
                 okay = c_lock(&q_lock);
                 Insert(head,tchild);
                 okay = c_unlock(&q_lock);
             }
           ZeroOut(obj);
           free(relis);
           }
           else if ( (1 == pop->ns) && (pop->ns == (pop->ne)-1) )
           {
           /**********/
           /* CASE 4 */
           /**********/

           /******************************************************/
           /* both children have unique feasible points. Compute */
           /* the entropies; children are not added to queue     */
           /******************************************************/
           feas_in(pop,sense); 
           feas_out(pop,sense); 
           }
           else
           {
             printf ("\n error in main branch-and-bound loop:\n");
             exit(1);
           }
  
           /*****************************/
           /* free the arrays pop and u */
           /*****************************/
           free(pop);
           free(u);

           /*********************************************************/
           /* Obtain the max upper bound of all active subproblems, */
           /* if we branched on a subproblem with upper bound       */
           /* equal to the global upper bound.                      */
           /* Do this only if the option for upper is 'ON',         */
           /* or the method of branching is 'MAX'.                  */
           /*********************************************************/
           okay = c_lock(&gupper_lock);
           if ((tup >= gupper) || (!strcmp(branch,"MAX"))
              || (!strcmp(upper,"ON")))
           {
              okay = c_unlock(&gupper_lock);
              okay = c_lock(&q_lock);

              if ((numsub > 0) && (((!strcmp(upper,"ON")))
                || (!strcmp(branch,"MAX"))))
              {
               newup = GetUpperBound(head,&maxpos);

               okay = c_lock(&gupper_lock);
               gupper = newup; 
               okay = c_unlock(&gupper_lock);

               num_ub = numsub;
               okay = c_unlock(&q_lock);

               okay = c_lock(&glower_lock);
               printf("Gap (numsub,UB) = %3.15f (%d,%.15e)\n",
                newup-glower,num_ub,newup); 
               okay = c_unlock(&glower_lock);
              }
              else
                  okay = c_unlock(&q_lock);
             } 
             else
               okay = c_unlock(&gupper_lock);

           }
          }
 
          /*********************************/
          /* this worker is no longer busy */
          /*********************************/
          okay = c_lock(&q_lock);
          num_busy = num_busy - 1;
          okay = c_unlock(&q_lock);

          }
        } 
      }

THEEND:
free(obj);
status = CPXcloseCPLEX(&env);
return; 
}
\end{verbatim}


\begin{thebibliography}{[26]}

\bibitem{BAI}
E. Anderson, Z. Bai, C. Bischof, J. Demmel, J. Dongarra, J. Du Croz,
A. Greenbaum, S. Hammarling, A. McKenney, S. Ostrouchov, and D.
Sorensen (1992), LAPACK {\it Users' Guide}. SIAM, Philadelphia.

\bibitem{CMESP2}
K.M. Anstreicher, M. Fampa, J. Lee, and J. Williams (1996),
Continuous relaxations for constrained maximum-entropy sampling.
In {\it Integer Programming and Combinatorial Optimization,}
W.H. Cunningham, S.T. McCormick, and M. Queyranne, eds.,
{\it Lecture Notes in Computer Science} {\bf 1084}, Springer Verlag,
Berlin, 234-248.

\bibitem{CMERSP}
K.M. Anstreicher, M. Fampa, J. Lee, and J. Williams (1997), 
Maximum-entropy remote sampling.

\bibitem{CMESP}
K.M. Anstreicher, M. Fampa, J. Lee, and J. Williams (1997), Using
continuous nonlinear relaxations to solve constrained maximum-entropy
sampling problems, {\it CORE Discussion Paper} No. 9729.


\bibitem{BAZ}
M.S. Bazaraa and C.M. Shetty (1979), ``Nonlinear Programming:
Theory and Algorithms,'' John Wiley \& Sons, Inc., New York.

\bibitem{BLACKWELL}
D. Blackwell (1951), Comparison of experiments. In {\it Proceedings
of the 2nd Berkeley Symposium}, 93-102, University of California
Press, Berkeley.

\bibitem{BOLTZ}
L. Boltzmann (1877), Beziehung zwischen dem zweiten Haupstatz der
W$\ddot{\mbox{a}}$rmetheorie und der Wahrscheinlichkeitsrechnung
resp. den S$\ddot{\mbox{a}}$tzen $\ddot{\mbox{u}}$ber das
W$\ddot{\mbox{a}}$rmegleichgewicht (Complexionen-Theorie). {\it
Wien Ber,} {\bf 76$^2$}, p.373.


\bibitem{BUESO}
M.C. Bueso, J.M. Angulo, and F.J. Alonso (1997), A State-space-model
approach to optimum spatial sampling design based on entropy, to
appear in {\it Environmental and Ecological Statistics.}

\bibitem{EXEMPLAR}
Convex Press, {\it CONVEX Exemplar Programming Guide, X1.0.0.2 edition,}
Richardson, Texas.

\bibitem{CPLEX}
CPLEX Optimization, Inc. (1994), {\it Using the CPLEX Callable Library.}

\bibitem{DYK}
O. Dykstra, Jr. (1971), The augmentation of experimental data to maximize
$|X'X|$, {\it Technometrics}, {\bf 13}, 682-688.

\bibitem{FED2}
V. Federov (1972), ``Theory of Optimal Experiments,'' Academic
Press, New York.


\bibitem{FEDOROV}
V. Federov and W. Mueller (1989), Comparison of two approaches
in the optimal design of an observation network, {\it Statistics}
{\bf 20}, 339-351.

\bibitem{FOX}
J. Fox (1984), ``Linear Statistical Models and Related Methods,''
John Wiley \& Sons, New York.

\bibitem{FREUND}
R. Freund, R. Roundy, and M.J. Todd (1985), Identifying the set of always-active
constraints in a system of linear inequalities by a single linear program,
Sloan School Working Paper \#1674-85, MIT, Cambridge, MA.

\bibitem{SMITH}
R. S. Glassburn, S. W. Smith, Evaluation of Sensor Placement Algorithms
for On-Orbit Identification of Space Platforms. Presented at the
IES/AIAA/ASTM/NASA/CSA 18th Space Simulation Conference, 1994.

\bibitem{GUTTORP}
  P. Guttorp, N.D. Le, P.D. Sampson, and J.V. Zidek (1992), Using
entropy in the redesign of an environmental monitoring network.
Technical Report \#116, Department of Statistics, The University
of British Columbia,  Vancouver, British Columbia.

\bibitem{HELM}
  C. Helmberg (1996), A Note on Constrained Maximum-Entropy Sampling.

\bibitem{HORNII}
 R.A. Horn and C.R. Johnson (1985), ``Matrix Analysis,'' Cambridge
University Press, Cambridge. 

\bibitem{HORN}
 R.A. Horn and C.R. Johnson (1991), ``Topics in Matrix Analysis,'' Cambridge
University Press, Cambridge. 

\bibitem{KEIFER}
J. Keifer and J. Wolfowitz (1959), Optimum designs in regression
problems, {\it Annals of Mathematical Statistics} {\bf 30}, 271-294.

\bibitem{KLQ}
  C.-W. Ko, J. Lee, and M. Queyranne (1995), An exact algorithm for
maximum entropy sampling, {\it Operations Research} {\bf 43}, 684-691.

\bibitem{KLW}
  C.-W. Ko, J. Lee, and K. Wayne (1997), A spectral bound for
D-optimality.


\bibitem{LANC}
 P. Lancaster and M. Tismenetsky (1985), ``The Theory of Matrices Second
Edition, with Applications,'' Academic Press, Inc., San Diego.

\bibitem{LEE2}
  J. Lee (1997), Commentary on: ``A state-space-model approach to
optimal spatial sampling design based on entropy,'' to appear in
{\it Environmental and Ecological Statistics.}

\bibitem{LEE}
 J. Lee (1994), Constrained maximum-entropy sampling, to appear in
{\it Operations Research}.

\bibitem{MITCHELL}
T.J. Mitchell (1974), An algorithm for the construction of ``D-Optimal''
experimental designs, {\it Technometrics} {\bf 16}, 203-210.

\bibitem{PERE}
A.L. Peressini, F.E. Sullivan, and J.J. Uhl, Jr. (1988), ``The
Mathematics of Nonlinear Programming,'' Springer-Verlag, New York.

\bibitem{SHANNON2}
C.E. Shannon (1948), The mathematical theory of communication,
{\it Bell Systems Technical Journal} {\bf 27}, 379-423, 623-656.

\bibitem{SHANNON}
  C.E. Shannon and W. Weaver (1963), ``The Mathematical
Theory of Communication,'' The University of Illinois Press, Urbana.

\bibitem{SHEWRY}
  M.C. Shewry and H.P. Wynn (1987), Maximum entropy sampling, 
{\it Journal of Applied Statistics} {\bf 46}, 165-170.

\bibitem{SMITH2}
K. Smith (1918), On the standard deviations of adjusted and interpolated
values of an observed polynomial function and its constants and the
guidance they give towards a proper choice of the distribution of
observations, {\it Biometrika} {\bf 12}, 1-85.

\bibitem{TSING}
  N.-K. Tsing, M.K.H. Fan and E.I. Verriest (1994), On analyticity
of functions involving eigenvalues, {\it Linear Algebra and its
Applications} {\bf 207}, 159-180.

\bibitem{WALD}
  A. Wald (1943), On the efficient design of statistical investigations, 
{\it Annals of Mathematical Statistics} {\bf 14}, 134-140.

\bibitem{WELCH}
  W.J. Welch (1982), Branch-and-bound search for experimental
designs based on D optimality and other criteria, {\it Technometrics}
{\bf 24}, 41-48.

\bibitem{WYNN1}
H.P. Wynn (1970), The sequential generation of D-optimum experimental
design, {\it Annals of Mathematical Statistics} {\bf 41}, 1655-1664.

\bibitem{WYNN2}
H.P. Wynn (1972), Results in the theory and construction of D-optimum
experimental designs, {\it Journal of the Royal Statistical Society}
{\bf B34}, 133-147.

\bibitem{ZHU}
 C. Zhu, R.H. Byrd, P. Lu,  and J. Nocedal (1994), L-BFGS-B --
FORTRAN subroutines for large-scale bound constrained optimization,
Department of Electrical Engineering, Northwestern University.

\end{thebibliography}

\newpage
\doublespace
\noindent {\Huge {\bf Vita}} \\

\begin{description}

\item[{\large {\bf Date and Place of Birth}}] $~$ \\  \\
\begin{tabular}{l}
August 28, 1971 \\ \\
Frankfort, Kentucky, USA
\end{tabular}

\item[{\large {\bf Education}}]$~$ 

\begin{itemize}
\item Ph.D., Mathematics, University
of Kentucky, anticipated May 1998  
\item M.S., Operations Research, University of Kentucky, May 1995 
\item B.A., Mathematics, Transylvania University, May 1993 
Minors: Computer Science, French 
\end{itemize}

\item[{\large {\bf Professional Positions Held}}]$~$ 
\begin{itemize} 

\item Teaching Assistant - August 1994 to present \\
Department of Mathematics, University of Kentucky \\
Responsible for all aspects of teaching mathematics
courses including lecturing and writing examinations.
\begin{description}
\item [$\bullet$]  Primary Instructor of
\begin{itemize}
\item Ordinary Differential Equations (Calculus IV)
\item Calculus III
\item Math Excel / Calculus II
\item Math Excel / Calculus I
\item Finite Mathematics 
\item College Algebra
\end{itemize}
Math Excel is a special calculus workshop geared towards rural
and minority first year calculus students, based on the ideas of
Uri Treisman.

\item [$\bullet$] Primary Instructor of the
1997 University of Kentucky
Freshman Summer Program. During the summer of 1997, I
taught college algebra to a group of minority
students who had just graduated from high school and were planning
on entering UK the following fall. As the instructor, I lectured
five days a week; in addition, I led two workshops each week,
where students would work together in groups on worksheets
I had prepared. 

\item [$\bullet$] 
Supervisor / Coordinator of 27 sections of Laboratory
Calculus I 
(each section consisted of
25+ undergraduate students, a professor, and a graduate teaching
assistant).
The semester I held this position was the first semester 
the University of Kentucky had implemented Laboratory Calculus
across all sections of calculus.
\\
Responsibilities included:
\begin{itemize}
\item Instructing 15+ graduate teaching assistants in the use
of graphing calculators
\item Writing worksheets that were distributed to all sections of
students during laboratory workshops
\item Writing exams that were administered to all sections
\item Frequently meeting with the professors and graduate students
involved in Laboratory Calculus
\item Meeting weekly with the 
director of mathematics undergraduate studies
\end{itemize}
\end{description}

\item Research Assistant - Summer 1994, Summer 1995,
Fall 1995, Spring 1996 \\
Department of Mathematics, University of 
Kentucky \\
Supervisor: Dr. Jon Lee \\
Worked in developing a branch-and-bound algorithm for solving
the Constrained Maximum-Entropy Sampling Problem. I ultimately
implemented the algorithm on the University of Kentucky
$32$-processor Exemplar Supercomputer.
\end{itemize}

\item[{\large {\bf Scholastic and Professional Honors}}]$~$ 
\begin{itemize}
\item Recipient of the 1997 {\it Wimberly C. Royster Outstanding
Teaching Assistant Award}. This award is presented annually by the University
of Kentucky Department of Mathematics for excellence in
teaching. I received the award the first year I was eligible.
\item Recipient of a ``Quality Achievement Fellowship,'' awarded by
the Graduate School at the University of Kentucky (1993-94,
1994-95, 1995-96).
\item Recipient of a ``Fellowship for Women,'' awarded by the Graduate
School at the University of Kentucky (1993-94).
\item Recipient of a research grant by the Center for Computational
Sciences at the University of Kentucky (Summer 1995 - Summer 1996).
\item Recipient of a full four-year academic scholarship to 
Transylvania University.
\item Recipient of the Transylvania Senior Academic Achievement
Award.
\item Recipient of the Ruchman Mathematics Award, awarded to the 
top senior math major at Transylvania.
\item Recipient of Lexington Rotary Club academic achievement award
given to the top two Transylvania juniors.
\item Valedictorian, Highlands High School class of 1989.
\end{itemize}

\item[{\large {\bf Professional Publications}}]$~$ 

\begin{itemize}
\item Anstreicher, K.; Fampa, M.; Lee, J.; and Williams, J., Continuous
Relaxations for Constrained Maximum-Entropy Sampling. In: {\em Lecture
Notes in Computer Science: Integer Programming and Combinatorial
Optimization}; Cunningham, McCormick, and Queyranne, Eds. 234-248,
Springer-Verlag, 1996. 
\item Anstreicher, K.; Fampa, M.; Lee, J.; and Williams, J., Using
Continuous Nonlinear Relaxations to Solve Constrained Maximum-Entropy
Sampling Problems, {\it CORE Discussion Paper} No. 9729.
\item Anstreicher, K.; Fampa, M.; Lee, J.; and Williams, J., 
Maximum-Entropy Remote Sampling, in preparation.
\end{itemize}
\end{description} 
\vfill
\rule{3in}{0.015in} \\
Joy Denise Williams \\


\end{document}