From fd6962f528d203026555f57bd6be730ef7e36959 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Tue, 21 Feb 2017 15:08:46 +0000 Subject: [PATCH 1/4] ENH: Add patch files to allow LAPACK 3.2.2 to be f2c'd This doesn't yet actually generate the files, since they would cause the diff to balloon --- numpy/linalg/lapack_lite/README.rst | 9 ++-- numpy/linalg/lapack_lite/clapack_scrub.py | 2 +- numpy/linalg/lapack_lite/f2c_c_lapack.f.patch | 32 +++++++++++++ numpy/linalg/lapack_lite/f2c_config.c.patch | 4 +- numpy/linalg/lapack_lite/f2c_d_lapack.f.patch | 32 +++++++++++++ numpy/linalg/lapack_lite/f2c_lapack.f.patch | 48 +++++++++++++++++++ numpy/linalg/lapack_lite/f2c_s_lapack.f.patch | 32 +++++++++++++ numpy/linalg/lapack_lite/f2c_z_lapack.f.patch | 32 +++++++++++++ numpy/linalg/lapack_lite/make_lite.py | 8 +++- 9 files changed, 190 insertions(+), 9 deletions(-) create mode 100644 numpy/linalg/lapack_lite/f2c_c_lapack.f.patch create mode 100644 numpy/linalg/lapack_lite/f2c_d_lapack.f.patch create mode 100644 numpy/linalg/lapack_lite/f2c_lapack.f.patch create mode 100644 numpy/linalg/lapack_lite/f2c_s_lapack.f.patch create mode 100644 numpy/linalg/lapack_lite/f2c_z_lapack.f.patch diff --git a/numpy/linalg/lapack_lite/README.rst b/numpy/linalg/lapack_lite/README.rst index 16fa0639610c..b25ce1e74f24 100644 --- a/numpy/linalg/lapack_lite/README.rst +++ b/numpy/linalg/lapack_lite/README.rst @@ -27,10 +27,9 @@ similar to that done to generate the CLAPACK_ distribution. .. _CLAPACK: http://netlib.org/clapack/index.html -The versions in the numpy git repo use the LAPACK source from the -`Debian package lapack3`_, version 3.0.20000531a-6. It was found that these -(being regularly maintained) worked better than the patches to the last -released version of LAPACK available at the LAPACK_ page. +The output C files in git use the LAPACK source from the LAPACK_ page, using +version 3.2.2. 
Unfortunately, newer versions use newer FORTRAN features, which +are increasingly not supported by ``f2c``. As these are found, the patch files +will need to be changed to re-express new constructs with legacy constructs. -.. _Debian package lapack3: https://archive.debian.net/source/etch/lapack3 .. _LAPACK: http://netlib.org/lapack/index.html diff --git a/numpy/linalg/lapack_lite/clapack_scrub.py b/numpy/linalg/lapack_lite/clapack_scrub.py index cfaa7e585cf0..6ce107cb6008 100644 --- a/numpy/linalg/lapack_lite/clapack_scrub.py +++ b/numpy/linalg/lapack_lite/clapack_scrub.py @@ -70,7 +70,7 @@ def endArgs(self, text): "i_len", "do_fio", "do_lio") + iofun # Routines to not scrub the ftnlen argument from - keep_ftnlen = (Str('ilaenv_') | Str('s_rnge')) + Str('(') + keep_ftnlen = (Str('ilaenv_') | Str('iparmq_') | Str('s_rnge')) + Str('(') lexicon = Lexicon([ (iofunctions, TEXT), diff --git a/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch b/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch new file mode 100644 index 000000000000..bcf7507baa7c --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_c_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -13163,5 +13163,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -13175,5 +13176,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -13223,5 +13225,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -13239,5 +13242,6 @@ + ! Skip any leading zeros. 
+ DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/numpy/linalg/lapack_lite/f2c_config.c.patch b/numpy/linalg/lapack_lite/f2c_config.c.patch index 8650322ac935..4c43f8aa2a5f 100644 --- a/numpy/linalg/lapack_lite/f2c_config.c.patch +++ b/numpy/linalg/lapack_lite/f2c_config.c.patch @@ -1,4 +1,4 @@ -@@ -690,7 +690,7 @@ L10: +@@ -696,7 +696,7 @@ doublereal dlamc3_(doublereal *a, doublereal *b) { /* System generated locals */ @@ -7,7 +7,7 @@ /* -@@ -1773,7 +1773,7 @@ L10: +@@ -1773,7 +1773,7 @@ doublereal slamc3_(real *a, real *b) { /* System generated locals */ diff --git a/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch b/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch new file mode 100644 index 000000000000..cd750cec096d --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_d_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -19075,5 +19075,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -19087,5 +19088,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -19131,5 +19133,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -19147,5 +19150,6 @@ + ! Skip any leading zeros. 
+ DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/numpy/linalg/lapack_lite/f2c_lapack.f.patch b/numpy/linalg/lapack_lite/f2c_lapack.f.patch new file mode 100644 index 000000000000..c743c1f627c7 --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_lapack.f.patch @@ -0,0 +1,48 @@ +@@ -267,9 +267,10 @@ + Scan up each column tracking the last zero row seen. + ILACLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILACLR = MAX( ILACLR, I ) + END DO + END IF +@@ -395,9 +396,10 @@ + Scan up each column tracking the last zero row seen. + ILADLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILADLR = MAX( ILADLR, I ) + END DO + END IF +@@ -1078,9 +1080,10 @@ + Scan up each column tracking the last zero row seen. + ILASLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILASLR = MAX( ILASLR, I ) + END DO + END IF +@@ -1206,9 +1209,10 @@ + Scan up each column tracking the last zero row seen. + ILAZLR = 0 + DO J = 1, N + DO I = M, 1, -1 +- IF( A(I, J).NE.ZERO ) EXIT ++ IF( A(I, J).NE.ZERO ) GO TO 10 + END DO ++ 10 CONTINUE + ILAZLR = MAX( ILAZLR, I ) + END DO + END IF diff --git a/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch b/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch new file mode 100644 index 000000000000..2e82d986e62e --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_s_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -17359,5 +17359,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -17371,5 +17372,6 @@ + ! Skip any trailing zeros. 
+ DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -17415,5 +17417,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -17431,5 +17434,6 @@ + ! Skip any leading zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch b/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch new file mode 100644 index 000000000000..1e6fc8c07075 --- /dev/null +++ b/numpy/linalg/lapack_lite/f2c_z_lapack.f.patch @@ -0,0 +1,32 @@ +@@ -15278,5 +15278,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 15 + END DO ++ 15 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -15290,5 +15291,6 @@ + ! Skip any trailing zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 16 + END DO ++ 16 CONTINUE + J = MIN( LASTV, PREVLASTV ) +@@ -15338,5 +15340,6 @@ + ! Skip any leading zeros. + DO LASTV = 1, I-1 +- IF( V( LASTV, I ).NE.ZERO ) EXIT ++ IF( V( LASTV, I ).NE.ZERO ) GO TO 35 + END DO ++ 35 CONTINUE + J = MAX( LASTV, PREVLASTV ) +@@ -15354,5 +15357,6 @@ + ! Skip any leading zeros. + DO LASTV = N, I+1, -1 +- IF( V( I, LASTV ).NE.ZERO ) EXIT ++ IF( V( I, LASTV ).NE.ZERO ) GO TO 36 + END DO ++ 36 CONTINUE + J = MAX( LASTV, PREVLASTV ) diff --git a/numpy/linalg/lapack_lite/make_lite.py b/numpy/linalg/lapack_lite/make_lite.py index 314a8c93b507..4c64952292b6 100755 --- a/numpy/linalg/lapack_lite/make_lite.py +++ b/numpy/linalg/lapack_lite/make_lite.py @@ -23,7 +23,7 @@ # Arguments to pass to f2c. 
You'll always want -A for ANSI C prototypes # Others of interest: -a to not make variables static by default # -C to check array subscripts -F2C_ARGS = ['-A'] +F2C_ARGS = ['-A', '-Nx800'] # The header to add to the top of the f2c_*.c file. Note that dlamch_() calls # will be replaced by the macros below by clapack_scrub.scrub_source() @@ -285,6 +285,12 @@ def main(): print('creating %s ...' % c_file) routines = library.allRoutinesByType(typename) concatenateRoutines(routines, fortran_file) + + # apply the patch + patch_file = fortran_file + '.patch' + if os.path.exists(patch_file): + subprocess.check_call(['patch', '-u', fortran_file, patch_file]) + print("Patched {}".format(fortran_file)) try: runF2C(fortran_file, output_dir) except F2CError: From febacb2eb2ae691829d83b52023f8312a29a1c68 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 2 Mar 2017 00:26:32 +0000 Subject: [PATCH 2/4] ENH: Add more functions copied to f2c_lite. These are copied from the libf2c source code, which is presumably where all the others came from --- numpy/linalg/lapack_lite/f2c.c | 84 ++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/numpy/linalg/lapack_lite/f2c.c b/numpy/linalg/lapack_lite/f2c.c index 89feb38857c8..1114bef3b1c3 100644 --- a/numpy/linalg/lapack_lite/f2c.c +++ b/numpy/linalg/lapack_lite/f2c.c @@ -192,6 +192,17 @@ return( (*x)>=0 ? } +#ifdef KR_headers +double floor(); +integer i_nint(x) real *x; +#else +#undef abs +integer i_nint(real *x) +#endif +{ +return (integer)(*x >= 0 ? 
floor(*x + .5) : -floor(.5 - *x)); +} + #ifdef KR_headers double pow(); double pow_dd(ap, bp) doublereal *ap, *bp; @@ -272,6 +283,79 @@ if(n != 0) } return(pow); } + +#ifdef KR_headers +VOID pow_zi(p, a, b) /* p = a**b */ + doublecomplex *p, *a; integer *b; +#else +extern void z_div(doublecomplex*, doublecomplex*, doublecomplex*); +void pow_zi(doublecomplex *p, doublecomplex *a, integer *b) /* p = a**b */ +#endif +{ + integer n; + unsigned long u; + double t; + doublecomplex q, x; + static doublecomplex one = {1.0, 0.0}; + + n = *b; + q.r = 1; + q.i = 0; + + if(n == 0) + goto done; + if(n < 0) + { + n = -n; + z_div(&x, &one, a); + } + else + { + x.r = a->r; + x.i = a->i; + } + + for(u = n; ; ) + { + if(u & 01) + { + t = q.r * x.r - q.i * x.i; + q.i = q.r * x.i + q.i * x.r; + q.r = t; + } + if(u >>= 1) + { + t = x.r * x.r - x.i * x.i; + x.i = 2 * x.r * x.i; + x.r = t; + } + else + break; + } + done: + p->i = q.i; + p->r = q.r; + } + +#ifdef KR_headers +VOID pow_ci(p, a, b) /* p = a**b */ + complex *p, *a; integer *b; +#else +extern void pow_zi(doublecomplex*, doublecomplex*, integer*); +void pow_ci(complex *p, complex *a, integer *b) /* p = a**b */ +#endif +{ +doublecomplex p1, a1; + +a1.r = a->r; +a1.i = a->i; + +pow_zi(&p1, &a1, b); + +p->r = p1.r; +p->i = p1.i; +} + /* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the * target of a concatenation to appear on its right-hand side (contrary * to the Fortran 77 Standard, but in accordance with Fortran 90). 
From 0b525a5e8072f91a9d10b787bee7de3372898203 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Thu, 2 Mar 2017 00:31:02 +0000 Subject: [PATCH 3/4] ENH: Rebuild all of lapack_lite from 3.2.2 --- numpy/linalg/lapack_lite/f2c_blas.c | 1052 +- numpy/linalg/lapack_lite/f2c_c_lapack.c | 11335 ++++++++++++-------- numpy/linalg/lapack_lite/f2c_config.c | 113 +- numpy/linalg/lapack_lite/f2c_d_lapack.c | 10459 +++++++++++++++---- numpy/linalg/lapack_lite/f2c_lapack.c | 964 +- numpy/linalg/lapack_lite/f2c_s_lapack.c | 10446 ++++++++++++++----- numpy/linalg/lapack_lite/f2c_z_lapack.c | 12087 ++++++++++++++-------- 7 files changed, 33005 insertions(+), 13451 deletions(-) diff --git a/numpy/linalg/lapack_lite/f2c_blas.c b/numpy/linalg/lapack_lite/f2c_blas.c index 19d1b8a5c5e8..9ce0ce86d126 100644 --- a/numpy/linalg/lapack_lite/f2c_blas.c +++ b/numpy/linalg/lapack_lite/f2c_blas.c @@ -30,29 +30,34 @@ them. /* Table of constant values */ static complex c_b21 = {1.f,0.f}; -static doublecomplex c_b1069 = {1.,0.}; +static doublecomplex c_b1078 = {1.,0.}; /* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer * incx, complex *cy, integer *incy) { /* System generated locals */ integer i__1, i__2, i__3, i__4; - real r__1, r__2; complex q__1, q__2; - /* Builtin functions */ - double r_imag(complex *); - /* Local variables */ static integer i__, ix, iy; + extern doublereal scabs1_(complex *); /* - constant times a vector plus a vector. + Purpose + ======= + + CAXPY constant times a vector plus a vector. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cy; @@ -62,7 +67,7 @@ static doublecomplex c_b1069 = {1.,0.}; if (*n <= 0) { return 0; } - if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) { + if (scabs1_(ca) == 0.f) { return 0; } if (*incx == 1 && *incy == 1) { @@ -125,11 +130,19 @@ static doublecomplex c_b1069 = {1.,0.}; /* - copies a vector, x, to a vector, y. + Purpose + ======= + + CCOPY copies a vector x to a vector y. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cy; @@ -196,12 +209,20 @@ static doublecomplex c_b1069 = {1.,0.}; /* + Purpose + ======= + forms the dot product of two vectors, conjugating the first vector. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cy; @@ -275,11 +296,19 @@ static doublecomplex c_b1069 = {1.,0.}; /* - forms the dot product of two vectors. + Purpose + ======= + + CDOTU forms the dot product of two vectors. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cy; @@ -378,7 +407,7 @@ static doublecomplex c_b1069 = {1.,0.}; alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - Parameters + Arguments ========== TRANSA - CHARACTER*1. @@ -474,6 +503,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, m ). Unchanged on exit. 
+ Further Details + =============== Level 3 Blas routine. @@ -483,6 +514,8 @@ static doublecomplex c_b1069 = {1.,0.}; Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Set NOTA and NOTB as true if A and B respectively are not conjugated or transposed, set CONJA and CONJB as true if A and @@ -1034,7 +1067,7 @@ static doublecomplex c_b1069 = {1.,0.}; Purpose ======= - CGEMV performs one of the matrix-vector operations + CGEMV performs one of the matrix-vector operations y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, or @@ -1043,7 +1076,7 @@ static doublecomplex c_b1069 = {1.,0.}; where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. - Parameters + Arguments ========== TRANS - CHARACTER*1. @@ -1114,6 +1147,8 @@ static doublecomplex c_b1069 = {1.,0.}; Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -1123,6 +1158,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -1425,7 +1462,7 @@ static doublecomplex c_b1069 = {1.,0.}; where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. - Parameters + Arguments ========== M - INTEGER. @@ -1475,6 +1512,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, m ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -1484,6 +1523,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. 
*/ @@ -1616,7 +1657,7 @@ static doublecomplex c_b1069 = {1.,0.}; where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. - Parameters + Arguments ========== M - INTEGER. @@ -1666,6 +1707,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, m ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -1675,6 +1718,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -1813,7 +1858,7 @@ static doublecomplex c_b1069 = {1.,0.}; where alpha and beta are scalars, x and y are n element vectors and A is an n by n hermitian matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -1884,6 +1929,8 @@ static doublecomplex c_b1069 = {1.,0.}; Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -1893,6 +1940,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -2220,7 +2269,7 @@ static doublecomplex c_b1069 = {1.,0.}; where alpha is a scalar, x and y are n element vectors and A is an n by n hermitian matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -2290,6 +2339,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, n ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -2299,6 +2350,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. 
*/ @@ -2652,7 +2705,7 @@ static doublecomplex c_b1069 = {1.,0.}; hermitian matrix and A and B are n by k matrices in the first case and k by n matrices in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -2755,6 +2808,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -2767,6 +2822,8 @@ static doublecomplex c_b1069 = {1.,0.}; -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. Ed Anderson, Cray Research Inc. + ===================================================================== + Test the input parameters. */ @@ -3301,7 +3358,7 @@ static doublecomplex c_b1069 = {1.,0.}; matrix and A is an n by k matrix in the first case and a k by n matrix in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -3385,6 +3442,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -3397,6 +3456,8 @@ static doublecomplex c_b1069 = {1.,0.}; -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. Ed Anderson, Cray Research Inc. + ===================================================================== + Test the input parameters. */ @@ -3780,12 +3841,20 @@ static doublecomplex c_b1069 = {1.,0.}; /* - scales a vector by a constant. + Purpose + ======= + + CSCAL scales a vector by a constant. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cx; @@ -3828,6 +3897,141 @@ static doublecomplex c_b1069 = {1.,0.}; return 0; } /* cscal_ */ +/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, real *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + complex q__1, q__2, q__3; + + /* Local variables */ + static integer i__, ix, iy; + static complex ctemp; + + +/* + Purpose + ======= + + CSROT applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + + Arguments + ========== + + N (input) INTEGER + On entry, N specifies the order of the vectors cx and cy. + N must be at least zero. + Unchanged on exit. + + CX (input) COMPLEX array, dimension at least + ( 1 + ( N - 1 )*abs( INCX ) ). + Before entry, the incremented array CX must contain the n + element vector cx. On exit, CX is overwritten by the updated + vector cx. + + INCX (input) INTEGER + On entry, INCX specifies the increment for the elements of + CX. INCX must not be zero. + Unchanged on exit. + + CY (input) COMPLEX array, dimension at least + ( 1 + ( N - 1 )*abs( INCY ) ). + Before entry, the incremented array CY must contain the n + element vector cy. On exit, CY is overwritten by the updated + vector cy. + + INCY (input) INTEGER + On entry, INCY specifies the increment for the elements of + CY. INCY must not be zero. + Unchanged on exit. + + C (input) REAL + On entry, C specifies the cosine, cos. + Unchanged on exit. + + S (input) REAL + On entry, S specifies the sine, sin. + Unchanged on exit. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = iy; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = iy; + i__3 = iy; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = ix; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; + i__3 = i__; + q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + ctemp.r = q__1.r, ctemp.i = q__1.i; + i__2 = i__; + i__3 = i__; + q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + i__4 = i__; + q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; + cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* csrot_ */ + /* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx) { /* System generated locals */ @@ -3843,12 +4047,20 @@ static doublecomplex c_b1069 = {1.,0.}; /* - scales a complex vector by a real 
constant. + Purpose + ======= + + CSSCAL scales a complex vector by a real constant. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cx; @@ -3905,11 +4117,19 @@ static doublecomplex c_b1069 = {1.,0.}; /* - interchanges two vectors. + Purpose + ======= + + CSWAP interchanges two vectors. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cy; @@ -4003,7 +4223,7 @@ static doublecomplex c_b1069 = {1.,0.}; op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -4097,6 +4317,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -4106,6 +4328,8 @@ static doublecomplex c_b1069 = {1.,0.}; Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -4156,7 +4380,7 @@ static doublecomplex c_b1069 = {1.,0.}; /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -4657,7 +4881,7 @@ static doublecomplex c_b1069 = {1.,0.}; where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -4728,6 +4952,8 @@ static doublecomplex c_b1069 = {1.,0.}; X. INCX must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -4737,6 +4963,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. 
Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -5197,7 +5425,7 @@ static doublecomplex c_b1069 = {1.,0.}; The matrix X is overwritten on B. - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -5291,6 +5519,8 @@ static doublecomplex c_b1069 = {1.,0.}; max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -5300,6 +5530,8 @@ static doublecomplex c_b1069 = {1.,0.}; Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -5350,7 +5582,7 @@ static doublecomplex c_b1069 = {1.,0.}; /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -5862,7 +6094,7 @@ static doublecomplex c_b1069 = {1.,0.}; No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -5933,6 +6165,8 @@ static doublecomplex c_b1069 = {1.,0.}; X. INCX must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -5942,6 +6176,8 @@ static doublecomplex c_b1069 = {1.,0.}; Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -6341,12 +6577,20 @@ static doublecomplex c_b1069 = {1.,0.}; /* - constant times a vector plus a vector. + Purpose + ======= + + DAXPY constant times a vector plus a vector. uses unrolled loops for increments equal to one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dy; @@ -6421,21 +6665,24 @@ static doublecomplex c_b1069 = {1.,0.}; doublereal dcabs1_(doublecomplex *z__) { /* System generated locals */ - doublereal ret_val; - static doublecomplex equiv_0[1]; + doublereal ret_val, d__1, d__2; - /* Local variables */ -#define t ((doublereal *)equiv_0) -#define zz (equiv_0) + /* Builtin functions */ + double d_imag(doublecomplex *); - zz->r = z__->r, zz->i = z__->i; - ret_val = abs(t[0]) + abs(t[1]); - return ret_val; -} /* dcabs1_ */ +/* + Purpose + ======= + + DCABS1 computes absolute value of a double complex number + + ===================================================================== +*/ -#undef zz -#undef t + ret_val = (d__1 = z__->r, abs(d__1)) + (d__2 = d_imag(z__), abs(d__2)); + return ret_val; +} /* dcabs1_ */ /* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, doublereal *dy, integer *incy) @@ -6448,12 +6695,20 @@ doublereal dcabs1_(doublecomplex *z__) /* - copies a vector, x, to a vector, y. + Purpose + ======= + + DCOPY copies a vector, x, to a vector, y. uses unrolled loops for increments equal to one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dy; @@ -6538,12 +6793,20 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, /* - forms the dot product of two vectors. + Purpose + ======= + + DDOT forms the dot product of two vectors. uses unrolled loops for increments equal to one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dy; @@ -6650,7 +6913,7 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - Parameters + Arguments ========== TRANSA - CHARACTER*1. @@ -6746,6 +7009,8 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -6755,6 +7020,8 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Set NOTA and NOTB as true if A and B respectively are not transposed and set NROWA, NCOLA and NROWB as the number of rows @@ -7004,7 +7271,7 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. - Parameters + Arguments ========== TRANS - CHARACTER*1. @@ -7075,6 +7342,8 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -7084,6 +7353,8 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -7291,7 +7562,7 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. 
- Parameters + Arguments ========== M - INTEGER. @@ -7341,6 +7612,8 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, max( 1, m ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -7350,6 +7623,8 @@ doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -7453,17 +7728,23 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) /* + Purpose + ======= + DNRM2 returns the euclidean norm of a vector via the function name, so that DNRM2 := sqrt( x'*x ) + Further Details + =============== -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to DLASSQ. Sven Hammarling, Nag Ltd. -*/ + ===================================================================== +*/ /* Parameter adjustments */ --x; @@ -7522,11 +7803,19 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) /* - applies a plane rotation. + Purpose + ======= + + DROT applies a plane rotation. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dy; @@ -7588,13 +7877,21 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) /* - scales a vector by a constant. + Purpose + ======= + + DSCAL scales a vector by a constant. uses unrolled loops for increment equal to one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dx; @@ -7664,12 +7961,20 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) /* + Purpose + ======= + interchanges two vectors. uses unrolled loops for increments equal one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dy; @@ -7772,7 +8077,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) where alpha and beta are scalars, x and y are n element vectors and A is an n by n symmetric matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -7841,6 +8146,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -7850,6 +8157,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -8060,7 +8369,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) where alpha is a scalar, x and y are n element vectors and A is an n by n symmetric matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -8127,6 +8436,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -8136,6 +8447,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. 
*/ @@ -8320,7 +8633,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) and A and B are n by k matrices in the first case and k by n matrices in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -8421,6 +8734,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -8431,6 +8746,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -8699,7 +9016,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) and A is an n by k matrix in the first case and a k by n matrix in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -8782,6 +9099,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -8791,6 +9110,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -9043,7 +9364,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) op( A ) = A or op( A ) = A'. - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -9137,6 +9458,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -9146,6 +9469,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. 
*/ @@ -9195,7 +9520,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -9463,7 +9788,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -9534,6 +9859,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) X. INCX must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -9543,6 +9870,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -9793,7 +10122,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) The matrix X is overwritten on B. - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -9887,6 +10216,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -9897,6 +10228,8 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -9946,7 +10279,7 @@ doublereal dnrm2_(integer *n, doublereal *x, integer *incx) /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -10236,12 +10569,20 @@ doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx) /* - takes the sum of the absolute values. + Purpose + ======= + + DZASUM takes the sum of the absolute values. + + Further Details + =============== + jack dongarra, 3/11/78. modified 3/93 to return if incx .le. 0. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --zx; @@ -10295,17 +10636,23 @@ doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) /* + Purpose + ======= + DZNRM2 returns the euclidean norm of a vector via the function name, so that DZNRM2 := sqrt( conjg( x' )*x ) + Further Details + =============== -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to ZLASSQ. Sven Hammarling, Nag Ltd. -*/ + ===================================================================== +*/ /* Parameter adjustments */ --x; @@ -10368,24 +10715,29 @@ doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx) integer icamax_(integer *n, complex *cx, integer *incx) { /* System generated locals */ - integer ret_val, i__1, i__2; - real r__1, r__2; - - /* Builtin functions */ - double r_imag(complex *); + integer ret_val, i__1; /* Local variables */ static integer i__, ix; static real smax; + extern doublereal scabs1_(complex *); /* - finds the index of element having max. absolute value. + Purpose + ======= + + ICAMAX finds the index of element having max. absolute value. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cx; @@ -10406,19 +10758,15 @@ integer icamax_(integer *n, complex *cx, integer *incx) /* code for increment not equal to 1 */ ix = 1; - smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); + smax = scabs1_(&cx[1]); ix += *incx; i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = ix; - if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs( - r__2)) <= smax) { + if (scabs1_(&cx[ix]) <= smax) { goto L5; } ret_val = i__; - i__2 = ix; - smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), - dabs(r__2)); + smax = scabs1_(&cx[ix]); L5: ix += *incx; /* L10: */ @@ -10428,18 +10776,14 @@ integer icamax_(integer *n, complex *cx, integer *incx) /* code for increment equal to 1 */ L20: - smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2)); + smax = scabs1_(&cx[1]); i__1 = *n; for (i__ = 2; i__ <= i__1; ++i__) { - i__2 = i__; - if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs( - r__2)) <= smax) { + if (scabs1_(&cx[i__]) <= smax) { goto L30; } ret_val = i__; - i__2 = i__; - smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), - dabs(r__2)); + smax = scabs1_(&cx[i__]); L30: ; } @@ -10458,12 +10802,20 @@ integer idamax_(integer *n, doublereal *dx, integer *incx) /* - finds the index of element having max. absolute value. + Purpose + ======= + + IDAMAX finds the index of element having max. absolute value. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --dx; @@ -10528,12 +10880,20 @@ integer isamax_(integer *n, real *sx, integer *incx) /* - finds the index of element having max. 
absolute value. + Purpose + ======= + + ISAMAX finds the index of element having max. absolute value. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sx; @@ -10598,12 +10958,20 @@ integer izamax_(integer *n, doublecomplex *zx, integer *incx) /* - finds the index of element having max. absolute value. + Purpose + ======= + + IZAMAX finds the index of element having max. absolute value. + + Further Details + =============== + jack dongarra, 1/15/85. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --zx; @@ -10667,12 +11035,20 @@ integer izamax_(integer *n, doublecomplex *zx, integer *incx) /* - constant times a vector plus a vector. + Purpose + ======= + + SAXPY constant times a vector plus a vector. uses unrolled loop for increments equal to one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sy; @@ -10744,6 +11120,28 @@ integer izamax_(integer *n, doublecomplex *zx, integer *incx) return 0; } /* saxpy_ */ +doublereal scabs1_(complex *z__) +{ + /* System generated locals */ + real ret_val, r__1, r__2; + + /* Builtin functions */ + double r_imag(complex *); + + +/* + Purpose + ======= + + SCABS1 computes absolute value of a complex number + + ===================================================================== +*/ + + ret_val = (r__1 = z__->r, dabs(r__1)) + (r__2 = r_imag(z__), dabs(r__2)); + return ret_val; +} /* scabs1_ */ + doublereal scasum_(integer *n, complex *cx, integer *incx) { /* System generated locals */ @@ -10759,13 +11157,21 @@ doublereal scasum_(integer *n, complex *cx, integer *incx) /* - takes the sum of the absolute values of a complex vector and + Purpose + ======= + + SCASUM takes the sum of the absolute values of a complex vector and returns a single precision result. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --cx; @@ -10823,17 +11229,23 @@ doublereal scnrm2_(integer *n, complex *x, integer *incx) /* + Purpose + ======= + SCNRM2 returns the euclidean norm of a vector via the function name, so that SCNRM2 := sqrt( conjg( x' )*x ) + Further Details + =============== -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to CLASSQ. Sven Hammarling, Nag Ltd. -*/ + ===================================================================== +*/ /* Parameter adjustments */ --x; @@ -10904,12 +11316,20 @@ doublereal scnrm2_(integer *n, complex *x, integer *incx) /* - copies a vector, x, to a vector, y. 
+ Purpose + ======= + + SCOPY copies a vector, x, to a vector, y. uses unrolled loops for increments equal to 1. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sy; @@ -10993,12 +11413,20 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) /* - forms the dot product of two vectors. + Purpose + ======= + + SDOT forms the dot product of two vectors. uses unrolled loops for increments equal to one. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sy; @@ -11104,7 +11532,7 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - Parameters + Arguments ========== TRANSA - CHARACTER*1. @@ -11200,6 +11628,8 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -11209,6 +11639,8 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Set NOTA and NOTB as true if A and B respectively are not transposed and set NROWA, NCOLA and NROWB as the number of rows @@ -11458,7 +11890,7 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. - Parameters + Arguments ========== TRANS - CHARACTER*1. 
@@ -11529,6 +11961,8 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -11538,6 +11972,8 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -11744,7 +12180,7 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. - Parameters + Arguments ========== M - INTEGER. @@ -11794,6 +12230,8 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -11803,6 +12241,8 @@ doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -11906,17 +12346,23 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* + Purpose + ======= + SNRM2 returns the euclidean norm of a vector via the function name, so that - SNRM2 := sqrt( x'*x ) + SNRM2 := sqrt( x'*x ). + Further Details + =============== -- This version written on 25-October-1982. Modified on 14-October-1993 to inline the call to SLASSQ. Sven Hammarling, Nag Ltd. -*/ + ===================================================================== +*/ /* Parameter adjustments */ --x; @@ -11975,11 +12421,19 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* + Purpose + ======= + applies a plane rotation. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sy; @@ -12040,13 +12494,21 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* + Purpose + ======= + scales a vector by a constant. uses unrolled loops for increment equal to 1. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sx; @@ -12116,12 +12578,20 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* + Purpose + ======= + interchanges two vectors. uses unrolled loops for increments equal to 1. + + Further Details + =============== + jack dongarra, linpack, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --sy; @@ -12224,7 +12694,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha and beta are scalars, x and y are n element vectors and A is an n by n symmetric matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -12293,6 +12763,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -12302,6 +12774,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -12511,7 +12985,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha is a scalar, x and y are n element vectors and A is an n by n symmetric matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. 
@@ -12578,6 +13052,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -12587,6 +13063,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -12771,7 +13249,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) and A and B are n by k matrices in the first case and k by n matrices in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -12872,6 +13350,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -12882,6 +13362,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -13152,7 +13634,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) and A is an n by k matrix in the first case and a k by n matrix in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -13235,6 +13717,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -13244,6 +13728,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -13496,7 +13982,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) op( A ) = A or op( A ) = A'. - Parameters + Arguments ========== SIDE - CHARACTER*1. 
@@ -13590,6 +14076,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -13599,6 +14087,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -13648,7 +14138,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -13916,7 +14406,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -13987,6 +14477,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) X. INCX must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -13996,6 +14488,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -14246,7 +14740,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) The matrix X is overwritten on B. - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -14340,6 +14834,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -14350,6 +14846,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. 
*/ @@ -14399,7 +14897,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -14689,9 +15187,18 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - constant times a vector plus a vector. + Purpose + ======= + + ZAXPY constant times a vector plus a vector. + + Further Details + =============== + jack dongarra, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== */ /* Parameter adjustments */ @@ -14765,11 +15272,19 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - copies a vector, x, to a vector, y. + Purpose + ======= + + ZCOPY copies a vector, x, to a vector, y. + + Further Details + =============== + jack dongarra, linpack, 4/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --zy; @@ -14836,9 +15351,18 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - forms the dot product of a vector. + Purpose + ======= + + ZDOTC forms the dot product of a vector. + + Further Details + =============== + jack dongarra, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== */ /* Parameter adjustments */ @@ -14913,9 +15437,18 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - forms the dot product of two vectors. + Purpose + ======= + + ZDOTU forms the dot product of two vectors. + + Further Details + =============== + jack dongarra, 3/11/78. 
modified 12/3/93, array(1) declarations changed to array(*) + + ===================================================================== */ /* Parameter adjustments */ @@ -14977,6 +15510,141 @@ doublereal snrm2_(integer *n, real *x, integer *incx) return ; } /* zdotu_ */ +/* Subroutine */ int zdrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3; + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex ctemp; + + +/* + Purpose + ======= + + Applies a plane rotation, where the cos and sin (c and s) are real + and the vectors cx and cy are complex. + jack dongarra, linpack, 3/11/78. + + Arguments + ========== + + N (input) INTEGER + On entry, N specifies the order of the vectors cx and cy. + N must be at least zero. + Unchanged on exit. + + CX (input) COMPLEX*16 array, dimension at least + ( 1 + ( N - 1 )*abs( INCX ) ). + Before entry, the incremented array CX must contain the n + element vector cx. On exit, CX is overwritten by the updated + vector cx. + + INCX (input) INTEGER + On entry, INCX specifies the increment for the elements of + CX. INCX must not be zero. + Unchanged on exit. + + CY (input) COMPLEX*16 array, dimension at least + ( 1 + ( N - 1 )*abs( INCY ) ). + Before entry, the incremented array CY must contain the n + element vector cy. On exit, CY is overwritten by the updated + vector cy. + + INCY (input) INTEGER + On entry, INCY specifies the increment for the elements of + CY. INCY must not be zero. + Unchanged on exit. + + C (input) DOUBLE PRECISION + On entry, C specifies the cosine, cos. + Unchanged on exit. + + S (input) DOUBLE PRECISION + On entry, S specifies the sine, sin. + Unchanged on exit. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments not equal + to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = iy; + z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = iy; + i__3 = iy; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + i__4 = ix; + z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = ix; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + ix += *incx; + iy += *incy; +/* L10: */ + } + return 0; + +/* code for both increments equal to 1 */ + +L20: + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = i__; + z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + ctemp.r = z__1.r, ctemp.i = z__1.i; + i__2 = i__; + i__3 = i__; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + i__4 = i__; + z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = i__; + cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; +/* L30: */ + } + return 0; +} /* zdrot_ */ + /* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, integer *incx) { @@ -14989,12 +15657,20 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - scales a vector by a constant. 
+ Purpose + ======= + + ZDSCAL scales a vector by a constant. + + Further Details + =============== + jack dongarra, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --zx; @@ -15078,7 +15754,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) alpha and beta are scalars, and A, B and C are matrices, with op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. - Parameters + Arguments ========== TRANSA - CHARACTER*1. @@ -15174,6 +15850,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -15183,6 +15861,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Set NOTA and NOTB as true if A and B respectively are not conjugated or transposed, set CONJA and CONJB as true if A and @@ -15744,7 +16424,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. - Parameters + Arguments ========== TRANS - CHARACTER*1. @@ -15815,6 +16495,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -15824,6 +16506,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -16127,7 +16811,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. 
- Parameters + Arguments ========== M - INTEGER. @@ -16177,6 +16861,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -16186,6 +16872,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -16319,7 +17007,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha is a scalar, x is an m element vector, y is an n element vector and A is an m by n matrix. - Parameters + Arguments ========== M - INTEGER. @@ -16369,6 +17057,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -16378,6 +17068,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -16516,7 +17208,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha and beta are scalars, x and y are n element vectors and A is an n by n hermitian matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -16587,6 +17279,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Y. INCY must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -16596,6 +17290,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -16924,7 +17620,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where alpha is a scalar, x and y are n element vectors and A is an n by n hermitian matrix. 
- Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -16994,6 +17690,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -17003,6 +17701,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -17356,7 +18056,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) hermitian matrix and A and B are n by k matrices in the first case and k by n matrices in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -17459,6 +18159,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -17471,6 +18173,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. Ed Anderson, Cray Research Inc. + ===================================================================== + Test the input parameters. */ @@ -18005,7 +18709,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) matrix and A is an n by k matrix in the first case and a k by n matrix in the second case. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -18089,6 +18793,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, n ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -18101,6 +18807,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. Ed Anderson, Cray Research Inc. + ===================================================================== + Test the input parameters. */ @@ -18484,12 +19192,20 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - scales a vector by a constant. 
+ Purpose + ======= + + ZSCAL scales a vector by a constant. + + Further Details + =============== + jack dongarra, 3/11/78. modified 3/93 to return if incx .le. 0. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --zx; @@ -18544,11 +19260,19 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* - interchanges two vectors. + Purpose + ======= + + ZSWAP interchanges two vectors. + + Further Details + =============== + jack dongarra, 3/11/78. modified 12/3/93, array(1) declarations changed to array(*) -*/ + ===================================================================== +*/ /* Parameter adjustments */ --zy; @@ -18642,7 +19366,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) op( A ) = A or op( A ) = A' or op( A ) = conjg( A' ). - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -18736,6 +19460,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -18745,6 +19471,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -18795,7 +19523,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -19296,7 +20024,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) where x is an n element vector and A is an n by n unit, or non-unit, upper or lower triangular matrix. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -19367,6 +20095,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) X. INCX must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. 
@@ -19376,6 +20106,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ @@ -19837,7 +20569,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) The matrix X is overwritten on B. - Parameters + Arguments ========== SIDE - CHARACTER*1. @@ -19931,6 +20663,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) max( 1, m ). Unchanged on exit. + Further Details + =============== Level 3 Blas routine. @@ -19940,6 +20674,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Jeremy Du Croz, Numerical Algorithms Group Ltd. Sven Hammarling, Numerical Algorithms Group Ltd. + ===================================================================== + Test the input parameters. */ @@ -19990,7 +20726,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* Quick return if possible. */ - if (*n == 0) { + if (*m == 0 || *n == 0) { return 0; } @@ -20259,7 +20995,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* L210: */ } if (nounit) { - z_div(&z__1, &c_b1069, &a[j + j * a_dim1]); + z_div(&z__1, &c_b1078, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__2 = *m; for (i__ = 1; i__ <= i__2; ++i__) { @@ -20310,7 +21046,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) /* L260: */ } if (nounit) { - z_div(&z__1, &c_b1069, &a[j + j * a_dim1]); + z_div(&z__1, &c_b1078, &a[j + j * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { @@ -20337,11 +21073,11 @@ doublereal snrm2_(integer *n, real *x, integer *incx) for (k = *n; k >= 1; --k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b1069, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1078, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b1069, &z__2); + z_div(&z__1, &c_b1078, &z__2); temp.r = z__1.r, temp.i = 
z__1.i; } i__1 = *m; @@ -20401,11 +21137,11 @@ doublereal snrm2_(integer *n, real *x, integer *incx) for (k = 1; k <= i__1; ++k) { if (nounit) { if (noconj) { - z_div(&z__1, &c_b1069, &a[k + k * a_dim1]); + z_div(&z__1, &c_b1078, &a[k + k * a_dim1]); temp.r = z__1.r, temp.i = z__1.i; } else { d_cnjg(&z__2, &a[k + k * a_dim1]); - z_div(&z__1, &c_b1069, &z__2); + z_div(&z__1, &c_b1078, &z__2); temp.r = z__1.r, temp.i = z__1.i; } i__2 = *m; @@ -20503,7 +21239,7 @@ doublereal snrm2_(integer *n, real *x, integer *incx) No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine. - Parameters + Arguments ========== UPLO - CHARACTER*1. @@ -20574,6 +21310,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) X. INCX must not be zero. Unchanged on exit. + Further Details + =============== Level 2 Blas routine. @@ -20583,6 +21321,8 @@ doublereal snrm2_(integer *n, real *x, integer *incx) Sven Hammarling, Nag Central Office. Richard Hanson, Sandia National Labs. + ===================================================================== + Test the input parameters. */ diff --git a/numpy/linalg/lapack_lite/f2c_c_lapack.c b/numpy/linalg/lapack_lite/f2c_c_lapack.c index e2c757728ed7..85dcf9477609 100644 --- a/numpy/linalg/lapack_lite/f2c_c_lapack.c +++ b/numpy/linalg/lapack_lite/f2c_c_lapack.c @@ -30,22 +30,26 @@ them. 
/* Table of constant values */ static integer c__1 = 1; -static complex c_b55 = {0.f,0.f}; -static complex c_b56 = {1.f,0.f}; +static complex c_b56 = {0.f,0.f}; +static complex c_b57 = {1.f,0.f}; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static integer c__0 = 0; -static integer c__8 = 8; -static integer c__4 = 4; static integer c__65 = 65; -static real c_b871 = 1.f; +static real c_b894 = 1.f; +static integer c__12 = 12; +static integer c__49 = 49; +static real c_b1087 = 0.f; +static integer c__9 = 9; +static real c_b1136 = -1.f; +static integer c__13 = 13; static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; static logical c_false = FALSE_; -static real c_b1101 = 0.f; -static integer c__9 = 9; -static real c_b1150 = -1.f; -static real c_b1794 = .5f; +static logical c_true = TRUE_; +static real c_b2023 = .5f; /* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, real *scale, integer *m, complex *v, integer *ldv, @@ -68,10 +72,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -282,14 +286,15 @@ static real c_b1794 = .5f; extern doublereal slamch_(char *); extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer *), xerbla_(char *, integer *); + extern logical sisnan_(real *); static logical noconv; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -529,7 +534,7 @@ static real c_b1794 = .5f; sfmin1 = slamch_("S") / slamch_("P"); sfmax1 = 1.f / sfmin1; - sfmin2 = sfmin1 * 8.f; + sfmin2 = sfmin1 * 2.f; sfmax2 = 1.f / sfmin2; L140: noconv = FALSE_; @@ -564,7 +569,7 @@ static real c_b1794 = .5f; if (c__ == 0.f || r__ == 0.f) { goto L200; } - g = r__ / 8.f; + g = r__ / 2.f; f = 1.f; s = c__ + r__; L160: @@ -575,28 +580,38 @@ static real c_b1794 = .5f; if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { goto L170; } - f *= 8.f; - c__ *= 8.f; - ca *= 8.f; - r__ /= 8.f; - g /= 8.f; - ra /= 8.f; + r__1 = c__ + f + ca + r__ + g + ra; + if (sisnan_(&r__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("CGEBAL", &i__2); + return 0; + } + f *= 2.f; + c__ *= 2.f; + ca *= 2.f; + r__ /= 2.f; + g /= 2.f; + ra /= 2.f; goto L160; L170: - g = c__ / 8.f; + g = c__ / 2.f; L180: /* Computing MIN */ r__1 = min(f,c__), r__1 = min(r__1,g); if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { goto L190; } - f /= 8.f; - c__ /= 8.f; - g /= 8.f; - ca /= 8.f; - r__ *= 8.f; - ra *= 8.f; + f /= 2.f; + c__ /= 2.f; + g /= 2.f; + ca /= 2.f; + r__ *= 2.f; + ra *= 2.f; goto L180; /* Now balance. */ @@ -646,7 +661,7 @@ static real c_b1794 = .5f; integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer a_dim1, a_offset, i__1, i__2, i__3; complex q__1; /* Builtin functions */ @@ -663,10 +678,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -832,11 +847,13 @@ static real c_b1794 = .5f; /* Apply H(i)' to A(i:m,i+1:n) from the left */ - i__2 = *m - i__ + 1; - i__3 = *n - i__; - r_cnjg(&q__1, &tauq[i__]); - clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + r_cnjg(&q__1, &tauq[i__]); + clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + q__1, &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } i__2 = i__ + i__ * a_dim1; i__3 = i__; a[i__2].r = d__[i__3], a[i__2].i = 0.f; @@ -905,12 +922,12 @@ static real c_b1794 = .5f; /* Apply G(i) to A(i+1:m,i:n) from the right */ - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } i__2 = *n - i__ + 1; clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); i__2 = i__ + i__ * a_dim1; @@ -988,10 +1005,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -1050,7 +1067,7 @@ static real c_b1794 = .5f; The scalar factors of the elementary reflectors which represent the unitary matrix P. See Further Details. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -1235,14 +1252,14 @@ static real c_b1794 = .5f; q__1.r = -1.f, q__1.i = -0.f; cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + - nb + 1], &ldwrky, &c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], + nb + 1], &ldwrky, &c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], lda); i__3 = *m - i__ - nb + 1; i__4 = *n - i__ - nb + 1; q__1.r = -1.f, q__1.i = -0.f; cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, & work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], lda); /* Copy diagonal and off-diagonal elements of B back into A */ @@ -1292,7 +1309,7 @@ static real c_b1794 = .5f; { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; + i__2, i__3; real r__1, r__2; complex q__1, q__2; @@ -1308,7 +1325,6 @@ static real c_b1794 = .5f; static complex tmp; static integer ibal; static char side[1]; - static integer maxb; static real anrm; static integer ierr, itau, iwrk, nout; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, @@ -1352,10 +1368,10 @@ static real c_b1794 = .5f; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -1420,7 +1436,7 @@ static real c_b1794 = .5f; The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -1494,62 +1510,44 @@ static real c_b1794 = .5f; the worst case.) */ - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, &c__0, ( - ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "CHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); - maxwrk = max(maxwrk,hswork); + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; } else { + maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n << 1; + if (wantvl) { /* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", - " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "CHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + chseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { /* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, 
"CUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + chseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } else { + chseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } + hswork = work[1].r; /* Computing MAX */ - i__1 = max(maxwrk,hswork), i__2 = *n << 1; - maxwrk = max(i__1,i__2); + i__1 = max(maxwrk,hswork); + maxwrk = max(i__1,minwrk); } work[1].r = (real) maxwrk, work[1].i = 0.f; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } } - if (*lwork < minwrk && ! lquery) { - *info = -12; - } + if (*info != 0) { i__1 = -(*info); xerbla_("CGEEV ", &i__1); @@ -1847,10 +1845,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2006,7 +2004,7 @@ static real c_b1794 = .5f; complex q__1; /* Local variables */ - static integer i__; + static integer i__, j; static complex t[4160] /* was [65][64] */; static integer ib; static complex ei; @@ -2015,14 +2013,16 @@ static real c_b1794 = .5f; integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *); static integer nbmin, iinfo; - extern /* Subroutine */ int cgehd2_(integer *, integer *, integer *, - complex *, integer *, complex *, complex *, integer *), clarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, complex *, integer *, - complex *, integer *), clahrd_( + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), caxpy_(integer *, + complex *, complex *, integer *, complex *, integer *), cgehd2_( integer *, integer *, integer *, complex *, integer *, complex *, - complex *, integer *, complex *, integer *), xerbla_(char *, - integer *); + complex *, integer *), clahr2_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, complex *, + integer *), clarfb_(char *, char *, char *, char *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + complex *, integer *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, lwkopt; @@ -2030,17 +2030,17 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose ======= - CGEHRD reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . + CGEHRD reduces a complex general matrix A to upper Hessenberg form H by + an unitary similarity transformation: Q' * A * Q = H . Arguments ========= @@ -2122,6 +2122,10 @@ static real c_b1794 = .5f; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) + ===================================================================== @@ -2186,13 +2190,21 @@ static real c_b1794 = .5f; return 0; } +/* + Determine the block size + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); nbmin = 2; iws = 1; if (nb > 1 && nb < nh) { /* Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). + (last block is always handled by unblocked code) Computing MAX */ @@ -2201,7 +2213,7 @@ static real c_b1794 = .5f; nx = max(i__1,i__2); if (nx < nh) { -/* Determine if workspace is large enough for blocked code. */ +/* Determine if workspace is large enough for blocked code */ iws = *n * nb; if (*lwork < iws) { @@ -2209,7 +2221,7 @@ static real c_b1794 = .5f; /* Not enough workspace to use optimal NB: determine the minimum value of NB, and reduce NB or force use of - unblocked code. 
+ unblocked code Computing MAX */ @@ -2249,13 +2261,13 @@ static real c_b1794 = .5f; which performs the reduction, and also the matrix Y = A*V*T */ - clahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + clahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & c__65, &work[1], &ldwork); /* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. + to 1 */ i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; @@ -2266,10 +2278,27 @@ static real c_b1794 = .5f; q__1.r = -1.f, q__1.i = -0.f; cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, - &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda); + &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda); i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; a[i__3].r = ei.r, a[i__3].i = ei.i; +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", &i__, & + i__3, &c_b57, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], & + ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + q__1.r = -1.f, q__1.i = -0.f; + caxpy_(&i__, &q__1, &work[ldwork * j + 1], &c__1, &a[(i__ + j + + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + /* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the left @@ -2281,7 +2310,7 @@ static real c_b1794 = .5f; i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & ldwork); -/* L30: */ +/* L40: */ } } @@ -2313,10 +2342,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -2459,10 +2488,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2495,7 +2524,7 @@ static real c_b1794 = .5f; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -2688,10 +2717,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -2831,10 +2860,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2868,7 +2897,7 @@ static real c_b1794 = .5f; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -3106,15 +3135,16 @@ static real c_b1794 = .5f; complex *, integer *, complex *, complex *, integer *, integer *); static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt; static real smlnum; - static logical wntqas, lquery; + static logical wntqas; static integer nrwork; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK driver routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + 8-15-00: Improve consistency of WS calculations (eca) Purpose @@ -3153,11 +3183,11 @@ static real c_b1794 = .5f; min(M,N) rows of V**H are returned in the arrays U and VT; = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**H are returned in + in the array A and all rows of V**H are returned in the array VT; otherwise, all columns of U are returned in the array U and the first M rows of V**H are overwritten - in the array VT; + in the array A; = 'N': no columns of U or rows of V**H are computed. M (input) INTEGER @@ -3208,7 +3238,7 @@ static real c_b1794 = .5f; JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; if JOBZ = 'S', LDVT >= min(M,N). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -3219,12 +3249,15 @@ static real c_b1794 = .5f; if JOBZ = 'S' or 'A', LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - RWORK (workspace) REAL array, dimension (LRWORK) - If JOBZ = 'N', LRWORK >= 7*min(M,N). 
- Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) + If LWORK = -1, a workspace query is assumed. The optimal + size for the WORK array is calculated and stored in WORK(1), + and no other work except argument checking is performed. + + RWORK (workspace) REAL array, dimension (MAX(1,LRWORK)) + If JOBZ = 'N', LRWORK >= 5*min(M,N). + Otherwise, + LRWORK >= min(M,N)*max(5*min(M,N)+7,2*max(M,N)+2*min(M,N)+1) IWORK (workspace) INTEGER array, dimension (8*min(M,N)) @@ -3273,7 +3306,6 @@ static real c_b1794 = .5f; wntqn = lsame_(jobz, "N"); minwrk = 1; maxwrk = 1; - lquery = *lwork == -1; if (! (wntqa || wntqs || wntqo || wntqn)) { *info = -1; @@ -3306,8 +3338,11 @@ static real c_b1794 = .5f; /* There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*N*N + 4*N + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. + BDSPAC = 5*N*N + 7*N + BDSPAN = MAX(7*N+4, 3*N+2+SMLSIZ*(SMLSIZ+8)) */ if (*m >= mnthr1) { @@ -3315,14 +3350,13 @@ static real c_b1794 = .5f; /* Path 1 (M much larger than N, JOBZ='N') */ - wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & + maxwrk = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + i__1 = maxwrk, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) 6, (ftnlen)1); - wrkbl = max(i__1,i__2); - maxwrk = wrkbl; + maxwrk = max(i__1,i__2); minwrk = *n * 3; } else if (wntqo) { @@ -3497,8 +3531,11 @@ static real c_b1794 = .5f; /* There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*M*M + 4*M + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. 
+ BDSPAC = 5*M*M + 7*M + BDSPAN = MAX(7*M+4, 3*M+2+SMLSIZ*(SMLSIZ+8)) */ if (*n >= mnthr1) { @@ -3685,26 +3722,25 @@ static real c_b1794 = .5f; } } maxwrk = max(maxwrk,minwrk); + } + if (*info == 0) { work[1].r = (real) maxwrk, work[1].i = 0.f; + if (*lwork < minwrk && *lwork != -1) { + *info = -13; + } } - if (*lwork < minwrk && ! lquery) { - *info = -13; - } +/* Quick returns */ + if (*info != 0) { i__1 = -(*info); xerbla_("CGESDD", &i__1); return 0; - } else if (lquery) { + } + if (*lwork == -1) { return 0; } - -/* Quick return if possible */ - if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1].r = 1.f, work[1].i = 0.f; - } return 0; } @@ -3762,7 +3798,7 @@ static real c_b1794 = .5f; i__1 = *n - 1; i__2 = *n - 1; - claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2], + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], lda); ie = 1; itauq = 1; @@ -3783,7 +3819,7 @@ static real c_b1794 = .5f; /* Perform bidiagonal SVD, compute singular values only (CWorkspace: 0) - (RWorkspace: need BDSPAC) + (RWorkspace: need BDSPAN) */ sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -3829,7 +3865,7 @@ static real c_b1794 = .5f; clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__1 = *n - 1; i__2 = *n - 1; - claset_("L", &i__1, &i__2, &c_b55, &c_b55, &work[ir + 1], & + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &work[ir + 1], & ldwrkr); /* @@ -3911,8 +3947,8 @@ static real c_b1794 = .5f; /* Computing MIN */ i__3 = *m - i__ + 1; chunk = min(i__3,ldwrkr); - cgemm_("N", "N", &chunk, n, n, &c_b56, &a[i__ + a_dim1], - lda, &work[iu], &ldwrku, &c_b55, &work[ir], & + cgemm_("N", "N", &chunk, n, n, &c_b57, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b56, &work[ir], & ldwrkr); clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda); @@ -3950,7 +3986,7 @@ static real c_b1794 = .5f; clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__2 = *n - 1; i__1 = *n - 1; - claset_("L", &i__2, &i__1, &c_b55, &c_b55, &work[ir + 1], & 
+ claset_("L", &i__2, &i__1, &c_b56, &c_b56, &work[ir + 1], & ldwrkr); /* @@ -4025,8 +4061,8 @@ static real c_b1794 = .5f; */ clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - cgemm_("N", "N", m, n, n, &c_b56, &a[a_offset], lda, &work[ir] - , &ldwrkr, &c_b55, &u[u_offset], ldu); + cgemm_("N", "N", m, n, n, &c_b57, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b56, &u[u_offset], ldu); } else if (wntqa) { @@ -4069,7 +4105,7 @@ static real c_b1794 = .5f; i__2 = *n - 1; i__1 = *n - 1; - claset_("L", &i__2, &i__1, &c_b55, &c_b55, &a[a_dim1 + 2], + claset_("L", &i__2, &i__1, &c_b56, &c_b56, &a[a_dim1 + 2], lda); ie = 1; itauq = itau; @@ -4134,8 +4170,8 @@ static real c_b1794 = .5f; (RWorkspace: 0) */ - cgemm_("N", "N", m, n, n, &c_b56, &u[u_offset], ldu, &work[iu] - , &ldwrku, &c_b55, &a[a_offset], lda); + cgemm_("N", "N", m, n, n, &c_b57, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b56, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ @@ -4173,7 +4209,7 @@ static real c_b1794 = .5f; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -4417,7 +4453,7 @@ static real c_b1794 = .5f; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -4475,7 +4511,7 @@ static real c_b1794 = .5f; (Rworkspace: need 0) */ - claset_("F", m, n, &c_b55, &c_b55, &work[iu], &ldwrku); + claset_("F", m, n, &c_b56, &c_b56, &work[iu], &ldwrku); clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); i__1 = *lwork - nwork + 1; cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ @@ -4541,7 +4577,7 @@ static real c_b1794 = .5f; (RWorkspace: 0) */ - claset_("F", m, n, &c_b55, &c_b55, &u[u_offset], ldu); + claset_("F", m, n, &c_b56, &c_b56, &u[u_offset], ldu); clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); i__2 = *lwork - nwork + 1; 
cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ @@ -4578,11 +4614,13 @@ static real c_b1794 = .5f; /* Set the right corner of U to identity matrix */ - claset_("F", m, m, &c_b55, &c_b55, &u[u_offset], ldu); - i__2 = *m - *n; - i__1 = *m - *n; - claset_("F", &i__2, &i__1, &c_b55, &c_b56, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); + claset_("F", m, m, &c_b56, &c_b56, &u[u_offset], ldu); + if (*m > *n) { + i__2 = *m - *n; + i__1 = *m - *n; + claset_("F", &i__2, &i__1, &c_b56, &c_b57, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } /* Copy real matrix RWORK(IRU) to complex matrix U @@ -4616,8 +4654,8 @@ static real c_b1794 = .5f; /* A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition - (if sufficient workspace available) + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) */ if (*n >= mnthr1) { @@ -4646,7 +4684,7 @@ static real c_b1794 = .5f; i__2 = *m - 1; i__1 = *m - 1; - claset_("U", &i__2, &i__1, &c_b55, &c_b55, &a[(a_dim1 << 1) + + claset_("U", &i__2, &i__1, &c_b56, &c_b56, &a[(a_dim1 << 1) + 1], lda); ie = 1; itauq = 1; @@ -4667,7 +4705,7 @@ static real c_b1794 = .5f; /* Perform bidiagonal SVD, compute singular values only (CWorkspace: 0) - (RWorkspace: need BDSPAC) + (RWorkspace: need BDSPAN) */ sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -4718,7 +4756,7 @@ static real c_b1794 = .5f; clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__2 = *m - 1; i__1 = *m - 1; - claset_("U", &i__2, &i__1, &c_b55, &c_b55, &work[il + ldwrkl], + claset_("U", &i__2, &i__1, &c_b56, &c_b56, &work[il + ldwrkl], &ldwrkl); /* @@ -4799,8 +4837,8 @@ static real c_b1794 = .5f; /* Computing MIN */ i__3 = *n - i__ + 1; blk = min(i__3,chunk); - cgemm_("N", "N", m, &blk, m, &c_b56, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b55, &work[il], & + cgemm_("N", "N", m, &blk, m, &c_b57, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b56, &work[il], & 
ldwrkl); clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda); @@ -4838,7 +4876,7 @@ static real c_b1794 = .5f; clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__1 = *m - 1; i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwrkl], + claset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwrkl], &ldwrkl); /* @@ -4913,8 +4951,8 @@ static real c_b1794 = .5f; */ clacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - cgemm_("N", "N", m, n, m, &c_b56, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b55, &vt[vt_offset], ldvt); + cgemm_("N", "N", m, n, m, &c_b57, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b56, &vt[vt_offset], ldvt); } else if (wntqa) { @@ -4957,7 +4995,7 @@ static real c_b1794 = .5f; i__1 = *m - 1; i__2 = *m - 1; - claset_("U", &i__1, &i__2, &c_b55, &c_b55, &a[(a_dim1 << 1) + + claset_("U", &i__1, &i__2, &c_b56, &c_b56, &a[(a_dim1 << 1) + 1], lda); ie = 1; itauq = itau; @@ -5021,8 +5059,8 @@ static real c_b1794 = .5f; (RWorkspace: 0) */ - cgemm_("N", "N", m, n, m, &c_b56, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b55, &a[a_offset], lda); + cgemm_("N", "N", m, n, m, &c_b57, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b56, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ @@ -5062,7 +5100,7 @@ static real c_b1794 = .5f; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -5306,7 +5344,7 @@ static real c_b1794 = .5f; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -5318,7 +5356,7 @@ static real c_b1794 = .5f; /* WORK( IVT ) is M by N */ - claset_("F", m, n, &c_b55, &c_b55, &work[ivt], &ldwkvt); + claset_("F", m, n, &c_b56, &c_b56, &work[ivt], &ldwkvt); nwork = ivt + ldwkvt * *n; } else { @@ -5441,7 +5479,7 @@ static real c_b1794 = 
.5f; (RWorkspace: M*M) */ - claset_("F", m, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); + claset_("F", m, n, &c_b56, &c_b56, &vt[vt_offset], ldvt); clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); i__1 = *lwork - nwork + 1; cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ @@ -5477,12 +5515,9 @@ static real c_b1794 = .5f; cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); -/* Set the right corner of VT to identity matrix */ +/* Set all of VT to identity matrix */ - i__1 = *n - *m; - i__2 = *n - *m; - claset_("F", &i__1, &i__2, &c_b55, &c_b56, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); + claset_("F", n, n, &c_b56, &c_b57, &vt[vt_offset], ldvt); /* Copy real matrix RWORK(IRVT) to complex matrix VT @@ -5491,7 +5526,6 @@ static real c_b1794 = .5f; (RWorkspace: M*M) */ - claset_("F", n, n, &c_b55, &c_b55, &vt[vt_offset], ldvt); clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); i__1 = *lwork - nwork + 1; cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ @@ -5510,10 +5544,20 @@ static real c_b1794 = .5f; slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } + if (*info != 0 && anrm > bignum) { + i__1 = minmn - 1; + slascl_("G", &c__0, &c__0, &bignum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } if (anrm < smlnum) { slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } + if (*info != 0 && anrm < smlnum) { + i__1 = minmn - 1; + slascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } } /* Return optimal workspace in WORK(1) */ @@ -5539,10 +5583,10 @@ static real c_b1794 = .5f; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -5652,23 +5696,27 @@ static real c_b1794 = .5f; complex q__1; /* Builtin functions */ + double c_abs(complex *); void c_div(complex *, complex *, complex *); /* Local variables */ - static integer j, jp; + static integer i__, j, jp; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), cgeru_(integer *, integer *, complex *, complex *, - integer *, complex *, integer *, complex *, integer *), cswap_( integer *, complex *, integer *, complex *, integer *); + static real sfmin; + extern /* Subroutine */ int cswap_(integer *, complex *, integer *, + complex *, integer *); extern integer icamax_(integer *, complex *, integer *); + extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -5747,6 +5795,10 @@ static real c_b1794 = .5f; return 0; } +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { @@ -5767,9 +5819,20 @@ static real c_b1794 = .5f; /* Compute elements J+1:M of J-th column. 
*/ if (j < *m) { - i__2 = *m - j; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); - cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); + if (c_abs(&a[j + j * a_dim1]) >= sfmin) { + i__2 = *m - j; + c_div(&q__1, &c_b57, &a[j + j * a_dim1]); + cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ + j * a_dim1; + c_div(&q__1, &a[j + i__ + j * a_dim1], &a[j + j * + a_dim1]); + a[i__3].r = q__1.r, a[i__3].i = q__1.i; +/* L20: */ + } + } } } else if (*info == 0) { @@ -5821,10 +5884,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -5963,7 +6026,7 @@ static real c_b1794 = .5f; i__3 = *n - j - jb + 1; ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b56, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + c_b57, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); if (j + jb <= *m) { @@ -5974,7 +6037,7 @@ static real c_b1794 = .5f; q__1.r = -1.f, q__1.i = -0.f; cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + - jb) * a_dim1], lda, &c_b56, &a[j + jb + (j + jb) * + jb) * a_dim1], lda, &c_b57, &a[j + jb + (j + jb) * a_dim1], lda); } } @@ -6005,10 +6068,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6111,12 +6174,12 @@ static real c_b1794 = .5f; /* Solve L*X = B, overwriting B with X. */ - ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b56, &a[ + ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b57, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ - ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & a[a_offset], lda, &b[b_offset], ldb); } else { @@ -6126,12 +6189,12 @@ static real c_b1794 = .5f; Solve U'*X = B, overwriting B with X. */ - ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b56, &a[ + ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b57, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ - ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b56, &a[a_offset], + ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b57, &a[a_offset], lda, &b[b_offset], ldb); /* Apply row interchanges to the solution vectors. */ @@ -6150,8 +6213,8 @@ static real c_b1794 = .5f; integer *lrwork, integer *iwork, integer *liwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; + integer a_dim1, a_offset, i__1, i__2; + real r__1; /* Builtin functions */ double sqrt(doublereal); @@ -6184,6 +6247,8 @@ static real c_b1794 = .5f; *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *); static real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; static integer indtau, indrwk, indwrk, liwmin; @@ -6198,10 +6263,10 @@ static real c_b1794 = .5f; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6250,7 +6315,7 @@ static real c_b1794 = .5f; W (output) REAL array, dimension (N) If INFO = 0, the eigenvalues in ascending order. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -6260,9 +6325,10 @@ static real c_b1794 = .5f; If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. RWORK (workspace/output) REAL array, dimension (LRWORK) @@ -6276,11 +6342,12 @@ static real c_b1794 = .5f; 1 + 5*N + 2*N**2. If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. 
- IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -6290,16 +6357,21 @@ static real c_b1794 = .5f; If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). Further Details =============== @@ -6308,6 +6380,7 @@ static real c_b1794 = .5f; Jeff Rutter, Computer Science Division, University of California at Berkeley, USA + Modified description of INFO. Sven, 16 Feb 05. 
===================================================================== @@ -6329,29 +6402,6 @@ static real c_b1794 = .5f; lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; *info = 0; - if (*n <= 1) { - lwmin = 1; - lrwmin = 1; - liwmin = 1; - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } else { - if (wantz) { - lwmin = (*n << 1) + *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } else { - lwmin = *n + 1; - lrwmin = *n; - liwmin = 1; - } - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || lsame_(uplo, "U"))) { @@ -6360,18 +6410,46 @@ static real c_b1794 = .5f; *info = -3; } else if (*lda < max(1,*n)) { *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; } if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } work[1].r = (real) lopt, work[1].i = 0.f; rwork[1] = (real) lropt; iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -12; + } } if (*info != 0) { @@ -6419,7 +6497,7 @@ static real c_b1794 = .5f; sigma = rmax / anrm; } if (iscale == 1) { - clascl_(uplo, &c__0, &c__0, &c_b871, &sigma, n, n, &a[a_offset], lda, + clascl_(uplo, &c__0, &c__0, &c_b894, &sigma, n, n, &a[a_offset], lda, info); } @@ -6435,10 +6513,6 @@ static real c_b1794 = .5f; llrwk = *lrwork - indrwk + 1; chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); -/* Computing MAX */ - i__1 = indwrk; - r__1 = (real) lopt, r__2 = (real) (*n) + work[i__1].r; - lopt = dmax(r__1,r__2); /* For eigenvalues only, call SSTERF. For eigenvectors, first call @@ -6456,14 +6530,6 @@ static real c_b1794 = .5f; cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ indwrk], n, &work[indwk2], &llwrk2, &iinfo); clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__4 = indwk2; - i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; - lopt = max(i__1,i__2); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ @@ -6515,10 +6581,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6688,7 +6754,7 @@ static real c_b1794 = .5f; /* Compute x := tau * A * v storing x in TAU(1:i) */ chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b55, &tau[1], &c__1) + a_dim1 + 1], &c__1, &c_b56, &tau[1], &c__1) ; /* Compute w := x - 1/2 * tau * (x'*v) * v */ @@ -6768,7 +6834,7 @@ static real c_b1794 = .5f; i__2 = *n - i__; chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b55, &tau[ + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b56, &tau[ i__], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ @@ -6850,10 +6916,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6907,7 +6973,7 @@ static real c_b1794 = .5f; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -7095,7 +7161,7 @@ static real c_b1794 = .5f; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1 - + 1], lda, &work[1], &ldwork, &c_b871, &a[a_offset], lda); + + 1], lda, &work[1], &ldwork, &c_b894, &a[a_offset], lda); /* Copy superdiagonal elements back into A, and diagonal @@ -7144,7 +7210,7 @@ static real c_b1794 = .5f; i__3 = *n - i__ - nb + 1; q__1.r = -1.f, q__1.i = -0.f; cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b871, &a[ + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b894, &a[ i__ + nb + (i__ + nb) * a_dim1], lda); /* @@ -7185,160 +7251,259 @@ static real c_b1794 = .5f; { /* System generated locals */ address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], - i__5, i__6; - real r__1, r__2, r__3, r__4; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2]; + real r__1, r__2, r__3; complex q__1; char ch__1[2]; /* Builtin functions */ - double r_imag(complex *); - void r_cnjg(complex *, complex *); /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ - static integer i__, j, k, l; - static complex s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static complex vv[16]; - static integer itn; - static complex tau; - static integer its; - static real ulp, tst1; - static integer maxb, ierr; - static real unfl; - static complex temp; - static real ovfl; - extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *); + static complex hl[2401] /* was [49][49] */; + static integer kbot, nmin; extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *), ccopy_(integer *, complex *, integer *, + extern /* Subroutine */ int ccopy_(integer *, 
complex *, integer *, complex *, integer *); - static integer itemp; - static real rtemp; - static logical initz, wantt, wantz; - static real rwork[1]; - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, - complex *, complex *, integer *, complex *); - extern integer icamax_(integer *, complex *, integer *); - extern doublereal slamch_(char *), clanhs_(char *, integer *, - complex *, integer *, real *); - extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer - *), clahqr_(logical *, logical *, integer *, integer *, integer *, - complex *, integer *, complex *, integer *, integer *, complex *, - integer *, integer *), clacpy_(char *, integer *, integer *, + static logical initz; + static complex workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int claqr0_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *), + clahqr_(logical *, logical *, integer *, integer *, integer *, + complex *, integer *, complex *, integer *, integer *, complex *, + integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex - *, complex *, complex *, integer *, complex *); - static real smlnum; static logical lquery; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - CHSEQR computes the eigenvalues of a complex upper Hessenberg - matrix H, and, optionally, the matrices T and Z from the Schur - decomposition H = Z T Z**H, where T is an upper triangular matrix - (the Schur form), and Z is the unitary matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input unitary matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the unitary - matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + June 2010 + + Purpose + ======= + + CHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to CGEBAL, and then passed to CGEHRD + when the matrix output by CGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', H contains the upper + triangular matrix T from the Schur decomposition (the + Schur form). If INFO = 0 and JOB = 'E', the contents of + H are unspecified on exit. (The output value of H when + INFO.GT.0 is given under the description of INFO below.) + + Unlike earlier versions of CHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the unitary matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the unitary matrix generated by CUNGHR + after the call to CGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. 
if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then CHSEQR does a workspace query. + In this case, CHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, CHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 
0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'CHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The CLAHQR vs CLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + CLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. 
Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an unitary matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to CGEBAL, and then passed to CGEHRD - when the matrix output by CGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) COMPLEX array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper triangular matrix - T from the Schur decomposition (the Schur form). If - JOB = 'E', the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX array, dimension (N) - The computed eigenvalues. If JOB = 'S', the eigenvalues are - stored in the same order as on the diagonal of the Schur form - returned in H, with W(i) = H(i,i). - - Z (input/output) COMPLEX array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the unitary matrix Z of the Schur vectors of H. 
- If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the unitary matrix generated by CUNGHR after - the call to CGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) COMPLEX array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, CHSEQR failed to compute all the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of W contain those - eigenvalues which have been successfully computed. + ================================================================ - ===================================================================== + ==== Matrices of order NTINY or smaller must be processed by + . CLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + ==== NL allocates some local workspace to help small matrices + . through a rare CLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== - Decode and test the input parameters + ==== Decode and check the input parameters. 
==== */ /* Parameter adjustments */ @@ -7355,11 +7520,12 @@ static real c_b1794 = .5f; wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); wantz = initz || lsame_(compz, "V"); + r__1 = (real) max(1,*n); + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + lquery = *lwork == -1; *info = 0; - i__1 = max(1,*n); - work[1].r = (real) i__1, work[1].i = 0.f; - lquery = *lwork == -1; if (! lsame_(job, "E") && ! wantt) { *info = -1; } else if (! lsame_(compz, "N") && ! wantz) { @@ -7377,451 +7543,162 @@ static real c_b1794 = .5f; } else if (*lwork < max(1,*n) && ! lquery) { *info = -12; } + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + i__1 = -(*info); xerbla_("CHSEQR", &i__1); return 0; - } else if (lquery) { - return 0; - } -/* Initialize Z, if necessary */ + } else if (*n == 0) { - if (initz) { - claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); - } +/* ==== Quick return in case N = 0; nothing to do. ==== */ -/* Store the eigenvalues isolated by CGEBAL. */ + return 0; - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L20: */ - } + } else if (lquery) { -/* Quick return if possible. */ +/* ==== Quick return in case of a workspace query ==== */ - if (*n == 0) { + claqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); +/* + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. 
==== + Computing MAX +*/ + r__2 = work[1].r, r__3 = (real) max(1,*n); + r__1 = dmax(r__2,r__3); + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } - -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ - - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - i__3 = i__ + j * h_dim1; - h__[i__3].r = 0.f, h__[i__3].i = 0.f; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are re-set inside the main loop. -*/ - if (wantt) { - i1 = 1; - i2 = *n; } else { - i1 = *ilo; - i2 = *ihi; - } -/* Ensure that the subdiagonal elements are real. */ +/* ==== copy eigenvalues isolated by CGEBAL ==== */ - i__1 = *ihi; - for (i__ = *ilo + 1; i__ <= i__1; ++i__) { - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - r__1 = temp.r; - r__2 = r_imag(&temp); - rtemp = slapy2_(&r__1, &r__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (i__ < *ihi) { - i__2 = i__ + 1 + i__ * h_dim1; - i__3 = i__ + 1 + i__ * h_dim1; - q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i = - temp.r * h__[i__3].i + temp.i * h__[i__3].r; - h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; - } - if (wantz) { - cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } + if (*ilo > 1) { + i__1 = *ilo - 1; + 
i__2 = *ldh + 1; + ccopy_(&i__1, &h__[h_offset], &i__2, &w[1], &c__1); } -/* L50: */ - } - -/* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation -*/ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 1 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, - ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(2,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); - -/* - Now 1 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. -*/ - - i__ = *ihi; -L60: - if (i__ < *ilo) { - goto L180; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. 
-*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__5 = k + k * h_dim1; - tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - - 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__5] - .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), - dabs(r__4))); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { - goto L80; - } -/* L70: */ + if (*ihi < *n) { + i__1 = *n - *ihi; + i__2 = *ldh + 1; + ccopy_(&i__1, &h__[*ihi + 1 + (*ihi + 1) * h_dim1], &i__2, &w[* + ihi + 1], &c__1); } -L80: - l = k; - if (l > *ilo) { -/* H(L,L-1) is negligible. */ +/* ==== Initialize Z, if requested ==== */ - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0.f, h__[i__2].i = 0.f; + if (initz) { + claset_("A", n, n, &c_b56, &c_b57, &z__[z_offset], ldz) + ; } -/* Exit from loop if a submatrix of order <= MAXB has split off. */ +/* ==== Quick return if possible ==== */ - if (l >= i__ - maxb + 1) { - goto L170; + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; } /* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } + ==== CLAHQR/CLAQR0 crossover point ==== - if (its == 20 || its == 30) { + Writing concatenation +*/ + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "CHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); -/* Exceptional shifts. 
*/ +/* ==== CLAQR0 for big matrices; CLAHQR for small ones ==== */ - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - i__3 = ii; - i__5 = ii + (ii - 1) * h_dim1; - i__6 = ii + ii * h_dim1; - r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6] - .r, dabs(r__2))) * 1.5f; - w[i__3].r = r__3, w[i__3].i = 0.f; -/* L90: */ - } + if (*n > nmin) { + claqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); } else { -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - clacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); - if (ierr > 0) { +/* ==== Small matrix ==== */ -/* - If CLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. -*/ + clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, info); - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - i__3 = i__ - ns + ii; - i__5 = ii + ii * 15 - 16; - w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; -/* L100: */ - } - } - } + if (*info > 0) { /* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in W). The result is - stored in the local array V. + ==== A rare CLAHQR failure! CLAQR0 sometimes succeeds + . when CLAHQR fails. 
==== */ - v[0].r = 1.f, v[0].i = 0.f; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - i__3 = ii - 1; - v[i__3].r = 0.f, v[i__3].i = 0.f; -/* L110: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - i__3 = nv + 1; - ccopy_(&i__3, v, &c__1, vv, &c__1); - i__3 = nv + 1; - i__5 = j; - q__1.r = -w[i__5].r, q__1.i = -w[i__5].i; - cgemv_("No transpose", &i__3, &nv, &c_b56, &h__[l + l * h_dim1], - ldh, vv, &c__1, &q__1, v, &c__1); - ++nv; - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = icamax_(&nv, v, &c__1); - i__3 = itemp - 1; - rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp - - 1]), dabs(r__2)); - if (rtemp == 0.f) { - v[0].r = 1.f, v[0].i = 0.f; - i__3 = nv; - for (ii = 2; ii <= i__3; ++ii) { - i__5 = ii - 1; - v[i__5].r = 0.f, v[i__5].i = 0.f; -/* L120: */ - } - } else { - rtemp = dmax(rtemp,smlnum); - r__1 = 1.f / rtemp; - csscal_(&nv, &r__1, v, &c__1); - } -/* L130: */ - } - -/* Multiple-shift QR step */ + kbot = *info; - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { + if (*n >= 49) { /* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN + ==== Larger matrices have enough subdiagonal scratch + . space to call CLAQR0 directly. 
==== */ - i__3 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__3,i__5); - if (k > l) { - ccopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - clarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = i__; - for (ii = k + 1; ii <= i__3; ++ii) { - i__5 = ii + (k - 1) * h_dim1; - h__[i__5].r = 0.f, h__[i__5].i = 0.f; -/* L140: */ - } - } - v[0].r = 1.f, v[0].i = 0.f; -/* - Apply G' from the left to transform the rows of the matrix - in columns K to I2. -*/ + claqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &w[1], ilo, ihi, &z__[z_offset], ldz, &work[ + 1], lwork, info); - i__3 = i2 - k + 1; - r_cnjg(&q__1, &tau); - clarfx_("Left", &nr, &i__3, v, &q__1, &h__[k + k * h_dim1], ldh, & - work[1]); + } else { /* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from CLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling CLAQR0. ==== */ - i__5 = k + nr; - i__3 = min(i__5,i__) - i1 + 1; - clarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - clarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); + clacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + i__1 = *n + 1 + *n * 49 - 50; + hl[i__1].r = 0.f, hl[i__1].i = 0.f; + i__1 = 49 - *n; + claset_("A", &c__49, &i__1, &c_b56, &c_b56, &hl[(*n + 1) * + 49 - 49], &c__49); + claqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + w[1], ilo, ihi, &z__[z_offset], ldz, workl, & + c__49, info); + if (wantt || *info != 0) { + clacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } + } } -/* L150: */ } -/* Ensure that H(I,I-1) is real. */ +/* ==== Clear out the trash, if necessary. 
==== */ - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (r_imag(&temp) != 0.f) { - r__1 = temp.r; - r__2 = r_imag(&temp); - rtemp = slapy2_(&r__1, &r__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; - q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; - temp.r = q__1.r, temp.i = q__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (wantz) { - cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__2 = *n - 2; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &h__[h_dim1 + 3], ldh); } -/* L160: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L170: - /* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ - - clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, - &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. + Computing MAX */ + r__2 = (real) max(1,*n), r__3 = work[1].r; + r__1 = dmax(r__2,r__3); + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + } - itn -= its; - i__ = l - 1; - goto L60; +/* ==== End of CHSEQR ==== */ -L180: - i__1 = max(1,*n); - work[1].r = (real) i__1, work[1].i = 0.f; return 0; - -/* End of CHSEQR */ - } /* chseqr_ */ /* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a, @@ -7844,10 +7721,10 @@ static real c_b1794 = .5f; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7923,7 +7800,7 @@ static real c_b1794 = .5f; The n-by-nb matrix Y required to update the unreduced part of A. - LDY (output) INTEGER + LDY (input) INTEGER The leading dimension of the array Y. LDY >= max(1,N). Further Details @@ -8011,7 +7888,7 @@ static real c_b1794 = .5f; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, - &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + i__ * a_dim1], & + &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + i__ * a_dim1], & c__1); i__2 = i__ - 1; clacgv_(&i__2, &y[i__ + y_dim1], ldy); @@ -8019,7 +7896,7 @@ static real c_b1794 = .5f; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx, - &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[i__ + i__ * + &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[i__ + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+1:m,i) */ @@ -8041,31 +7918,31 @@ static real c_b1794 = .5f; i__2 = *m - i__ + 1; i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + ( + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + ( i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & - c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &c__1); + c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + - a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; 
cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &x[i__ + - x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b55, & + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & y[i__ * y_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); @@ -8077,7 +7954,7 @@ static real c_b1794 = .5f; i__2 = *n - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b56, &a[i__ + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b57, &a[i__ + (i__ + 1) * a_dim1], lda); clacgv_(&i__, &a[i__ + a_dim1], lda); i__2 = i__ - 1; @@ -8086,7 +7963,7 @@ static real c_b1794 = .5f; i__3 = *n - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, &a[i__ + (i__ + 1) * a_dim1], lda); i__2 = i__ - 1; clacgv_(&i__2, &x[i__ + x_dim1], ldx); @@ -8109,28 +7986,28 @@ static real c_b1794 = .5f; i__2 = *m - i__; i__3 = *n - i__; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + ( + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + ( i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b55, &x[i__ + 1 + i__ * x_dim1], &c__1); + lda, &c_b56, &x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, 
&y[i__ + 1 + cgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &y[i__ + 1 + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b55, &x[i__ * x_dim1 + 1], &c__1); + c_b56, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b55, &x[i__ * x_dim1 + 1], &c__1); + c_b56, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); @@ -8156,7 +8033,7 @@ static real c_b1794 = .5f; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy, - &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], lda); i__2 = i__ - 1; clacgv_(&i__2, &a[i__ + a_dim1], lda); @@ -8166,7 +8043,7 @@ static real c_b1794 = .5f; i__3 = *n - i__ + 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, &a[i__ + + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, &a[i__ + i__ * a_dim1], lda); i__2 = i__ - 1; clacgv_(&i__2, &x[i__ + x_dim1], ldx); @@ -8190,30 +8067,30 @@ static real c_b1794 = .5f; i__2 = *m - i__; i__3 = *n - i__ + 1; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + i__ - * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, & + 
cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, & x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &y[i__ + - y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__ + 1; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[ + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[ + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); @@ -8228,14 +8105,14 @@ static real c_b1794 = .5f; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = i__ - 1; clacgv_(&i__2, &y[i__ + y_dim1], ldy); i__2 = *m - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[ + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[ i__ + 1 + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to 
annihilate A(i+2:m,i) */ @@ -8256,30 +8133,30 @@ static real c_b1794 = .5f; i__2 = *m - i__; i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * - a_dim1], &c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], & + a_dim1], &c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], & c__1); i__2 = *m - i__; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &y[i__ * y_dim1 + 1], &c__1); + c_b56, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[ + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__; - cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &x[i__ + 1 + cgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &x[i__ + 1 + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &y[i__ * y_dim1 + 1], &c__1); + c_b56, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); } else { @@ -8309,10 +8186,10 @@ static real c_b1794 = .5f; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8382,10 +8259,10 @@ static real c_b1794 = .5f; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8494,10 +8371,10 @@ static real c_b1794 = .5f; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8614,10 +8491,10 @@ static real c_b1794 = .5f; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8695,8 +8572,8 @@ static real c_b1794 = .5f; } l = *m * *n + 1; - sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & - c_b1101, &rwork[l], m); + sgemm_("N", "N", m, n, n, &c_b894, &rwork[1], m, &b[b_offset], ldb, & + c_b1087, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -8718,8 +8595,8 @@ static real c_b1794 = .5f; } /* L60: */ } - sgemm_("N", "N", m, n, n, &c_b871, &rwork[1], m, &b[b_offset], ldb, & - c_b1101, &rwork[l], m); + sgemm_("N", "N", m, n, n, &c_b894, &rwork[1], m, &b[b_offset], ldb, & + c_b1087, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -8757,10 +8634,10 @@ static real c_b1794 = .5f; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8836,10 +8713,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9160,8 +9037,7 @@ static real c_b1794 = .5f; integer pow_ii(integer *, integer *); /* Local variables */ - static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, - indxc, indxp; + static integer i__, k, n1, n2, iq, iw, iz, ptr, indx, curr, indxc, indxp; extern /* Subroutine */ int claed8_(integer *, integer *, integer *, complex *, integer *, real *, real *, integer *, real *, real *, complex *, integer *, real *, integer *, integer *, integer *, @@ -9180,10 +9056,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9436,8 +9312,6 @@ static real c_b1794 = .5f; n1 = k; n2 = *n - k; - ind1 = 1; - ind2 = *n; slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); } else { qptr[curr + 1] = qptr[curr]; @@ -9489,10 +9363,10 @@ static real c_b1794 = .5f; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -9649,6 +9523,15 @@ static real c_b1794 = .5f; return 0; } +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). 
+*/ + + *givptr = 0; + /* Quick return if possible */ if (*n == 0) { @@ -9660,7 +9543,7 @@ static real c_b1794 = .5f; n1p1 = n1 + 1; if (*rho < 0.f) { - sscal_(&n2, &c_b1150, &z__[n1p1], &c__1); + sscal_(&n2, &c_b1136, &z__[n1p1], &c__1); } /* Normalize z so that norm(z) = 1 */ @@ -9732,7 +9615,6 @@ static real c_b1794 = .5f; */ *k = 0; - *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -9875,14 +9757,16 @@ static real c_b1794 = .5f; info) { /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; real r__1, r__2, r__3, r__4, r__5, r__6; - complex q__1, q__2, q__3, q__4; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7; /* Builtin functions */ double r_imag(complex *); - void c_sqrt(complex *, complex *), r_cnjg(complex *, complex *); + void r_cnjg(complex *, complex *); double c_abs(complex *); + void c_sqrt(complex *, complex *), pow_ci(complex *, complex *, integer *) + ; /* Local variables */ static integer i__, j, k, l, m; @@ -9892,59 +9776,60 @@ static real c_b1794 = .5f; static complex t1; static real t2; static complex v2; - static real h10; + static real aa, ab, ba, bb, h10; static complex h11; static real h21; - static complex h22; + static complex h22, sc; static integer nh, nz; + static real sx; + static integer jhi; static complex h11s; - static integer itn, its; + static integer jlo, its; static real ulp; static complex sum; - static real tst1; + static real tst; static complex temp; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), ccopy_(integer *, complex *, integer *, complex *, integer *); - static real rtemp, rwork[1]; - extern /* Subroutine */ int clarfg_(integer *, complex *, complex *, - integer *, complex *); + static real rtemp; + extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *, + complex *, complex *, integer *, complex *); extern /* Complex */ 
VOID cladiv_(complex *, complex *, complex *); - extern doublereal slamch_(char *), clanhs_(char *, integer *, - complex *, integer *, real *); - static real smlnum; + extern doublereal slamch_(char *); + static real safmin, safmax, smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 - Purpose - ======= + Purpose + ======= - CLAHQR is an auxiliary routine called by CHSEQR to update the - eigenvalues and Schur decomposition already computed by CHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + CLAHQR is an auxiliary routine called by CHSEQR to update the + eigenvalues and Schur decomposition already computed by CHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. - Arguments - ========= + Arguments + ========= - WANTT (input) LOGICAL + WANTT (input) LOGICAL = .TRUE. : the full Schur form T is required; = .FALSE.: only eigenvalues are required. - WANTZ (input) LOGICAL + WANTZ (input) LOGICAL = .TRUE. : the matrix of Schur vectors Z is required; = .FALSE.: Schur vectors are not required. - N (input) INTEGER + N (input) INTEGER The order of the matrix H. N >= 0. - ILO (input) INTEGER - IHI (input) INTEGER + ILO (input) INTEGER + IHI (input) INTEGER It is assumed that H is already upper triangular in rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). CLAHQR works primarily with the Hessenberg submatrix in rows @@ -9952,46 +9837,78 @@ static real c_b1794 = .5f; H if WANTT is .TRUE.. 1 <= ILO <= max(1,IHI); IHI <= N. - H (input/output) COMPLEX array, dimension (LDH,N) + H (input/output) COMPLEX array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. 
- On exit, if WANTT is .TRUE., H is upper triangular in rows - and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. + On exit, if INFO is zero and if WANTT is .TRUE., then H + is upper triangular in rows and columns ILO:IHI. If INFO + is zero and if WANTT is .FALSE., then the contents of H + are unspecified on exit. The output state of H in case + INF is positive is below under the description of INFO. - LDH (input) INTEGER + LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). - W (output) COMPLEX array, dimension (N) + W (output) COMPLEX array, dimension (N) The computed eigenvalues ILO to IHI are stored in the corresponding elements of W. If WANTT is .TRUE., the eigenvalues are stored in the same order as on the diagonal of the Schur form returned in H, with W(i) = H(i,i). - ILOZ (input) INTEGER - IHIZ (input) INTEGER + ILOZ (input) INTEGER + IHIZ (input) INTEGER Specify the rows of Z to which transformations must be applied if WANTZ is .TRUE.. 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - Z (input/output) COMPLEX array, dimension (LDZ,N) + Z (input/output) COMPLEX array, dimension (LDZ,N) If WANTZ is .TRUE., on entry Z must contain the current matrix Z of transformations accumulated by CHSEQR, and on exit Z has been updated; transformations are applied only to the submatrix Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not referenced. - LDZ (input) INTEGER + LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). - INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = i, CLAHQR failed to compute all the - eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) - iterations; elements i+1:ihi of W contain those - eigenvalues which have been successfully computed. + INFO (output) INTEGER + = 0: successful exit + .GT. 
0: if INFO = i, CLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of W contain + those eigenvalues which have been successfully + computed. - ===================================================================== + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix + rows and columns ILO thorugh INFO of the final, + output value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of CLAHQR from LAPACK version 3.0. + It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). 
+ + ========================================================= */ @@ -10019,16 +9936,74 @@ static real c_b1794 = .5f; return 0; } +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + i__2 = j + 2 + j * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; + i__2 = j + 3 + j * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + i__1 = *ihi + (*ihi - 2) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } +/* ==== ensure that subdiagonal entries are real ==== */ + if (*wantt) { + jlo = 1; + jhi = *n; + } else { + jlo = *ilo; + jhi = *ihi; + } + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + if (r_imag(&h__[i__ + (i__ - 1) * h_dim1]) != 0.f) { +/* + ==== The following redundant normalization + . avoids problems with both gradual and + . sudden underflow in ABS(H(I,I-1)) ==== +*/ + i__2 = i__ + (i__ - 1) * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + r__3 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[i__ + + (i__ - 1) * h_dim1]), dabs(r__2)); + q__1.r = h__[i__2].r / r__3, q__1.i = h__[i__2].i / r__3; + sc.r = q__1.r, sc.i = q__1.i; + r_cnjg(&q__2, &sc); + r__1 = c_abs(&sc); + q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1; + sc.r = q__1.r, sc.i = q__1.i; + i__2 = i__ + (i__ - 1) * h_dim1; + r__1 = c_abs(&h__[i__ + (i__ - 1) * h_dim1]); + h__[i__2].r = r__1, h__[i__2].i = 0.f; + i__2 = jhi - i__ + 1; + cscal_(&i__2, &sc, &h__[i__ + i__ * h_dim1], ldh); +/* Computing MIN */ + i__3 = jhi, i__4 = i__ + 1; + i__2 = min(i__3,i__4) - jlo + 1; + r_cnjg(&q__1, &sc); + cscal_(&i__2, &q__1, &h__[jlo + i__ * h_dim1], &c__1); + if (*wantz) { + i__2 = *ihiz - *iloz + 1; + r_cnjg(&q__1, &sc); + cscal_(&i__2, &q__1, &z__[*iloz + i__ * z_dim1], &c__1); + } + } +/* L20: */ + } + nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. 
-*/ +/* Set machine-dependent constants for the stopping criterion. */ - ulp = slamch_("Precision"); - smlnum = slamch_("Safe minimum") / ulp; + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) nh / ulp); /* I1 and I2 are the indices of the first row and last column of H @@ -10041,10 +10016,6 @@ static real c_b1794 = .5f; i2 = *n; } -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of 1. Each iteration of the loop works @@ -10054,9 +10025,9 @@ static real c_b1794 = .5f; */ i__ = *ihi; -L10: +L30: if (i__ < *ilo) { - goto L130; + goto L150; } /* @@ -10066,45 +10037,102 @@ static real c_b1794 = .5f; */ l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + for (its = 0; its <= 30; ++its) { /* Look for a single small subdiagonal element. */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__4 = k + k * h_dim1; - tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - - 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__4] + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + i__2 = k + (k - 1) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[k + (k + - 1) * h_dim1]), dabs(r__2)) <= smlnum) { + goto L50; + } + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + tst = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[k - + 1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__3] .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]), dabs(r__4))); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + if (tst == 0.f) { + if (k - 2 >= *ilo) { + i__2 = k - 1 + (k - 2) * h_dim1; + tst += (r__1 = h__[i__2].r, dabs(r__1)); + } + if (k + 1 <= *ihi) { + i__2 = k + 1 + k * h_dim1; + tst += (r__1 = h__[i__2].r, dabs(r__1)); 
+ } } - i__3 = k + (k - 1) * h_dim1; +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some examples. ==== +*/ + i__2 = k + (k - 1) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) <= ulp * tst) { /* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) { - goto L30; + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + r__5 = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + (k - 1) * h_dim1]), dabs(r__2)), r__6 = (r__3 = + h__[i__3].r, dabs(r__3)) + (r__4 = r_imag(&h__[k - 1 + + k * h_dim1]), dabs(r__4)); + ab = dmax(r__5,r__6); +/* Computing MIN */ + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + r__5 = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + (k - 1) * h_dim1]), dabs(r__2)), r__6 = (r__3 = + h__[i__3].r, dabs(r__3)) + (r__4 = r_imag(&h__[k - 1 + + k * h_dim1]), dabs(r__4)); + ba = dmin(r__5,r__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + q__2.r = h__[i__2].r - h__[i__3].r, q__2.i = h__[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MAX */ + i__4 = k + k * h_dim1; + r__5 = (r__1 = h__[i__4].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + k * h_dim1]), dabs(r__2)), r__6 = (r__3 = q__1.r, + dabs(r__3)) + (r__4 = r_imag(&q__1), dabs(r__4)); + aa = dmax(r__5,r__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + q__2.r = h__[i__2].r - h__[i__3].r, q__2.i = h__[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MIN */ + i__4 = k + k * h_dim1; + r__5 = (r__1 = h__[i__4].r, dabs(r__1)) + (r__2 = r_imag(&h__[ + k + k * h_dim1]), dabs(r__2)), r__6 = (r__3 = q__1.r, + dabs(r__3)) + (r__4 = r_imag(&q__1), dabs(r__4)); + bb = dmin(r__5,r__6); + s = aa + ab; +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= 
dmax(r__1,r__2)) { + goto L50; + } } -/* L20: */ +/* L40: */ } -L30: +L50: l = k; if (l > *ilo) { /* H(L,L-1) is negligible */ - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0.f, h__[i__2].i = 0.f; + i__1 = l + (l - 1) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; } /* Exit from loop if a submatrix of order 1 has split off. */ if (l >= i__) { - goto L120; + goto L140; } /* @@ -10118,42 +10146,67 @@ static real c_b1794 = .5f; i2 = i__; } - if (its == 10 || its == 20) { + if (its == 10) { /* Exceptional shift. */ - i__2 = i__ + (i__ - 1) * h_dim1; - s = (r__1 = h__[i__2].r, dabs(r__1)) * .75f; - i__2 = i__ + i__ * h_dim1; - q__1.r = s + h__[i__2].r, q__1.i = h__[i__2].i; + i__1 = l + 1 + l * h_dim1; + s = (r__1 = h__[i__1].r, dabs(r__1)) * .75f; + i__1 = l + l * h_dim1; + q__1.r = s + h__[i__1].r, q__1.i = h__[i__1].i; + t.r = q__1.r, t.i = q__1.i; + } else if (its == 20) { + +/* Exceptional shift. */ + + i__1 = i__ + (i__ - 1) * h_dim1; + s = (r__1 = h__[i__1].r, dabs(r__1)) * .75f; + i__1 = i__ + i__ * h_dim1; + q__1.r = s + h__[i__1].r, q__1.i = h__[i__1].i; t.r = q__1.r, t.i = q__1.i; } else { /* Wilkinson's shift. 
*/ - i__2 = i__ + i__ * h_dim1; - t.r = h__[i__2].r, t.i = h__[i__2].i; - i__2 = i__ - 1 + i__ * h_dim1; - i__3 = i__ + (i__ - 1) * h_dim1; - r__1 = h__[i__3].r; - q__1.r = r__1 * h__[i__2].r, q__1.i = r__1 * h__[i__2].i; + i__1 = i__ + i__ * h_dim1; + t.r = h__[i__1].r, t.i = h__[i__1].i; + c_sqrt(&q__2, &h__[i__ - 1 + i__ * h_dim1]); + c_sqrt(&q__3, &h__[i__ + (i__ - 1) * h_dim1]); + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = q__2.r * + q__3.i + q__2.i * q__3.r; u.r = q__1.r, u.i = q__1.i; - if (u.r != 0.f || u.i != 0.f) { - i__2 = i__ - 1 + (i__ - 1) * h_dim1; - q__2.r = h__[i__2].r - t.r, q__2.i = h__[i__2].i - t.i; + s = (r__1 = u.r, dabs(r__1)) + (r__2 = r_imag(&u), dabs(r__2)); + if (s != 0.f) { + i__1 = i__ - 1 + (i__ - 1) * h_dim1; + q__2.r = h__[i__1].r - t.r, q__2.i = h__[i__1].i - t.i; q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f; x.r = q__1.r, x.i = q__1.i; - q__3.r = x.r * x.r - x.i * x.i, q__3.i = x.r * x.i + x.i * - x.r; - q__2.r = q__3.r + u.r, q__2.i = q__3.i + u.i; - c_sqrt(&q__1, &q__2); + sx = (r__1 = x.r, dabs(r__1)) + (r__2 = r_imag(&x), dabs(r__2) + ); +/* Computing MAX */ + r__3 = s, r__4 = (r__1 = x.r, dabs(r__1)) + (r__2 = r_imag(&x) + , dabs(r__2)); + s = dmax(r__3,r__4); + q__5.r = x.r / s, q__5.i = x.i / s; + pow_ci(&q__4, &q__5, &c__2); + q__7.r = u.r / s, q__7.i = u.i / s; + pow_ci(&q__6, &q__7, &c__2); + q__3.r = q__4.r + q__6.r, q__3.i = q__4.i + q__6.i; + c_sqrt(&q__2, &q__3); + q__1.r = s * q__2.r, q__1.i = s * q__2.i; y.r = q__1.r, y.i = q__1.i; - if (x.r * y.r + r_imag(&x) * r_imag(&y) < 0.f) { - q__1.r = -y.r, q__1.i = -y.i; - y.r = q__1.r, y.i = q__1.i; + if (sx > 0.f) { + q__1.r = x.r / sx, q__1.i = x.i / sx; + q__2.r = x.r / sx, q__2.i = x.i / sx; + if (q__1.r * y.r + r_imag(&q__2) * r_imag(&y) < 0.f) { + q__3.r = -y.r, q__3.i = -y.i; + y.r = q__3.r, y.i = q__3.i; + } } - q__3.r = x.r + y.r, q__3.i = x.i + y.i; - cladiv_(&q__2, &u, &q__3); + q__4.r = x.r + y.r, q__4.i = x.i + y.i; + cladiv_(&q__3, &u, &q__4); + 
q__2.r = u.r * q__3.r - u.i * q__3.i, q__2.i = u.r * q__3.i + + u.i * q__3.r; q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i; t.r = q__1.r, t.i = q__1.i; } @@ -10161,8 +10214,8 @@ static real c_b1794 = .5f; /* Look for two consecutive small subdiagonal elements. */ - i__2 = l + 1; - for (m = i__ - 1; m >= i__2; --m) { + i__1 = l + 1; + for (m = i__ - 1; m >= i__1; --m) { /* Determine the effect of starting the single-shift QR @@ -10170,14 +10223,14 @@ static real c_b1794 = .5f; negligible. */ - i__3 = m + m * h_dim1; - h11.r = h__[i__3].r, h11.i = h__[i__3].i; - i__3 = m + 1 + (m + 1) * h_dim1; - h22.r = h__[i__3].r, h22.i = h__[i__3].i; + i__2 = m + m * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; h11s.r = q__1.r, h11s.i = q__1.i; - i__3 = m + 1 + m * h_dim1; - h21 = h__[i__3].r; + i__2 = m + 1 + m * h_dim1; + h21 = h__[i__2].r; s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( r__2)) + dabs(h21); q__1.r = h11s.r / s, q__1.i = h11s.i / s; @@ -10185,25 +10238,25 @@ static real c_b1794 = .5f; h21 /= s; v[0].r = h11s.r, v[0].i = h11s.i; v[1].r = h21, v[1].i = 0.f; - i__3 = m + (m - 1) * h_dim1; - h10 = h__[i__3].r; - tst1 = ((r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs( - r__2))) * ((r__3 = h11.r, dabs(r__3)) + (r__4 = r_imag(& - h11), dabs(r__4)) + ((r__5 = h22.r, dabs(r__5)) + (r__6 = - r_imag(&h22), dabs(r__6)))); - if ((r__1 = h10 * h21, dabs(r__1)) <= ulp * tst1) { - goto L50; + i__2 = m + (m - 1) * h_dim1; + h10 = h__[i__2].r; + if (dabs(h10) * dabs(h21) <= ulp * (((r__1 = h11s.r, dabs(r__1)) + + (r__2 = r_imag(&h11s), dabs(r__2))) * ((r__3 = h11.r, + dabs(r__3)) + (r__4 = r_imag(&h11), dabs(r__4)) + ((r__5 = + h22.r, dabs(r__5)) + (r__6 = r_imag(&h22), dabs(r__6))))) + ) { + goto L70; } -/* L40: */ +/* L60: */ } - i__2 = l + l * h_dim1; - h11.r = h__[i__2].r, h11.i = h__[i__2].i; - i__2 = l + 1 + (l + 1) * 
h_dim1; - h22.r = h__[i__2].r, h22.i = h__[i__2].i; + i__1 = l + l * h_dim1; + h11.r = h__[i__1].r, h11.i = h__[i__1].i; + i__1 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__1].r, h22.i = h__[i__1].i; q__1.r = h11.r - t.r, q__1.i = h11.i - t.i; h11s.r = q__1.r, h11s.i = q__1.i; - i__2 = l + 1 + l * h_dim1; - h21 = h__[i__2].r; + i__1 = l + 1 + l * h_dim1; + h21 = h__[i__1].r; s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2)) + dabs(h21); q__1.r = h11s.r / s, q__1.i = h11s.i / s; @@ -10211,12 +10264,12 @@ static real c_b1794 = .5f; h21 /= s; v[0].r = h11s.r, v[0].i = h11s.i; v[1].r = h21, v[1].i = 0.f; -L50: +L70: /* Single-shift QR step */ - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { /* The first iteration of this loop determines a reflection G @@ -10237,10 +10290,10 @@ static real c_b1794 = .5f; } clarfg_(&c__2, v, &v[1], &c__1, &t1); if (k > m) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = k + 1 + (k - 1) * h_dim1; - h__[i__3].r = 0.f, h__[i__3].i = 0.f; + i__2 = k + (k - 1) * h_dim1; + h__[i__2].r = v[0].r, h__[i__2].i = v[0].i; + i__2 = k + 1 + (k - 1) * h_dim1; + h__[i__2].r = 0.f, h__[i__2].i = 0.f; } v2.r = v[1].r, v2.i = v[1].i; q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i * @@ -10252,27 +10305,27 @@ static real c_b1794 = .5f; in columns K to I2. 
*/ - i__3 = i2; - for (j = k; j <= i__3; ++j) { + i__2 = i2; + for (j = k; j <= i__2; ++j) { r_cnjg(&q__3, &t1); - i__4 = k + j * h_dim1; - q__2.r = q__3.r * h__[i__4].r - q__3.i * h__[i__4].i, q__2.i = - q__3.r * h__[i__4].i + q__3.i * h__[i__4].r; - i__5 = k + 1 + j * h_dim1; - q__4.r = t2 * h__[i__5].r, q__4.i = t2 * h__[i__5].i; + i__3 = k + j * h_dim1; + q__2.r = q__3.r * h__[i__3].r - q__3.i * h__[i__3].i, q__2.i = + q__3.r * h__[i__3].i + q__3.i * h__[i__3].r; + i__4 = k + 1 + j * h_dim1; + q__4.r = t2 * h__[i__4].r, q__4.i = t2 * h__[i__4].i; q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i; sum.r = q__1.r, sum.i = q__1.i; + i__3 = k + j * h_dim1; i__4 = k + j * h_dim1; - i__5 = k + j * h_dim1; - q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + q__1.r = h__[i__4].r - sum.r, q__1.i = h__[i__4].i - sum.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + i__3 = k + 1 + j * h_dim1; i__4 = k + 1 + j * h_dim1; - i__5 = k + 1 + j * h_dim1; q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i + sum.i * v2.r; - q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; -/* L60: */ + q__1.r = h__[i__4].r - q__2.r, q__1.i = h__[i__4].i - q__2.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; +/* L80: */ } /* @@ -10281,57 +10334,57 @@ static real c_b1794 = .5f; Computing MIN */ - i__4 = k + 2; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - i__4 = j + k * h_dim1; - q__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, q__2.i = - t1.r * h__[i__4].i + t1.i * h__[i__4].r; - i__5 = j + (k + 1) * h_dim1; - q__3.r = t2 * h__[i__5].r, q__3.i = t2 * h__[i__5].i; + i__3 = k + 2; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + i__3 = j + k * h_dim1; + q__2.r = t1.r * h__[i__3].r - t1.i * h__[i__3].i, q__2.i = + t1.r * h__[i__3].i + t1.i * h__[i__3].r; + i__4 = j + (k + 1) * h_dim1; + q__3.r = t2 * h__[i__4].r, q__3.i = t2 * h__[i__4].i; q__1.r = 
q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; sum.r = q__1.r, sum.i = q__1.i; + i__3 = j + k * h_dim1; i__4 = j + k * h_dim1; - i__5 = j + k * h_dim1; - q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + q__1.r = h__[i__4].r - sum.r, q__1.i = h__[i__4].i - sum.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + i__3 = j + (k + 1) * h_dim1; i__4 = j + (k + 1) * h_dim1; - i__5 = j + (k + 1) * h_dim1; r_cnjg(&q__3, &v2); q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * q__3.i + sum.i * q__3.r; - q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i; - h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; -/* L70: */ + q__1.r = h__[i__4].r - q__2.r, q__1.i = h__[i__4].i - q__2.i; + h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; +/* L90: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - i__4 = j + k * z_dim1; - q__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, q__2.i = - t1.r * z__[i__4].i + t1.i * z__[i__4].r; - i__5 = j + (k + 1) * z_dim1; - q__3.r = t2 * z__[i__5].r, q__3.i = t2 * z__[i__5].i; + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + i__3 = j + k * z_dim1; + q__2.r = t1.r * z__[i__3].r - t1.i * z__[i__3].i, q__2.i = + t1.r * z__[i__3].i + t1.i * z__[i__3].r; + i__4 = j + (k + 1) * z_dim1; + q__3.r = t2 * z__[i__4].r, q__3.i = t2 * z__[i__4].i; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; sum.r = q__1.r, sum.i = q__1.i; + i__3 = j + k * z_dim1; i__4 = j + k * z_dim1; - i__5 = j + k * z_dim1; - q__1.r = z__[i__5].r - sum.r, q__1.i = z__[i__5].i - + q__1.r = z__[i__4].r - sum.r, q__1.i = z__[i__4].i - sum.i; - z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + z__[i__3].r = q__1.r, z__[i__3].i = q__1.i; + i__3 = j + (k + 1) * z_dim1; i__4 = j + (k + 1) * z_dim1; - i__5 = j + (k + 1) * z_dim1; r_cnjg(&q__3, &v2); q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r * q__3.i + sum.i * q__3.r; - q__1.r = z__[i__5].r - q__2.r, 
q__1.i = z__[i__5].i - + q__1.r = z__[i__4].r - q__2.r, q__1.i = z__[i__4].i - q__2.i; - z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; -/* L80: */ + z__[i__3].r = q__1.r, z__[i__3].i = q__1.i; +/* L100: */ } } @@ -10349,66 +10402,66 @@ static real c_b1794 = .5f; r__1 = c_abs(&temp); q__1.r = temp.r / r__1, q__1.i = temp.i / r__1; temp.r = q__1.r, temp.i = q__1.i; + i__2 = m + 1 + m * h_dim1; i__3 = m + 1 + m * h_dim1; - i__4 = m + 1 + m * h_dim1; r_cnjg(&q__2, &temp); - q__1.r = h__[i__4].r * q__2.r - h__[i__4].i * q__2.i, q__1.i = - h__[i__4].r * q__2.i + h__[i__4].i * q__2.r; - h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + q__1.r = h__[i__3].r * q__2.r - h__[i__3].i * q__2.i, q__1.i = + h__[i__3].r * q__2.i + h__[i__3].i * q__2.r; + h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; if (m + 2 <= i__) { + i__2 = m + 2 + (m + 1) * h_dim1; i__3 = m + 2 + (m + 1) * h_dim1; - i__4 = m + 2 + (m + 1) * h_dim1; - q__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, - q__1.i = h__[i__4].r * temp.i + h__[i__4].i * + q__1.r = h__[i__3].r * temp.r - h__[i__3].i * temp.i, + q__1.i = h__[i__3].r * temp.i + h__[i__3].i * temp.r; - h__[i__3].r = q__1.r, h__[i__3].i = q__1.i; + h__[i__2].r = q__1.r, h__[i__2].i = q__1.i; } - i__3 = i__; - for (j = m; j <= i__3; ++j) { + i__2 = i__; + for (j = m; j <= i__2; ++j) { if (j != m + 1) { if (i2 > j) { - i__4 = i2 - j; - cscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], + i__3 = i2 - j; + cscal_(&i__3, &temp, &h__[j + (j + 1) * h_dim1], ldh); } - i__4 = j - i1; + i__3 = j - i1; r_cnjg(&q__1, &temp); - cscal_(&i__4, &q__1, &h__[i1 + j * h_dim1], &c__1); + cscal_(&i__3, &q__1, &h__[i1 + j * h_dim1], &c__1); if (*wantz) { r_cnjg(&q__1, &temp); cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], & c__1); } } -/* L90: */ +/* L110: */ } } -/* L100: */ +/* L120: */ } /* Ensure that H(I,I-1) is real. 
*/ - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; + i__1 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__1].r, temp.i = h__[i__1].i; if (r_imag(&temp) != 0.f) { rtemp = c_abs(&temp); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.f; + i__1 = i__ + (i__ - 1) * h_dim1; + h__[i__1].r = rtemp, h__[i__1].i = 0.f; q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp; temp.r = q__1.r, temp.i = q__1.i; if (i2 > i__) { - i__2 = i2 - i__; + i__1 = i2 - i__; r_cnjg(&q__1, &temp); - cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + cscal_(&i__1, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); } - i__2 = i__ - i1; - cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + i__1 = i__ - i1; + cscal_(&i__1, &temp, &h__[i1 + i__ * h_dim1], &c__1); if (*wantz) { cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); } } -/* L110: */ +/* L130: */ } /* Failure to converge in remaining number of iterations */ @@ -10416,7 +10469,7 @@ static real c_b1794 = .5f; *info = i__; return 0; -L120: +L140: /* H(I,I-1) is negligible: one eigenvalue has converged. */ @@ -10424,23 +10477,19 @@ static real c_b1794 = .5f; i__2 = i__ + i__ * h_dim1; w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ +/* return to start of the main loop with new value of I. 
*/ - itn -= its; i__ = l - 1; - goto L10; + goto L30; -L130: +L150: return 0; /* End of CLAHQR */ } /* clahqr_ */ -/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a, +/* Subroutine */ int clahr2_(integer *n, integer *k, integer *nb, complex *a, integer *lda, complex *tau, complex *t, integer *ldt, complex *y, integer *ldy) { @@ -10453,28 +10502,35 @@ static real c_b1794 = .5f; static integer i__; static complex ei; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, - integer *), cgemv_(char *, integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, complex *, - integer *), ccopy_(integer *, complex *, integer *, - complex *, integer *), caxpy_(integer *, complex *, complex *, - integer *, complex *, integer *), ctrmv_(char *, char *, char *, - integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer - *, complex *), clacgv_(integer *, complex *, integer *); + integer *), cgemm_(char *, char *, integer *, integer *, integer * + , complex *, complex *, integer *, complex *, integer *, complex * + , complex *, integer *), cgemv_(char *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *), ccopy_(integer *, + complex *, integer *, complex *, integer *), ctrmm_(char *, char * + , char *, char *, integer *, integer *, complex *, complex *, + integer *, complex *, integer *), + caxpy_(integer *, complex *, complex *, integer *, complex *, + integer *), ctrmv_(char *, char *, char *, integer *, complex *, + integer *, complex *, integer *), clarfg_( + integer *, complex *, complex *, integer *, complex *), clacgv_( + integer *, complex *, integer *), clacpy_(char *, integer *, + integer *, complex *, integer *, complex *, integer *); +/* -- LAPACK auxiliary routine (version 3.2.1) -- */ +/* -- LAPACK is a software package provided by Univ. 
of Tennessee, --* -- April 2009 + -- */ /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose ======= - CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) + CLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by a unitary similarity transformation + reduction is performed by an unitary similarity transformation Q' * A * Q. The routine returns the matrices V and T which determine Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. @@ -10489,6 +10545,7 @@ static real c_b1794 = .5f; K (input) INTEGER The offset for the reduction. Elements below the k-th subdiagonal in the first NB columns are reduced to zero. + K < N. NB (input) INTEGER The number of columns to be reduced. @@ -10519,7 +10576,7 @@ static real c_b1794 = .5f; The n-by-nb matrix Y. LDY (input) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). + The leading dimension of the array Y. LDY >= N. Further Details =============== @@ -10544,9 +10601,9 @@ static real c_b1794 = .5f; The contents of A on exit are illustrated by the following example with n = 7, k = 3 and nb = 2: - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) ( h h a a a ) ( v1 h a a a ) ( v1 v2 a a a ) @@ -10556,6 +10613,19 @@ static real c_b1794 = .5f; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. 
(This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. + ===================================================================== @@ -10584,18 +10654,19 @@ static real c_b1794 = .5f; if (i__ > 1) { /* - Update A(1:n,i) + Update A(K+1:N,I) - Compute i-th column of A - Y * V' + Update I-th column of A - Y * V' */ i__2 = i__ - 1; clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - i__2 = i__ - 1; + i__2 = *n - *k; + i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k - + i__ - 1 + a_dim1], lda, &c_b56, &a[i__ * a_dim1 + 1], & - c__1); + cgemv_("NO TRANSPOSE", &i__2, &i__3, &q__1, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b57, &a[*k + 1 + + i__ * a_dim1], &c__1); i__2 = i__ - 1; clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); @@ -10615,21 +10686,21 @@ static real c_b1794 = .5f; ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; - ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + + ctrmv_("Lower", "Conjugate transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); /* w := w + V2'*b2 */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b57, &t[*nb * t_dim1 + 1], &c__1); /* w := T'*w */ i__2 = i__ - 1; - ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ + ctrmv_("Upper", "Conjugate transpose", "NON-UNIT", &i__2, &t[ t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); /* b2 := b2 - V2*w */ @@ -10637,14 +10708,14 @@ static real c_b1794 = .5f; 
i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1, &c_b56, &a[*k + i__ + + cgemv_("NO TRANSPOSE", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1], + lda, &t[*nb * t_dim1 + 1], &c__1, &c_b57, &a[*k + i__ + i__ * a_dim1], &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; - ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + ctrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] , lda, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; @@ -10656,45 +10727,49 @@ static real c_b1794 = .5f; } /* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) + Generate the elementary reflector H(I) to annihilate + A(K+I+1:N,I) */ - i__2 = *k + i__ + i__ * a_dim1; - ei.r = a[i__2].r, ei.i = a[i__2].i; i__2 = *n - *k - i__ + 1; /* Computing MIN */ i__3 = *k + i__ + 1; - clarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) - ; + clarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; i__2 = *k + i__ + i__ * a_dim1; a[i__2].r = 1.f, a[i__2].i = 0.f; -/* Compute Y(1:n,i) */ +/* Compute Y(K+1:N,I) */ - i__2 = *n - *k - i__ + 1; - cgemv_("No transpose", n, &i__2, &c_b56, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &y[i__ * - y_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + cgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b57, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &y[* + k + 1 + i__ * y_dim1], &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &t[ + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], 
&c__1, &c_b56, &t[ i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; + i__2 = *n - *k; + i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; - cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b56, &y[i__ * y_dim1 + 1], &c__1); - cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + cgemv_("NO TRANSPOSE", &i__2, &i__3, &q__1, &y[*k + 1 + y_dim1], ldy, + &t[i__ * t_dim1 + 1], &c__1, &c_b57, &y[*k + 1 + i__ * y_dim1] + , &c__1); + i__2 = *n - *k; + cscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); -/* Compute T(1:i,i) */ +/* Compute T(1:I,I) */ i__2 = i__ - 1; i__3 = i__; q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; - ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + ctrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1) ; i__2 = i__ + i__ * t_dim1; @@ -10706,11 +10781,25 @@ static real c_b1794 = .5f; i__1 = *k + *nb + *nb * a_dim1; a[i__1].r = ei.r, a[i__1].i = ei.i; +/* Compute Y(1:K,1:NB) */ + + clacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + ctrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b57, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + cgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b57, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b57, &y[y_offset], ldy); + } + ctrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b57, &t[ + t_offset], ldt, &y[y_offset], ldy); + return 0; -/* End of CLAHRD */ +/* End of CLAHR2 */ -} /* clahrd_ */ +} /* clahr2_ */ doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * lda, real *work) @@ -10732,10 +10821,10 @@ doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10761,7 +10850,7 @@ doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -10784,7 +10873,7 @@ doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer * LDA (input) INTEGER The leading dimension of the array A. LDA >= max(M,1). - WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), where LWORK >= M when NORM = 'I'; otherwise, WORK is not referenced. @@ -10901,10 +10990,10 @@ doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10930,7 +11019,7 @@ doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). 
Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -10962,7 +11051,7 @@ doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * LDA (input) INTEGER The leading dimension of the array A. LDA >= max(N,1). - WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, WORK is not referenced. @@ -11116,183 +11205,4276 @@ doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer * } /* clanhe_ */ -doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * - work) +/* Subroutine */ int claqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real ret_val, r__1, r__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8; + complex q__1, q__2, q__3, q__4, q__5; /* Builtin functions */ - double c_abs(complex *), sqrt(doublereal); + double r_imag(complex *); + void c_sqrt(complex *, complex *); /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int classq_(integer *, complex *, integer *, real - *, real *); + static integer i__, k; + static real s; + static complex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static complex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static complex swap; + static integer ktop; + static complex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + 
extern /* Subroutine */ int claqr3_(logical *, logical *, integer *, + integer *, integer *, integer *, complex *, integer *, integer *, + integer *, complex *, integer *, integer *, integer *, complex *, + complex *, integer *, integer *, complex *, integer *, integer *, + complex *, integer *, complex *, integer *), claqr4_(logical *, + logical *, integer *, integer *, integer *, complex *, integer *, + complex *, integer *, integer *, complex *, integer *, complex *, + integer *, integer *), claqr5_(logical *, logical *, integer *, + integer *, integer *, integer *, integer *, complex *, complex *, + integer *, integer *, integer *, complex *, integer *, complex *, + integer *, complex *, integer *, integer *, complex *, integer *, + integer *, complex *, integer *); + static integer nibble; + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static complex rtdisc; + static integer nwupbd; + static logical sorted; + static integer lwkopt; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 - Purpose - ======= + Purpose + ======= - CLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. 
+ CLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. - Description - =========== + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. - CLANHS returns the value + Arguments + ========= - CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. - Arguments - ========= + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to CGEBAL, and then passed to CGEHRD when the + matrix output by CGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. 
+ On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then CLAQR0 does a workspace query. + In this case, CLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. 
+ + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, CLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . CLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . 
deflation window after KEXNW iterations. ==== - NORM (input) CHARACTER*1 - Specifies the value to be returned in CLANHS as described - above. + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, CLANHS is - set to zero. + /* Function Body */ + *info = 0; - A (input) COMPLEX array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. +/* ==== Quick return for N = 0: nothing to do. ==== */ - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. + if (*n <= 11) { - ===================================================================== -*/ +/* ==== Tiny matrices must use CLAHQR. ==== */ + lwkopt = 1; + if (*lwork != -1) { + clahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { + ==== Hope for the best. ==== +*/ -/* Find max(abs(A(i,j))). 
*/ + *info = 0; - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]); - value = dmax(r__1,r__2); -/* L10: */ - } -/* L20: */ +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { -/* Find norm1(A). */ +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; + nwr = ilaenv_(&c__13, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); /* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += c_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); -/* Find normI(A). */ +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. 
==== +*/ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { + nsr = ilaenv_(&c__15, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); /* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += c_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); /* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); -/* Find normF(A). */ +/* + ==== Estimate optimal workspace ==== - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } + ==== Workspace query call to CLAQR3 ==== +*/ - ret_val = value; - return ret_val; + i__1 = nwr + 1; + claqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); -/* End of CLANHS */ +/* + ==== Optimal workspace = MAX(CLAQR5, CLAQR3) ==== -} /* clanhs_ */ + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); -/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, - complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) +/* ==== Quick return in case of workspace query. 
==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* ==== CLAHQR/CLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "CLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0.f && h__[i__3].i == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . 
active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(& + h__[kwtop + (kwtop - 1) * h_dim1]), dabs(r__2)) > + (r__3 = h__[i__3].r, dabs(r__3)) + (r__4 = r_imag( + &h__[kwtop - 1 + (kwtop - 2) * h_dim1]), dabs( + r__4))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . 
==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + claqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if CLAQR3 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . CLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[i__ + (i__ - 1) * h_dim1]), dabs( + r__2))) * .75f; + q__1.r = h__[i__4].r + r__3, q__1.i = h__[i__4].i; + w[i__3].r = q__1.r, w[i__3].i = q__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? 
Use CLAQR4 or + . CLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + clacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + claqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &work[1], lwork, &inf); + } else { + clahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) ==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), dabs(r__2)) + ((r__3 = h__[i__3] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[ + kbot + (kbot - 1) * h_dim1]), dabs(r__4))) + + ((r__5 = h__[i__4].r, dabs(r__5)) + ( + r__6 = r_imag(&h__[kbot - 1 + kbot * + h_dim1]), dabs(r__6))) + ((r__7 = h__[ + i__5].r, dabs(r__7)) + (r__8 = r_imag(& + h__[kbot + kbot * h_dim1]), dabs(r__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + aa.r = q__1.r, aa.i = q__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + cc.r = q__1.r, cc.i = q__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + bb.r = q__1.r, bb.i = q__1.i; + i__2 = kbot + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + dd.r = q__1.r, dd.i = 
q__1.i; + q__2.r = aa.r + dd.r, q__2.i = aa.i + dd.i; + q__1.r = q__2.r / 2.f, q__1.i = q__2.i / 2.f; + tr2.r = q__1.r, tr2.i = q__1.i; + q__3.r = aa.r - tr2.r, q__3.i = aa.i - tr2.i; + q__4.r = dd.r - tr2.r, q__4.i = dd.i - tr2.i; + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, + q__2.i = q__3.r * q__4.i + q__3.i * + q__4.r; + q__5.r = bb.r * cc.r - bb.i * cc.i, q__5.i = bb.r + * cc.i + bb.i * cc.r; + q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - + q__5.i; + det.r = q__1.r, det.i = q__1.i; + q__2.r = -det.r, q__2.i = -det.i; + c_sqrt(&q__1, &q__2); + rtdisc.r = q__1.r, rtdisc.i = q__1.i; + i__2 = kbot - 1; + q__2.r = tr2.r + rtdisc.r, q__2.i = tr2.i + + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + i__2 = kbot; + q__2.r = tr2.r - rtdisc.r, q__2.i = tr2.i - + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((r__1 = w[i__4].r, dabs(r__1)) + (r__2 = + r_imag(&w[i__]), dabs(r__2)) < (r__3 = + w[i__5].r, dabs(r__3)) + (r__4 = + r_imag(&w[i__ + 1]), dabs(r__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. 
==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + q__2.r = w[i__2].r - h__[i__3].r, q__2.i = w[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + q__4.r = w[i__4].r - h__[i__5].r, q__4.i = w[i__4].i - + h__[i__5].i; + q__3.r = q__4.r, q__3.i = q__4.i; + if ((r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), + dabs(r__2)) < (r__3 = q__3.r, dabs(r__3)) + (r__4 + = r_imag(&q__3), dabs(r__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + claqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). 
==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR0 ==== */ + + return 0; +} /* claqr0_ */ + +/* Subroutine */ int claqr1_(integer *n, complex *h__, integer *ldh, complex * + s1, complex *s2, complex *v) +{ + /* System generated locals */ + integer h_dim1, h_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Builtin functions */ + double r_imag(complex *); + + /* Local variables */ + static real s; + static complex h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, CLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - s1*I)*(H - s2*I) + + scaling to avoid overflows and most underflows. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) COMPLEX array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + S1 (input) COMPLEX + S2 S1 and S2 are the shifts defining K in (*) above. + + V (output) COMPLEX array of dimension N + A scalar multiple of the first column of the + matrix K in (*). 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + i__1 = h_dim1 + 1; + q__2.r = h__[i__1].r - s2->r, q__2.i = h__[i__1].i - s2->i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__2 = h_dim1 + 2; + s = (r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), dabs(r__2)) + + ((r__3 = h__[i__2].r, dabs(r__3)) + (r__4 = r_imag(&h__[ + h_dim1 + 2]), dabs(r__4))); + if (s == 0.f) { + v[1].r = 0.f, v[1].i = 0.f; + v[2].r = 0.f, v[2].i = 0.f; + } else { + i__1 = h_dim1 + 2; + q__1.r = h__[i__1].r / s, q__1.i = h__[i__1].i / s; + h21s.r = q__1.r, h21s.i = q__1.i; + i__1 = (h_dim1 << 1) + 1; + q__2.r = h21s.r * h__[i__1].r - h21s.i * h__[i__1].i, q__2.i = + h21s.r * h__[i__1].i + h21s.i * h__[i__1].r; + i__2 = h_dim1 + 1; + q__4.r = h__[i__2].r - s1->r, q__4.i = h__[i__2].i - s1->i; + i__3 = h_dim1 + 1; + q__6.r = h__[i__3].r - s2->r, q__6.i = h__[i__3].i - s2->i; + q__5.r = q__6.r / s, q__5.i = q__6.i / s; + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, q__3.i = q__4.r * + q__5.i + q__4.i * q__5.r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + v[1].r = q__1.r, v[1].i = q__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + q__4.r = h__[i__1].r + h__[i__2].r, q__4.i = h__[i__1].i + h__[ + i__2].i; + q__3.r = q__4.r - s1->r, q__3.i = q__4.i - s1->i; + q__2.r = q__3.r - s2->r, q__2.i = q__3.i - s2->i; + q__1.r = h21s.r * q__2.r - h21s.i * q__2.i, q__1.i = h21s.r * + q__2.i + h21s.i * q__2.r; + v[2].r = q__1.r, v[2].i = q__1.i; + } + } else { + i__1 = h_dim1 + 1; + q__2.r = h__[i__1].r - s2->r, q__2.i = h__[i__1].i - s2->i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__2 = h_dim1 + 2; + i__3 = h_dim1 + 3; + s = (r__1 = q__1.r, dabs(r__1)) 
+ (r__2 = r_imag(&q__1), dabs(r__2)) + + ((r__3 = h__[i__2].r, dabs(r__3)) + (r__4 = r_imag(&h__[ + h_dim1 + 2]), dabs(r__4))) + ((r__5 = h__[i__3].r, dabs(r__5)) + + (r__6 = r_imag(&h__[h_dim1 + 3]), dabs(r__6))); + if (s == 0.f) { + v[1].r = 0.f, v[1].i = 0.f; + v[2].r = 0.f, v[2].i = 0.f; + v[3].r = 0.f, v[3].i = 0.f; + } else { + i__1 = h_dim1 + 2; + q__1.r = h__[i__1].r / s, q__1.i = h__[i__1].i / s; + h21s.r = q__1.r, h21s.i = q__1.i; + i__1 = h_dim1 + 3; + q__1.r = h__[i__1].r / s, q__1.i = h__[i__1].i / s; + h31s.r = q__1.r, h31s.i = q__1.i; + i__1 = h_dim1 + 1; + q__4.r = h__[i__1].r - s1->r, q__4.i = h__[i__1].i - s1->i; + i__2 = h_dim1 + 1; + q__6.r = h__[i__2].r - s2->r, q__6.i = h__[i__2].i - s2->i; + q__5.r = q__6.r / s, q__5.i = q__6.i / s; + q__3.r = q__4.r * q__5.r - q__4.i * q__5.i, q__3.i = q__4.r * + q__5.i + q__4.i * q__5.r; + i__3 = (h_dim1 << 1) + 1; + q__7.r = h__[i__3].r * h21s.r - h__[i__3].i * h21s.i, q__7.i = + h__[i__3].r * h21s.i + h__[i__3].i * h21s.r; + q__2.r = q__3.r + q__7.r, q__2.i = q__3.i + q__7.i; + i__4 = h_dim1 * 3 + 1; + q__8.r = h__[i__4].r * h31s.r - h__[i__4].i * h31s.i, q__8.i = + h__[i__4].r * h31s.i + h__[i__4].i * h31s.r; + q__1.r = q__2.r + q__8.r, q__1.i = q__2.i + q__8.i; + v[1].r = q__1.r, v[1].i = q__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + q__5.r = h__[i__1].r + h__[i__2].r, q__5.i = h__[i__1].i + h__[ + i__2].i; + q__4.r = q__5.r - s1->r, q__4.i = q__5.i - s1->i; + q__3.r = q__4.r - s2->r, q__3.i = q__4.i - s2->i; + q__2.r = h21s.r * q__3.r - h21s.i * q__3.i, q__2.i = h21s.r * + q__3.i + h21s.i * q__3.r; + i__3 = h_dim1 * 3 + 2; + q__6.r = h__[i__3].r * h31s.r - h__[i__3].i * h31s.i, q__6.i = + h__[i__3].r * h31s.i + h__[i__3].i * h31s.r; + q__1.r = q__2.r + q__6.r, q__1.i = q__2.i + q__6.i; + v[2].r = q__1.r, v[2].i = q__1.i; + i__1 = h_dim1 + 1; + i__2 = h_dim1 * 3 + 3; + q__5.r = h__[i__1].r + h__[i__2].r, q__5.i = h__[i__1].i + h__[ + i__2].i; + q__4.r = q__5.r - s1->r, q__4.i = q__5.i - 
s1->i; + q__3.r = q__4.r - s2->r, q__3.i = q__4.i - s2->i; + q__2.r = h31s.r * q__3.r - h31s.i * q__3.i, q__2.i = h31s.r * + q__3.i + h31s.i * q__3.r; + i__3 = (h_dim1 << 1) + 3; + q__6.r = h21s.r * h__[i__3].r - h21s.i * h__[i__3].i, q__6.i = + h21s.r * h__[i__3].i + h21s.i * h__[i__3].r; + q__1.r = q__2.r + q__6.r, q__1.i = q__2.i + q__6.i; + v[3].r = q__1.r, v[3].i = q__1.i; + } + } + return 0; +} /* claqr1_ */ + +/* Subroutine */ int claqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2; + + /* Builtin functions */ + double r_imag(complex *); + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j; + static complex s; + static integer jw; + static real foo; + static integer kln; + static complex tau; + static integer knt; + static real ulp; + static integer lwk1, lwk2; + static complex beta; + static integer kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, complex *, complex *, integer *, complex *), + cgemm_(char *, char *, integer *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer + *, complex *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int slabad_(real *, real *), cgehrd_(integer *, + integer *, integer *, complex *, integer *, complex *, complex *, + integer *, integer *), clarfg_(integer 
*, complex *, complex *, + integer *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex + *, complex *, integer *); + static real safmin, safmax; + extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer + *, complex *, integer *, integer *, integer *, integer *), + cunmhr_(char *, char *, integer *, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, complex + *, integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + This subroutine is identical to CLAQR3 except that it avoids + recursion by calling CLAHQR instead of CLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. 
+ + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. 
+ + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) COMPLEX array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; CLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. 
==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to CGEHRD ==== */ + + i__1 = jw - 1; + cgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to CUNMHR ==== */ + + i__1 = jw - 1; + cunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1.f, work[1].i = 0.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0.f, s.i = 0.f; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + r__5 = smlnum, r__6 = ulp * ((r__1 = h__[i__1].r, dabs(r__1)) + (r__2 + = r_imag(&h__[kwtop + kwtop * h_dim1]), dabs(r__2))); + if ((r__3 = s.r, dabs(r__3)) + (r__4 = r_imag(&s), dabs(r__4)) <= + dmax(r__5,r__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + } + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + clacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + ccopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + claset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + clahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[kwtop], + &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[*ns + *ns * + t_dim1]), dabs(r__2)); + if (foo == 0.f) { + foo = (r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (((r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2))) * (( + r__3 = v[i__2].r, dabs(r__3)) + (r__4 = r_imag(&v[*ns * + v_dim1 + 1]), dabs(r__4))) <= dmax(r__5,r__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (CTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0.f, s.i = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[j + j * + t_dim1]), dabs(r__2)) > (r__3 = t[i__4].r, dabs(r__3) + ) + (r__4 = r_imag(&t[ifst + ifst * t_dim1]), dabs( + r__4))) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0.f && s.i == 0.f) { + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + ccopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &work[i__]); + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + clarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1.f, work[1].i = 0.f; + + i__1 = jw - 2; + i__2 = jw - 2; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + r_cnjg(&q__1, &tau); + clarf_("L", ns, &jw, &work[1], &c__1, &q__1, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + cgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + r_cnjg(&q__2, &v[v_dim1 + 1]); + q__1.r = s.r * q__2.r - s.i * q__2.i, q__1.i = s.r * q__2.i + s.i + 
* q__2.r; + h__[i__1].r = q__1.r, h__[i__1].i = q__1.i; + } + clacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + ccopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + i__1 = *lwork - jw; + cunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + cgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + clacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR2 ==== */ + + return 0; +} /* claqr2_ */ + +/* Subroutine */ int claqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, complex *h__, integer *ldh, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer * + ns, integer *nd, complex *sh, complex *v, integer *ldv, integer *nh, + complex *t, integer *ldt, integer *nv, complex *wv, integer *ldwv, + complex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + complex q__1, q__2; + + /* Builtin functions */ + double r_imag(complex *); + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer i__, j; + static complex s; + static integer jw; + static real foo; + static integer kln; + static complex tau; + static integer knt; + static real ulp; + static integer lwk1, lwk2, lwk3; + static complex beta; + static integer kcol, info, nmin, ifst, ilst, ltop, krow; + extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * + , integer *, 
complex *, complex *, integer *, complex *), + cgemm_(char *, char *, integer *, integer *, integer *, complex *, + complex *, integer *, complex *, integer *, complex *, complex *, + integer *), ccopy_(integer *, complex *, integer + *, complex *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int claqr4_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, complex *, integer *, integer *), + slabad_(real *, real *), cgehrd_(integer *, integer *, integer *, + complex *, integer *, complex *, complex *, integer *, integer *) + , clarfg_(integer *, complex *, complex *, integer *, complex *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *), claset_(char *, integer *, integer *, complex *, complex + *, complex *, integer *); + static real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real safmax; + extern /* Subroutine */ int ctrexc_(char *, integer *, complex *, integer + *, complex *, integer *, integer *, integer *, integer *), + cunmhr_(char *, char *, integer *, integer *, integer *, integer + *, complex *, integer *, complex *, complex *, integer *, complex + *, integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ -- April 2009 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. 
+ On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) COMPLEX array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. 
+ + WV (workspace) COMPLEX array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; CLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. 
==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to CGEHRD ==== */ + + i__1 = jw - 1; + cgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to CUNMHR ==== */ + + i__1 = jw - 1; + cunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Workspace query call to CLAQR4 ==== */ + + claqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[1], + &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer) work[1].r; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1.f, work[1].i = 0.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0.f, s.i = 0.f; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + r__5 = smlnum, r__6 = ulp * ((r__1 = h__[i__1].r, dabs(r__1)) + (r__2 + = r_imag(&h__[kwtop + kwtop * h_dim1]), dabs(r__2))); + if ((r__3 = s.r, dabs(r__3)) + (r__4 = r_imag(&s), dabs(r__4)) <= + dmax(r__5,r__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + } + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + clacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + ccopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + claset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "CLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + claqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], lwork, & + infqr); + } else { + clahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[*ns + *ns * + t_dim1]), dabs(r__2)); + if (foo == 0.f) { + foo = (r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (((r__1 = s.r, dabs(r__1)) + (r__2 = r_imag(&s), dabs(r__2))) * (( + r__3 = v[i__2].r, dabs(r__3)) + (r__4 = r_imag(&v[*ns * + v_dim1 + 1]), dabs(r__4))) <= dmax(r__5,r__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (CTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0.f, s.i = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[j + j * + t_dim1]), dabs(r__2)) > (r__3 = t[i__4].r, dabs(r__3) + ) + (r__4 = r_imag(&t[ifst + ifst * t_dim1]), dabs( + r__4))) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ctrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0.f && s.i == 0.f) { + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + ccopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + r_cnjg(&q__1, &work[i__]); + work[i__2].r = q__1.r, work[i__2].i = q__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + clarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1.f, work[1].i = 0.f; + + i__1 = jw - 2; + i__2 = jw - 2; + claset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + r_cnjg(&q__1, &tau); + clarf_("L", ns, &jw, &work[1], &c__1, &q__1, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + clarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + cgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + r_cnjg(&q__2, &v[v_dim1 + 1]); + q__1.r = s.r * q__2.r - s.i * q__2.i, q__1.i = s.r * q__2.i + s.i + 
* q__2.r; + h__[i__1].r = q__1.r, h__[i__1].i = q__1.i; + } + clacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + ccopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0.f || s.i != 0.f)) { + i__1 = *lwork - jw; + cunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + cgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + clacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + cgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + clacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR3 ==== */ + + return 0; +} /* claqr3_ */ + +/* Subroutine */ int claqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w, + integer *iloz, integer *ihiz, complex *z__, integer *ldz, complex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8; + complex q__1, q__2, q__3, q__4, q__5; + + /* Builtin functions */ + double r_imag(complex *); + void c_sqrt(complex *, complex *); + + /* Local variables */ + static integer i__, k; + static real s; + static complex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static complex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static complex swap; + static integer ktop; + static complex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int claqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, complex *, integer *, integer *, + integer *, 
complex *, integer *, integer *, integer *, complex *, + complex *, integer *, integer *, complex *, integer *, integer *, + complex *, integer *, complex *, integer *), claqr5_(logical *, + logical *, integer *, integer *, integer *, integer *, integer *, + complex *, complex *, integer *, integer *, integer *, complex *, + integer *, complex *, integer *, complex *, integer *, integer *, + complex *, integer *, integer *, complex *, integer *); + static integer nibble; + extern /* Subroutine */ int clahqr_(logical *, logical *, integer *, + integer *, integer *, complex *, integer *, complex *, integer *, + integer *, complex *, integer *, integer *), clacpy_(char *, + integer *, integer *, complex *, integer *, complex *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static complex rtdisc; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This subroutine implements one level of recursion for CLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by CLAQR0 and, for large enough + deflation window size, it may be called by CLAQR3. This + subroutine is identical to CLAQR0 except that it calls CLAQR2 + instead of CLAQR3. + + Purpose + ======= + + CLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. 
+ + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to CGEBAL, and then passed to CGEHRD when the + matrix output by CGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). 
+ + Z (input/output) COMPLEX array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then CLAQR4 does a workspace query. + In this case, CLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, CLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 
0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . CLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use CLAHQR. 
==== */ + + lwkopt = 1; + if (*lwork != -1) { + clahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. 
==== +*/ + + nsr = ilaenv_(&c__15, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to CLAQR2 ==== +*/ + + i__1 = nwr + 1; + claqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); + +/* + ==== Optimal workspace = MAX(CLAQR5, CLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + return 0; + } + +/* ==== CLAHQR/CLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "CLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. 
==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0.f && h__[i__3].i == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = r_imag(& + h__[kwtop + (kwtop - 1) * h_dim1]), dabs(r__2)) > + (r__3 = h__[i__3].r, dabs(r__3)) + (r__4 = r_imag( + &h__[kwtop - 1 + (kwtop - 2) * h_dim1]), dabs( + r__4))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + claqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. 
+*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if CLAQR2 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . CLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[i__ + (i__ - 1) * h_dim1]), dabs( + r__2))) * .75f; + q__1.r = h__[i__4].r + r__3, q__1.i = h__[i__4].i; + w[i__3].r = q__1.r, w[i__3].i = q__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use CLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + clacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + clahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &w[ks], &c__1, &c__1, zdum, & + c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) 
==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (r__1 = h__[i__2].r, dabs(r__1)) + (r__2 = + r_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), dabs(r__2)) + ((r__3 = h__[i__3] + .r, dabs(r__3)) + (r__4 = r_imag(&h__[ + kbot + (kbot - 1) * h_dim1]), dabs(r__4))) + + ((r__5 = h__[i__4].r, dabs(r__5)) + ( + r__6 = r_imag(&h__[kbot - 1 + kbot * + h_dim1]), dabs(r__6))) + ((r__7 = h__[ + i__5].r, dabs(r__7)) + (r__8 = r_imag(& + h__[kbot + kbot * h_dim1]), dabs(r__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + aa.r = q__1.r, aa.i = q__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + cc.r = q__1.r, cc.i = q__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + bb.r = q__1.r, bb.i = q__1.i; + i__2 = kbot + kbot * h_dim1; + q__1.r = h__[i__2].r / s, q__1.i = h__[i__2].i / + s; + dd.r = q__1.r, dd.i = q__1.i; + q__2.r = aa.r + dd.r, q__2.i = aa.i + dd.i; + q__1.r = q__2.r / 2.f, q__1.i = q__2.i / 2.f; + tr2.r = q__1.r, tr2.i = q__1.i; + q__3.r = aa.r - tr2.r, q__3.i = aa.i - tr2.i; + q__4.r = dd.r - tr2.r, q__4.i = dd.i - tr2.i; + q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, + q__2.i = q__3.r * q__4.i + q__3.i * + q__4.r; + q__5.r = bb.r * cc.r - bb.i * cc.i, q__5.i = bb.r + * cc.i + bb.i * cc.r; + q__1.r = q__2.r - q__5.r, q__1.i = q__2.i - + q__5.i; + det.r = q__1.r, det.i = q__1.i; + q__2.r = -det.r, q__2.i = -det.i; + c_sqrt(&q__1, &q__2); + rtdisc.r = q__1.r, rtdisc.i = q__1.i; + i__2 = kbot - 1; + q__2.r = tr2.r + rtdisc.r, q__2.i = tr2.i + + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = q__1.i; + i__2 = kbot; + q__2.r = tr2.r - rtdisc.r, q__2.i = tr2.i - + rtdisc.i; + q__1.r = s * q__2.r, q__1.i = s * q__2.i; + w[i__2].r = q__1.r, w[i__2].i = 
q__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((r__1 = w[i__4].r, dabs(r__1)) + (r__2 = + r_imag(&w[i__]), dabs(r__2)) < (r__3 = + w[i__5].r, dabs(r__3)) + (r__4 = + r_imag(&w[i__ + 1]), dabs(r__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + q__2.r = w[i__2].r - h__[i__3].r, q__2.i = w[i__2].i - + h__[i__3].i; + q__1.r = q__2.r, q__1.i = q__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + q__4.r = w[i__4].r - h__[i__5].r, q__4.i = w[i__4].i - + h__[i__5].i; + q__3.r = q__4.r, q__3.i = q__4.i; + if ((r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(&q__1), + dabs(r__2)) < (r__3 = q__3.r, dabs(r__3)) + (r__4 + = r_imag(&q__3), dabs(r__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . 
- a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + claqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + r__1 = (real) lwkopt; + q__1.r = r__1, q__1.i = 0.f; + work[1].r = q__1.r, work[1].i = q__1.i; + +/* ==== End of CLAQR4 ==== */ + + return 0; +} /* claqr4_ */ + +/* Subroutine */ int claqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, complex *s, + complex *h__, integer *ldh, integer *iloz, integer *ihiz, complex * + z__, integer *ldz, complex *v, integer *ldv, complex *u, integer *ldu, + integer *nv, complex *wv, integer *ldwv, integer *nh, complex *wh, + integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10; + complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + double r_imag(complex *); + + /* Local variables */ + static integer j, k, m, i2, j2, i4, j4, k1; + static real h11, h12, h21, h22; + static integer m22, ns, nu; + static complex vt[3]; + static real scl; + static integer kdu, kms; + static real ulp; + static integer knz, kzs; + static real tst1, tst2; + static complex beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot, jtop, jrow, mtop; + static complex alpha; + static logical accum; + extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *, + integer *, complex *, complex *, integer *, complex *, integer *, + complex *, complex *, integer *); + static integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *, + integer *, integer *, complex *, complex *, integer *, complex *, + integer *), claqr1_(integer *, + complex *, integer *, complex *, complex *, complex *), slabad_( + real *, real *), clarfg_(integer *, complex *, complex *, integer + *, complex *); + 
extern doublereal slamch_(char *); + extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex + *, integer *, complex *, integer *), claset_(char *, + integer *, integer *, complex *, complex *, complex *, integer *); + static real safmin, safmax; + static complex refsum; + static integer mstart; + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This auxiliary subroutine called by CLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the unitary Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: CLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: CLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. + = 2: CLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. + + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. 
+ + S (input/output) COMPLEX array of size (NSHFTS) + S contains the shifts of origin that define the multi- + shift QR sweep. On output S may be reordered. + + H (input/output) COMPLEX array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) COMPLEX array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep unitary + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDA is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) COMPLEX array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) COMPLEX array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + in the calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) COMPLEX array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. + + NV (input) integer scalar + NV is the number of rows in WV agailable for workspace. + NV.GE.1. + + WV (workspace) COMPLEX array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + in the calling subroutine. LDWV.GE.NV. 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. ==== +*/ + + /* Parameter adjustments */ + --s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . then simply reduce it by one. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? 
==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + i__1 = *ktop + 2 + *ktop * h_dim1; + h__[i__1].r = 0.f, h__[i__1].i = 0.f; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + claset_("ALL", &kdu, &kdu, &c_b56, &c_b57, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) ==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. 
==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) ==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + claqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &s[(m << + 1) - 1], &s[m * 2], &v[m * v_dim1 + 1]); + i__5 = m * v_dim1 + 1; + alpha.r = v[i__5].r, alpha.i = v[i__5].i; + clarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + i__5 = k + 1 + k * h_dim1; + beta.r = h__[i__5].r, beta.i = h__[i__5].i; + i__5 = m * v_dim1 + 2; + i__6 = k + 2 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + i__5 = m * v_dim1 + 3; + i__6 = k + 3 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + clarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + i__5 = k + 3 + k * h_dim1; + i__6 = k + 3 + (k + 1) * h_dim1; + i__7 = k + 3 + (k + 2) * h_dim1; + if (h__[i__5].r != 0.f || h__[i__5].i != 0.f || (h__[i__6] + .r != 0.f || h__[i__6].i != 0.f) || h__[i__7].r == + 0.f && h__[i__7].i == 0.f) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . 
reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + claqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + s[(m << 1) - 1], &s[m * 2], vt); + alpha.r = vt[0].r, alpha.i = vt[0].i; + clarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + r_cnjg(&q__2, vt); + i__5 = k + 1 + k * h_dim1; + r_cnjg(&q__5, &vt[1]); + i__6 = k + 2 + k * h_dim1; + q__4.r = q__5.r * h__[i__6].r - q__5.i * h__[i__6].i, + q__4.i = q__5.r * h__[i__6].i + q__5.i * h__[ + i__6].r; + q__3.r = h__[i__5].r + q__4.r, q__3.i = h__[i__5].i + + q__4.i; + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + refsum.r = q__1.r, refsum.i = q__1.i; + + i__5 = k + 2 + k * h_dim1; + q__3.r = refsum.r * vt[1].r - refsum.i * vt[1].i, + q__3.i = refsum.r * vt[1].i + refsum.i * vt[1] + .r; + q__2.r = h__[i__5].r - q__3.r, q__2.i = h__[i__5].i - + q__3.i; + q__1.r = q__2.r, q__1.i = q__2.i; + q__5.r = refsum.r * vt[2].r - refsum.i * vt[2].i, + q__5.i = refsum.r * vt[2].i + refsum.i * vt[2] + .r; + q__4.r = q__5.r, q__4.i = q__5.i; + i__6 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + i__8 = k + 2 + (k + 2) * h_dim1; + if ((r__1 = q__1.r, dabs(r__1)) + (r__2 = r_imag(& + q__1), dabs(r__2)) + ((r__3 = q__4.r, dabs( + r__3)) + (r__4 = r_imag(&q__4), dabs(r__4))) + > ulp * ((r__5 = h__[i__6].r, dabs(r__5)) + ( + r__6 = r_imag(&h__[k + k * h_dim1]), dabs( + r__6)) + ((r__7 = h__[i__7].r, dabs(r__7)) + ( + r__8 = r_imag(&h__[k + 1 + (k + 1) * h_dim1]), + dabs(r__8))) + ((r__9 = h__[i__8].r, dabs( + r__9)) + (r__10 = r_imag(&h__[k + 2 + (k + 2) + * h_dim1]), dabs(r__10))))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. 
==== +*/ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + i__6 = k + 1 + k * h_dim1; + q__1.r = h__[i__6].r - refsum.r, q__1.i = h__[ + i__6].i - refsum.i; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + i__5 = m * v_dim1 + 1; + v[i__5].r = vt[0].r, v[i__5].i = vt[0].i; + i__5 = m * v_dim1 + 2; + v[i__5].r = vt[1].r, v[i__5].i = vt[1].i; + i__5 = m * v_dim1 + 3; + v[i__5].r = vt[2].r, v[i__5].i = vt[2].i; + } + } + } +/* L10: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + claqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &s[( + m22 << 1) - 1], &s[m22 * 2], &v[m22 * v_dim1 + 1]) + ; + i__4 = m22 * v_dim1 + 1; + beta.r = v[i__4].r, beta.i = v[i__4].i; + clarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + i__4 = k + 1 + k * h_dim1; + beta.r = h__[i__4].r, beta.i = h__[i__4].i; + i__4 = m22 * v_dim1 + 2; + i__5 = k + 2 + k * h_dim1; + v[i__4].r = h__[i__5].r, v[i__4].i = h__[i__5].i; + clarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + i__4 = k + 1 + k * h_dim1; + h__[i__4].r = beta.r, h__[i__4].i = beta.i; + i__4 = k + 2 + k * h_dim1; + h__[i__4].r = 0.f, h__[i__4].i = 0.f; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 
= mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + r_cnjg(&q__2, &v[m * v_dim1 + 1]); + i__6 = k + 1 + j * h_dim1; + r_cnjg(&q__6, &v[m * v_dim1 + 2]); + i__7 = k + 2 + j * h_dim1; + q__5.r = q__6.r * h__[i__7].r - q__6.i * h__[i__7].i, + q__5.i = q__6.r * h__[i__7].i + q__6.i * h__[i__7] + .r; + q__4.r = h__[i__6].r + q__5.r, q__4.i = h__[i__6].i + + q__5.i; + r_cnjg(&q__8, &v[m * v_dim1 + 3]); + i__8 = k + 3 + j * h_dim1; + q__7.r = q__8.r * h__[i__8].r - q__8.i * h__[i__8].i, + q__7.i = q__8.r * h__[i__8].i + q__8.i * h__[i__8] + .r; + q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__6 = k + 1 + j * h_dim1; + i__7 = k + 1 + j * h_dim1; + q__1.r = h__[i__7].r - refsum.r, q__1.i = h__[i__7].i - + refsum.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = k + 2 + j * h_dim1; + i__7 = k + 2 + j * h_dim1; + i__8 = m * v_dim1 + 2; + q__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + q__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = k + 3 + j * h_dim1; + i__7 = k + 3 + j * h_dim1; + i__8 = m * v_dim1 + 3; + q__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + q__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; +/* L20: */ + } +/* L30: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + r_cnjg(&q__2, &v[m22 * v_dim1 + 1]); + i__4 = k + 1 + j * h_dim1; + r_cnjg(&q__5, &v[m22 * v_dim1 + 2]); + i__6 = k + 2 + j * h_dim1; + q__4.r = q__5.r * h__[i__6].r - q__5.i * h__[i__6].i, + q__4.i = q__5.r * 
h__[i__6].i + q__5.i * h__[i__6] + .r; + q__3.r = h__[i__4].r + q__4.r, q__3.i = h__[i__4].i + + q__4.i; + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = + q__2.r * q__3.i + q__2.i * q__3.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__4 = k + 1 + j * h_dim1; + i__6 = k + 1 + j * h_dim1; + q__1.r = h__[i__6].r - refsum.r, q__1.i = h__[i__6].i - + refsum.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; + i__4 = k + 2 + j * h_dim1; + i__6 = k + 2 + j * h_dim1; + i__7 = m22 * v_dim1 + 2; + q__2.r = refsum.r * v[i__7].r - refsum.i * v[i__7].i, + q__2.i = refsum.r * v[i__7].i + refsum.i * v[i__7] + .r; + q__1.r = h__[i__6].r - q__2.r, q__1.i = h__[i__6].i - + q__2.i; + h__[i__4].r = q__1.r, h__[i__4].i = q__1.i; +/* L40: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. ==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + i__4 = m * v_dim1 + 1; + if (v[i__4].r != 0.f || v[i__4].i != 0.f) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + i__6 = m * v_dim1 + 1; + i__7 = j + (k + 1) * h_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * h_dim1; + q__4.r = v[i__8].r * h__[i__9].r - v[i__8].i * h__[ + i__9].i, q__4.i = v[i__8].r * h__[i__9].i + v[ + i__8].i * h__[i__9].r; + q__3.r = h__[i__7].r + q__4.r, q__3.i = h__[i__7].i + + q__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * h_dim1; + q__5.r = v[i__10].r * h__[i__11].r - v[i__10].i * h__[ + i__11].i, q__5.i = v[i__10].r * h__[i__11].i + + v[i__10].i * h__[i__11].r; + q__2.r = q__3.r + q__5.r, q__2.i = q__3.i + q__5.i; + q__1.r = v[i__6].r * q__2.r - v[i__6].i * q__2.i, + q__1.i = v[i__6].r * q__2.i + v[i__6].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__6 = j + (k + 1) * h_dim1; + i__7 
= j + (k + 1) * h_dim1; + q__1.r = h__[i__7].r - refsum.r, q__1.i = h__[i__7].i + - refsum.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = j + (k + 2) * h_dim1; + i__7 = j + (k + 2) * h_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; + i__6 = j + (k + 3) * h_dim1; + i__7 = j + (k + 3) * h_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - + q__2.i; + h__[i__6].r = q__1.r, h__[i__6].i = q__1.i; +/* L50: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) ==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (kms + 1) * u_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (kms + 2) * u_dim1; + q__4.r = v[i__8].r * u[i__9].r - v[i__8].i * u[ + i__9].i, q__4.i = v[i__8].r * u[i__9].i + + v[i__8].i * u[i__9].r; + q__3.r = u[i__6].r + q__4.r, q__3.i = u[i__6].i + + q__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (kms + 3) * u_dim1; + q__5.r = v[i__10].r * u[i__11].r - v[i__10].i * u[ + i__11].i, q__5.i = v[i__10].r * u[i__11] + .i + v[i__10].i * u[i__11].r; + q__2.r = q__3.r + q__5.r, q__2.i = q__3.i + + q__5.i; + q__1.r = v[i__4].r * q__2.r - v[i__4].i * q__2.i, + q__1.i = v[i__4].r * q__2.i + v[i__4].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__4 = j + (kms + 1) * u_dim1; + i__6 = j + (kms + 1) * u_dim1; + q__1.r = u[i__6].r - refsum.r, q__1.i = u[i__6].i + - refsum.i; + u[i__4].r = q__1.r, u[i__4].i = q__1.i; + i__4 = j + (kms + 2) * u_dim1; + i__6 = j + (kms + 2) * u_dim1; + r_cnjg(&q__3, 
&v[m * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = u[i__6].r - q__2.r, q__1.i = u[i__6].i - + q__2.i; + u[i__4].r = q__1.r, u[i__4].i = q__1.i; + i__4 = j + (kms + 3) * u_dim1; + i__6 = j + (kms + 3) * u_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = u[i__6].r - q__2.r, q__1.i = u[i__6].i - + q__2.i; + u[i__4].r = q__1.r, u[i__4].i = q__1.i; +/* L60: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. ==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (k + 1) * z_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * z_dim1; + q__4.r = v[i__8].r * z__[i__9].r - v[i__8].i * + z__[i__9].i, q__4.i = v[i__8].r * z__[ + i__9].i + v[i__8].i * z__[i__9].r; + q__3.r = z__[i__6].r + q__4.r, q__3.i = z__[i__6] + .i + q__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * z_dim1; + q__5.r = v[i__10].r * z__[i__11].r - v[i__10].i * + z__[i__11].i, q__5.i = v[i__10].r * z__[ + i__11].i + v[i__10].i * z__[i__11].r; + q__2.r = q__3.r + q__5.r, q__2.i = q__3.i + + q__5.i; + q__1.r = v[i__4].r * q__2.r - v[i__4].i * q__2.i, + q__1.i = v[i__4].r * q__2.i + v[i__4].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__4 = j + (k + 1) * z_dim1; + i__6 = j + (k + 1) * z_dim1; + q__1.r = z__[i__6].r - refsum.r, q__1.i = z__[ + i__6].i - refsum.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + i__4 = j + (k + 2) * z_dim1; + i__6 = j + (k + 2) * z_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = z__[i__6].r - q__2.r, q__1.i = z__[i__6] + .i - q__2.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; + i__4 = j + (k + 3) * z_dim1; + i__6 = j + (k + 3) * z_dim1; 
+ r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = z__[i__6].r - q__2.r, q__1.i = z__[i__6] + .i - q__2.i; + z__[i__4].r = q__1.r, z__[i__4].i = q__1.i; +/* L70: */ + } + } + } +/* L80: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + i__5 = m22 * v_dim1 + 1; + if (bmp22 && (v[i__5].r != 0.f || v[i__5].i != 0.f)) { +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + i__7 = m22 * v_dim1 + 1; + i__4 = j + (k + 1) * h_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * h_dim1; + q__3.r = v[i__6].r * h__[i__8].r - v[i__6].i * h__[i__8] + .i, q__3.i = v[i__6].r * h__[i__8].i + v[i__6].i * + h__[i__8].r; + q__2.r = h__[i__4].r + q__3.r, q__2.i = h__[i__4].i + + q__3.i; + q__1.r = v[i__7].r * q__2.r - v[i__7].i * q__2.i, q__1.i = + v[i__7].r * q__2.i + v[i__7].i * q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__7 = j + (k + 1) * h_dim1; + i__4 = j + (k + 1) * h_dim1; + q__1.r = h__[i__4].r - refsum.r, q__1.i = h__[i__4].i - + refsum.i; + h__[i__7].r = q__1.r, h__[i__7].i = q__1.i; + i__7 = j + (k + 2) * h_dim1; + i__4 = j + (k + 2) * h_dim1; + r_cnjg(&q__3, &v[m22 * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, q__2.i = + refsum.r * q__3.i + refsum.i * q__3.r; + q__1.r = h__[i__4].r - q__2.r, q__1.i = h__[i__4].i - + q__2.i; + h__[i__7].r = q__1.r, h__[i__7].i = q__1.i; +/* L90: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (kms + 1) * u_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (kms + 2) * u_dim1; + q__3.r = v[i__6].r * u[i__8].r - v[i__6].i * u[i__8] + .i, q__3.i = v[i__6].r * u[i__8].i + v[i__6] + .i * u[i__8].r; + q__2.r = u[i__7].r + q__3.r, q__2.i = u[i__7].i + + q__3.i; + 
q__1.r = v[i__5].r * q__2.r - v[i__5].i * q__2.i, + q__1.i = v[i__5].r * q__2.i + v[i__5].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__5 = j + (kms + 1) * u_dim1; + i__7 = j + (kms + 1) * u_dim1; + q__1.r = u[i__7].r - refsum.r, q__1.i = u[i__7].i - + refsum.i; + u[i__5].r = q__1.r, u[i__5].i = q__1.i; + i__5 = j + (kms + 2) * u_dim1; + i__7 = j + (kms + 2) * u_dim1; + r_cnjg(&q__3, &v[m22 * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = u[i__7].r - q__2.r, q__1.i = u[i__7].i - + q__2.i; + u[i__5].r = q__1.r, u[i__5].i = q__1.i; +/* L100: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (k + 1) * z_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * z_dim1; + q__3.r = v[i__6].r * z__[i__8].r - v[i__6].i * z__[ + i__8].i, q__3.i = v[i__6].r * z__[i__8].i + v[ + i__6].i * z__[i__8].r; + q__2.r = z__[i__7].r + q__3.r, q__2.i = z__[i__7].i + + q__3.i; + q__1.r = v[i__5].r * q__2.r - v[i__5].i * q__2.i, + q__1.i = v[i__5].r * q__2.i + v[i__5].i * + q__2.r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__5 = j + (k + 1) * z_dim1; + i__7 = j + (k + 1) * z_dim1; + q__1.r = z__[i__7].r - refsum.r, q__1.i = z__[i__7].i + - refsum.i; + z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; + i__5 = j + (k + 2) * z_dim1; + i__7 = j + (k + 2) * z_dim1; + r_cnjg(&q__3, &v[m22 * v_dim1 + 2]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, + q__2.i = refsum.r * q__3.i + refsum.i * + q__3.r; + q__1.r = z__[i__7].r - q__2.r, q__1.i = z__[i__7].i - + q__2.i; + z__[i__5].r = q__1.r, z__[i__5].i = q__1.i; +/* L110: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 
= krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + if (h__[i__5].r != 0.f || h__[i__5].i != 0.f) { + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + tst1 = (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = r_imag(& + h__[k + k * h_dim1]), dabs(r__2)) + ((r__3 = h__[ + i__7].r, dabs(r__3)) + (r__4 = r_imag(&h__[k + 1 + + (k + 1) * h_dim1]), dabs(r__4))); + if (tst1 == 0.f) { + if (k >= *ktop + 1) { + i__5 = k + (k - 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + (k - 1) * h_dim1]), dabs( + r__2)); + } + if (k >= *ktop + 2) { + i__5 = k + (k - 2) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + (k - 2) * h_dim1]), dabs( + r__2)); + } + if (k >= *ktop + 3) { + i__5 = k + (k - 3) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + (k - 3) * h_dim1]), dabs( + r__2)); + } + if (k <= *kbot - 2) { + i__5 = k + 2 + (k + 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 2 + (k + 1) * h_dim1]), + dabs(r__2)); + } + if (k <= *kbot - 3) { + i__5 = k + 3 + (k + 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 3 + (k + 1) * h_dim1]), + dabs(r__2)); + } + if (k <= *kbot - 4) { + i__5 = k + 4 + (k + 1) * h_dim1; + tst1 += (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 4 + (k + 1) * h_dim1]), + dabs(r__2)); + } + } + i__5 = k + 1 + k * h_dim1; +/* Computing MAX */ + r__3 = smlnum, r__4 = ulp * tst1; + if ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = r_imag(& + h__[k + 1 + k * 
h_dim1]), dabs(r__2)) <= dmax( + r__3,r__4)) { +/* Computing MAX */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + k * h_dim1]), dabs(r__2)), + r__6 = (r__3 = h__[i__7].r, dabs(r__3)) + ( + r__4 = r_imag(&h__[k + (k + 1) * h_dim1]), + dabs(r__4)); + h12 = dmax(r__5,r__6); +/* Computing MIN */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + k * h_dim1]), dabs(r__2)), + r__6 = (r__3 = h__[i__7].r, dabs(r__3)) + ( + r__4 = r_imag(&h__[k + (k + 1) * h_dim1]), + dabs(r__4)); + h21 = dmin(r__5,r__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + q__2.r = h__[i__5].r - h__[i__7].r, q__2.i = h__[i__5] + .i - h__[i__7].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MAX */ + i__6 = k + 1 + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__6].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + (k + 1) * h_dim1]), dabs( + r__2)), r__6 = (r__3 = q__1.r, dabs(r__3)) + ( + r__4 = r_imag(&q__1), dabs(r__4)); + h11 = dmax(r__5,r__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + q__2.r = h__[i__5].r - h__[i__7].r, q__2.i = h__[i__5] + .i - h__[i__7].i; + q__1.r = q__2.r, q__1.i = q__2.i; +/* Computing MIN */ + i__6 = k + 1 + (k + 1) * h_dim1; + r__5 = (r__1 = h__[i__6].r, dabs(r__1)) + (r__2 = + r_imag(&h__[k + 1 + (k + 1) * h_dim1]), dabs( + r__2)), r__6 = (r__3 = q__1.r, dabs(r__3)) + ( + r__4 = r_imag(&q__1), dabs(r__4)); + h22 = dmin(r__5,r__6); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * tst2; + if (tst2 == 0.f || h21 * (h12 / scl) <= dmax(r__1, + r__2)) { + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = 0.f, h__[i__5].i = 0.f; + } + } + } +/* L120: */ + } + +/* + ==== Fill in the last row of each bulge. 
==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + i__5 = m * v_dim1 + 1; + i__7 = m * v_dim1 + 3; + q__2.r = v[i__5].r * v[i__7].r - v[i__5].i * v[i__7].i, + q__2.i = v[i__5].r * v[i__7].i + v[i__5].i * v[i__7] + .r; + i__6 = k + 4 + (k + 3) * h_dim1; + q__1.r = q__2.r * h__[i__6].r - q__2.i * h__[i__6].i, q__1.i = + q__2.r * h__[i__6].i + q__2.i * h__[i__6].r; + refsum.r = q__1.r, refsum.i = q__1.i; + i__5 = k + 4 + (k + 1) * h_dim1; + q__1.r = -refsum.r, q__1.i = -refsum.i; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; + i__5 = k + 4 + (k + 2) * h_dim1; + q__2.r = -refsum.r, q__2.i = -refsum.i; + r_cnjg(&q__3, &v[m * v_dim1 + 2]); + q__1.r = q__2.r * q__3.r - q__2.i * q__3.i, q__1.i = q__2.r * + q__3.i + q__2.i * q__3.r; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; + i__5 = k + 4 + (k + 3) * h_dim1; + i__7 = k + 4 + (k + 3) * h_dim1; + r_cnjg(&q__3, &v[m * v_dim1 + 3]); + q__2.r = refsum.r * q__3.r - refsum.i * q__3.i, q__2.i = + refsum.r * q__3.i + refsum.i * q__3.r; + q__1.r = h__[i__7].r - q__2.r, q__1.i = h__[i__7].i - q__2.i; + h__[i__5].r = q__1.r, h__[i__5].i = q__1.i; +/* L130: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L140: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. 
==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + cgemm_("C", "N", &nu, &jlen, &nu, &c_b57, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b56, &wh[wh_offset], ldwh); + clacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L150: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + cgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b56, &wv[wv_offset], ldwv); + clacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L160: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + cgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b56, &wv[wv_offset], ldwv); + clacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L170: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . 
along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + clacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + claset_("ALL", &kzs, &jlen, &c_b56, &c_b56, &wh[wh_offset] + , ldwh); + ctrmm_("L", "U", "C", "N", &knz, &jlen, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + cgemm_("C", "N", &i2, &jlen, &j2, &c_b57, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b57, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + clacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + ctrmm_("L", "L", "C", "N", &j2, &jlen, &c_b57, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + cgemm_("C", "N", &i__5, &jlen, &i__7, &c_b57, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b57, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + clacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L180: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . columns get multiplied by zero) ==== +*/ + + clacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + claset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[wv_offset] + , ldwv); + ctrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + cgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b57, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + clacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ctrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + cgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b57, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + clacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L190: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . 
KZS columns get multiplied by zero) ==== +*/ + + clacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + claset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[ + wv_offset], ldwv); + ctrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + cgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b57, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + clacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ctrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + cgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b57, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + clacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L200: */ + } + } + } + } +/* L210: */ + } + +/* ==== End of CLAQR5 ==== */ + + return 0; +} /* claqr5_ */ + +/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda, + complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, @@ -11311,10 +15493,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11392,8 +15574,8 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } l = *m * *n + 1; - sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & - c_b1101, &rwork[l], m); + sgemm_("N", "N", m, n, m, &c_b894, &a[a_offset], lda, &rwork[1], m, & + c_b1087, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -11415,8 +15597,8 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } /* L60: */ } - sgemm_("N", "N", m, n, m, &c_b871, &a[a_offset], lda, &rwork[1], m, & - c_b1101, &rwork[l], m); + sgemm_("N", "N", m, n, m, &c_b894, &a[a_offset], lda, &rwork[1], m, & + c_b1087, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -11443,22 +15625,27 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * work) { /* System generated locals */ - integer c_dim1, c_offset; + integer c_dim1, c_offset, i__1; complex q__1; /* Local variables */ + static integer i__; + static logical applyleft; extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, integer *), cgemv_(char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *); extern logical lsame_(char *, char *); + static integer lastc, lastv; + extern integer ilaclc_(integer *, integer *, complex *, integer *), + ilaclr_(integer *, integer *, complex *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11526,39 +15713,77 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * --work; /* Function Body */ - if (lsame_(side, "L")) { + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (tau->r != 0.f || tau->i != 0.f) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 0 && (v[i__1].r == 0.f && v[i__1].i == 0.f))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { /* Form H * C */ - if (tau->r != 0.f || tau->i != 0.f) { + if (lastv > 0) { -/* w := C' * v */ +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ - cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, & - v[1], incv, &c_b55, &work[1], &c__1); + cgemv_("Conjugate transpose", &lastv, &lastc, &c_b57, &c__[ + c_offset], ldc, &v[1], incv, &c_b56, &work[1], &c__1); -/* C := C - v * w' */ +/* C(1:lastv,1:lastc) := C(...) 
- v(1:lastv,1) * w(1:lastc,1)' */ q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); + cgerc_(&lastv, &lastc, &q__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); } } else { /* Form C * H */ - if (tau->r != 0.f || tau->i != 0.f) { + if (lastv > 0) { -/* w := C * v */ +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ - cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], - incv, &c_b55, &work[1], &c__1); + cgemv_("No transpose", &lastc, &lastv, &c_b57, &c__[c_offset], + ldc, &v[1], incv, &c_b56, &work[1], &c__1); -/* C := C - w * v' */ +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); + cgerc_(&lastc, &lastv, &q__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); } } return 0; @@ -11586,19 +15811,23 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * integer *, complex *, complex *, integer *, complex *, integer *, complex *, complex *, integer *); extern logical lsame_(char *, char *); + static integer lastc; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *), ctrmm_(char *, char *, char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, - integer *), clacgv_(integer *, - complex *, integer *); + integer *); + static integer lastv; + extern integer ilaclc_(integer *, integer *, complex *, integer *); + extern /* Subroutine */ int clacgv_(integer *, complex *, integer *); + extern integer ilaclr_(integer *, integer *, complex *, integer *); static char transt[1]; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11720,6 +15949,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C1' @@ -11727,30 +15963,31 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); + ccopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L10: */ } /* W := W * V1 */ - ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2 */ - i__1 = *m - *k; - cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + - v_dim1], ldv, &c_b56, &work[work_offset], ldwork); + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[*k + 1 + c_dim1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], + ldwork); } /* W := W * T' or W * T */ - ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ @@ -11758,24 +15995,25 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* C2 := C2 - V2 * W' */ - i__1 = *m - *k; + i__1 
= lastv - *k; q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &q__1, &v[*k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork, &c_b56, &c__[*k + 1 + + cgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &q__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1' */ - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + i__ * c_dim1; i__4 = j + i__ * c_dim1; @@ -11793,6 +16031,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C1 @@ -11800,55 +16045,56 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + ccopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ - ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2 */ - i__1 = *n - *k; - cgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b56, &work[work_offset], + i__1 = lastv - *k; + cgemm_("No transpose", "No 
transpose", &lastc, k, &i__1, & + c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2' */ - i__1 = *n - *k; + i__1 = lastv - *k; q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &q__1, &work[work_offset], ldwork, &v[*k + 1 + - v_dim1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], - ldc); + cgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &q__1, &work[work_offset], ldwork, &v[*k + + 1 + v_dim1], ldv, &c_b57, &c__[(*k + 1) * + c_dim1 + 1], ldc); } /* W := W * V1' */ - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; i__4 = i__ + j * c_dim1; @@ -11876,6 +16122,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C2' @@ -11883,59 +16136,59 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); + ccopy_(&lastc, 
&c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ - ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1 */ - i__1 = *m - *k; - cgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1 * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &q__1, &v[v_offset], ldv, &work[work_offset], - ldwork, &c_b56, &c__[c_offset], ldc); + cgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &q__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); } /* W := W * V2' */ - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + 
i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; r_cnjg(&q__2, &work[i__ + j * work_dim1]); q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - q__2.i; @@ -11950,6 +16203,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C2 @@ -11957,58 +16217,58 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + ccopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ - ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1 */ - i__1 = *n - *k; - cgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); + i__1 = lastv - *k; + cgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b57, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1' */ - i__1 = *n - *k; + i__1 = lastv - *k; 
q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &q__1, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b56, &c__[c_offset], ldc); + cgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &q__1, &work[work_offset], ldwork, &v[ + v_offset], ldv, &c_b57, &c__[c_offset], ldc); } /* W := W * V2' */ - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); /* C2 := C2 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; i__5 = i__ + j * work_dim1; q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ i__4].i - work[i__5].i; @@ -12035,6 +16295,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C1' @@ -12042,56 +16309,58 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); + ccopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L130: */ } /* W := W * V1' */ - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + ctrmm_("Right", "Upper", "Conjugate 
transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2' */ - i__1 = *m - *k; - cgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[(* - k + 1) * v_dim1 + 1], ldv, &c_b56, &work[ - work_offset], ldwork); + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[*k + 1 + c_dim1], + ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, & + work[work_offset], ldwork) + ; } /* W := W * T' or W * T */ - ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C2 := C2 - V2' * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; q__1.r = -1.f, q__1.i = -0.f; cgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &q__1, &v[(*k + 1) * v_dim1 + 1], ldv, - &work[work_offset], ldwork, &c_b56, &c__[*k + 1 - + c_dim1], ldc); + i__1, &lastc, k, &q__1, &v[(*k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork, &c_b57, &c__[*k + + 1 + c_dim1], ldc); } /* W := W * V1 */ - ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + i__ * c_dim1; i__4 = j + i__ * c_dim1; @@ -12109,6 +16378,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* W 
:= C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C1 @@ -12116,55 +16392,56 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + ccopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ - ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2' */ - i__1 = *n - *k; - cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k - + 1) * v_dim1 + 1], ldv, &c_b56, &work[ + i__1 = lastv - *k; + cgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, & + v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, &work[ work_offset], ldwork); } /* W := W * T or W * T' */ - ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2 */ - i__1 = *n - *k; + i__1 = lastv - *k; q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, - &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 - + 1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1], - ldc); + cgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + q__1, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b57, &c__[(*k + 1) * c_dim1 + + 1], ldc); } /* W := W * V1 */ - ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56, - &v[v_offset], ldv, &work[work_offset], ldwork); + ctrmm_("Right", "Upper", "No transpose", "Unit", &lastc, 
k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; i__4 = i__ + j * c_dim1; @@ -12192,6 +16469,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C2' @@ -12199,59 +16483,60 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - clacgv_(n, &work[j * work_dim1 + 1], &c__1); + ccopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + clacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b56, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { + ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1' */ - i__1 = *m - *k; - cgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b56, &c__[c_offset], ldc, &v[v_offset], - ldv, &c_b56, &work[work_offset], ldwork); + i__1 = lastv - *k; + cgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[c_offset], ldc, &v[ + v_offset], ldv, &c_b57, &work[work_offset], + ldwork); } /* W := W * T' or W * T */ - ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, 
&work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1' * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; q__1.r = -1.f, q__1.i = -0.f; cgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &q__1, &v[v_offset], ldv, &work[ - work_offset], ldwork, &c_b56, &c__[c_offset], ldc); + i__1, &lastc, k, &q__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); } /* W := W * V2 */ - ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; r_cnjg(&q__2, &work[i__ + j * work_dim1]); q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i - q__2.i; @@ -12266,6 +16551,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaclc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaclr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C2 @@ -12273,58 +16565,58 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *k; for (j = 1; j <= i__1; ++j) { - ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + ccopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ - ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b56, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { + 
ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1' */ - i__1 = *n - *k; - cgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b56, &work[work_offset], ldwork); + i__1 = lastv - *k; + cgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ctrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1 */ - i__1 = *n - *k; + i__1 = lastv - *k; q__1.r = -1.f, q__1.i = -0.f; - cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b56, &c__[c_offset], ldc); + cgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + q__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b57, &c__[c_offset], ldc); } /* W := W * V2 */ - ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + ctrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; i__5 = i__ + j * work_dim1; q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[ i__4].i - work[i__5].i; @@ -12372,10 +16664,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real 
* /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -12454,11 +16746,11 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * safmin = slamch_("S") / slamch_("E"); rsafmn = 1.f / safmin; + knt = 0; if (dabs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ - knt = 0; L10: ++knt; i__1 = *n - 1; @@ -12478,37 +16770,25 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * alpha->r = q__1.r, alpha->i = q__1.i; r__1 = slapy3_(&alphr, &alphi, &xnorm); beta = -r_sign(&r__1, &alphr); - r__1 = (beta - alphr) / beta; - r__2 = -alphi / beta; - q__1.r = r__1, q__1.i = r__2; - tau->r = q__1.r, tau->i = q__1.i; - q__2.r = alpha->r - beta, q__2.i = alpha->i; - cladiv_(&q__1, &c_b56, &q__2); - alpha->r = q__1.r, alpha->i = q__1.i; - i__1 = *n - 1; - cscal_(&i__1, alpha, &x[1], incx); + } + r__1 = (beta - alphr) / beta; + r__2 = -alphi / beta; + q__1.r = r__1, q__1.i = r__2; + tau->r = q__1.r, tau->i = q__1.i; + q__2.r = alpha->r - beta, q__2.i = alpha->i; + cladiv_(&q__1, &c_b57, &q__2); + alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = *n - 1; + cscal_(&i__1, alpha, &x[1], incx); -/* If ALPHA is subnormal, it may lose relative accuracy */ +/* If ALPHA is subnormal, it may lose relative accuracy */ - alpha->r = beta, alpha->i = 0.f; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - q__1.r = safmin * alpha->r, q__1.i = safmin * alpha->i; - alpha->r = q__1.r, alpha->i = q__1.i; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; /* L20: */ - } - } else { - r__1 = (beta - alphr) / beta; - r__2 = -alphi / beta; - q__1.r = r__1, q__1.i = 
r__2; - tau->r = q__1.r, tau->i = q__1.i; - q__2.r = alpha->r - beta, q__2.i = alpha->i; - cladiv_(&q__1, &c_b56, &q__2); - alpha->r = q__1.r, alpha->i = q__1.i; - i__1 = *n - 1; - cscal_(&i__1, alpha, &x[1], incx); - alpha->r = beta, alpha->i = 0.f; } + alpha->r = beta, alpha->i = 0.f; } return 0; @@ -12525,21 +16805,22 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * complex q__1; /* Local variables */ - static integer i__, j; + static integer i__, j, prevlastv; static complex vii; extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * , complex *, integer *, complex *, integer *, complex *, complex * , integer *); extern logical lsame_(char *, char *); + static integer lastv; extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *, complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -12651,8 +16932,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } if (lsame_(direct, "F")) { + prevlastv = *n; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(prevlastv,i__); i__2 = i__; if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) { @@ -12673,33 +16956,53 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__2 = i__ + i__ * v_dim1; v[i__2].r = 1.f, v[i__2].i = 0.f; if (lsame_(storev, "C")) { +/* Skip any trailing zeros. 
*/ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0.f || v[i__3].i != 0.f) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ - i__2 = *n - i__ + 1; + i__2 = j - i__ + 1; i__3 = i__ - 1; i__4 = i__; q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__ + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & - c_b55, &t[i__ * t_dim1 + 1], &c__1); + c_b56, &t[i__ * t_dim1 + 1], &c__1); } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0.f || v[i__3].i != 0.f) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ - if (i__ < *n) { - i__2 = *n - i__; + if (i__ < j) { + i__2 = j - i__; clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); } i__2 = i__ - 1; - i__3 = *n - i__ + 1; + i__3 = j - i__ + 1; i__4 = i__; q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i; cgemv_("No transpose", &i__2, &i__3, &q__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b55, &t[i__ * t_dim1 + 1], &c__1); - if (i__ < *n) { - i__2 = *n - i__; + c_b56, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < j) { + i__2 = j - i__; clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); } } @@ -12714,10 +17017,16 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__2 = i__ + i__ * t_dim1; i__3 = i__; t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } } /* L20: */ } } else { + prevlastv = 1; for (i__ = *k; i__ >= 1; --i__) { i__1 = i__; if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) { @@ -12740,19 +17049,29 @@ doublereal clanhs_(char *norm, integer *n, 
complex *a, integer *lda, real * vii.r = v[i__1].r, vii.i = v[i__1].i; i__1 = *n - *k + i__ + i__ * v_dim1; v[i__1].r = 1.f, v[i__1].i = 0.f; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0.f || v[i__2].i != 0.f) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ - i__1 = *n - *k + i__; + i__1 = *n - *k + i__ - j + 1; i__2 = *k - i__; i__3 = i__; q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[ - (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 - + 1], &c__1, &c_b55, &t[i__ + 1 + i__ * + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b56, &t[i__ + 1 + i__ * t_dim1], &c__1); i__1 = *n - *k + i__ + i__ * v_dim1; v[i__1].r = vii.r, v[i__1].i = vii.i; @@ -12761,23 +17080,34 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * vii.r = v[i__1].r, vii.i = v[i__1].i; i__1 = i__ + (*n - *k + i__) * v_dim1; v[i__1].r = 1.f, v[i__1].i = 0.f; +/* Skip any leading zeros. 
*/ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0.f || v[i__2].i != 0.f) { + goto L36; + } + } +L36: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ - i__1 = *n - *k + i__ - 1; - clacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = *n - *k + i__ - 1 - j + 1; + clacgv_(&i__1, &v[i__ + j * v_dim1], ldv); i__1 = *k - i__; - i__2 = *n - *k + i__; + i__2 = *n - *k + i__ - j + 1; i__3 = i__; q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i; cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b55, &t[i__ + 1 + i__ * t_dim1], &c__1); - i__1 = *n - *k + i__ - 1; - clacgv_(&i__1, &v[i__ + v_dim1], ldv); + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b56, &t[i__ + 1 + i__ * t_dim1], & + c__1); + i__1 = *n - *k + i__ - 1 - j + 1; + clacgv_(&i__1, &v[i__ + j * v_dim1], ldv); i__1 = i__ + (*n - *k + i__) * v_dim1; v[i__1].r = vii.r, v[i__1].i = vii.i; } @@ -12789,6 +17119,11 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } } i__1 = i__ + i__ * t_dim1; i__2 = i__; @@ -12803,2048 +17138,269 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } /* clarft_ */ -/* Subroutine */ int clarfx_(char *side, integer *m, integer *n, complex *v, - complex *tau, complex *c__, integer *ldc, complex *work) +/* Subroutine */ int clartg_(complex *f, complex *g, real *cs, complex *sn, + complex *r__) { /* System generated locals */ - integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, - i__9, i__10, i__11; - complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10, - q__11, q__12, q__13, q__14, q__15, q__16, q__17, q__18, q__19; + integer i__1; + 
real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8, r__9, r__10; + complex q__1, q__2, q__3; /* Builtin functions */ + double log(doublereal), pow_ri(real *, integer *), r_imag(complex *), + sqrt(doublereal); void r_cnjg(complex *, complex *); /* Local variables */ - static integer j; - static complex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, - v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int cgerc_(integer *, integer *, complex *, - complex *, integer *, complex *, integer *, complex *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex * - , complex *, integer *, complex *, integer *, complex *, complex * - , integer *); + static real d__; + static integer i__; + static real f2, g2; + static complex ff; + static real di, dr; + static complex fs, gs; + static real f2s, g2s, eps, scale; + static integer count; + static real safmn2, safmx2; + extern doublereal slapy2_(real *, real *), slamch_(char *); + static real safmin; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose ======= - CLARFX applies a complex elementary reflector H to a complex m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. + CLARTG generates a plane rotation so that - If tau = 0, then H is taken to be the unit matrix + [ CS SN ] [ F ] [ R ] + [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. + [ -SN CS ] [ G ] [ 0 ] - This version uses inline code if H has order < 11. 
+ This is a faster version of the BLAS1 routine CROTG, except for + the following differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0, then CS=0 and SN is chosen so that R is real. Arguments ========= - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H + F (input) COMPLEX + The first component of vector to be rotated. - M (input) INTEGER - The number of rows of the matrix C. + G (input) COMPLEX + The second component of vector to be rotated. - N (input) INTEGER - The number of columns of the matrix C. + CS (output) REAL + The cosine of the rotation. - V (input) COMPLEX array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. + SN (output) COMPLEX + The sine of the rotation. - TAU (input) COMPLEX - The value tau in the representation of H. + R (output) COMPLEX + The nonzero component of the rotated vector. - C (input/output) COMPLEX array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. + Further Details + ======= ======= - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). + 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel - WORK (workspace) COMPLEX array, dimension (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (tau->r == 0.f && tau->i == 0.f) { - return 0; - } - if (lsame_(side, "L")) { -/* Form H * C, where H has order m. 
*/ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; - } + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / + IF( FIRST ) THEN +*/ + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; /* - Code for general M - - w := C'*v + FIRST = .FALSE. + END IF + Computing MAX + Computing MAX */ - - cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1] - , &c__1, &c_b55, &work[1], &c__1); - -/* C := C - tau * v * w' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], - ldc); - goto L410; + r__7 = (r__1 = f->r, dabs(r__1)), r__8 = (r__2 = r_imag(f), dabs(r__2)); +/* Computing MAX */ + r__9 = (r__3 = g->r, dabs(r__3)), r__10 = (r__4 = r_imag(g), dabs(r__4)); + r__5 = dmax(r__7,r__8), r__6 = dmax(r__9,r__10); + scale = dmax(r__5,r__6); + fs.r = f->r, fs.i = f->i; + gs.r = g->r, gs.i = g->i; + count = 0; + if (scale >= safmx2) { L10: - -/* Special code for 1 x 1 Householder */ - - q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i - + tau->i * v[1].r; - r_cnjg(&q__4, &v[1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i - + q__3.i * q__4.r; - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - t1.r = q__1.r, t1.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L20: */ - } - goto L410; -L30: - -/* Special code for 2 x 2 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = 
tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L40: */ - } - goto L410; -L50: - -/* Special code for 3 x 3 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, 
q__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i; - i__4 = j * c_dim1 + 3; - q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L60: */ - } - goto L410; -L70: - -/* Special code for 4 x 4 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r 
* q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; - i__4 = j * c_dim1 + 3; - q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; - i__5 = j * c_dim1 + 4; - q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L80: */ - } - goto L410; -L90: - -/* Special code for 5 x 5 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r 
* q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; - i__4 = j * c_dim1 + 3; - q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; - i__5 = j * c_dim1 + 4; - q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; - i__6 = j * c_dim1 + 5; - q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - 
q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L100: */ - } - goto L410; -L110: - -/* Special code for 6 x 6 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = 
tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; - i__4 = j * c_dim1 + 3; - q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; - i__5 = j * c_dim1 + 4; - q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; - i__6 = j * c_dim1 + 5; - q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; - i__7 = j * c_dim1 + 6; - q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r 
- sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L120: */ - } - goto L410; -L130: - -/* Special code for 7 x 7 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, 
&v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i; - i__4 = j * c_dim1 + 3; - q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; - i__5 = j * c_dim1 + 4; - q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; - i__6 = j * c_dim1 + 5; - q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; - i__7 = j * c_dim1 + 6; - q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; - i__8 = j * c_dim1 + 7; - q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; 
- q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L140: */ - } - goto L410; -L150: - -/* Special code for 8 x 8 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * 
q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; - i__4 = j * c_dim1 + 3; - q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; - i__5 = j * c_dim1 + 4; - q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; - i__6 = j * c_dim1 + 5; - q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__4.r = q__5.r + q__12.r, q__4.i = 
q__5.i + q__12.i; - i__7 = j * c_dim1 + 6; - q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; - i__8 = j * c_dim1 + 7; - q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; - i__9 = j * c_dim1 + 8; - q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - 
q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L160: */ - } - goto L410; -L170: - -/* Special code for 9 x 9 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; 
- t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - r_cnjg(&q__1, &v[9]); - v9.r = q__1.r, v9.i = q__1.i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; - i__4 = j * c_dim1 + 3; - q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; - i__5 = j * c_dim1 + 4; - q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; - i__6 = j * c_dim1 + 5; - q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; - i__7 = j * c_dim1 + 6; - q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; - i__8 = j * c_dim1 + 7; - q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; - i__9 = j * c_dim1 + 8; - q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; - i__10 = j * c_dim1 + 9; - q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, 
q__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = 
c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L180: */ + ++count; + q__1.r = safmn2 * fs.r, q__1.i = safmn2 * fs.i; + fs.r = q__1.r, fs.i = q__1.i; + q__1.r = safmn2 * gs.r, q__1.i = safmn2 * gs.i; + gs.r = q__1.r, gs.i = q__1.i; + scale *= safmn2; + if (scale >= safmx2) { + goto L10; + } + } else if (scale <= safmn2) { + if (g->r == 0.f && g->i == 0.f) { + *cs = 1.f; + sn->r = 0.f, sn->i = 0.f; + r__->r = f->r, r__->i = f->i; + return 0; } - goto L410; -L190: - -/* Special code for 10 x 10 Householder */ - - r_cnjg(&q__1, &v[1]); - v1.r = q__1.r, v1.i = q__1.i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - r_cnjg(&q__1, &v[2]); - v2.r = q__1.r, v2.i = q__1.i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - r_cnjg(&q__1, &v[3]); - v3.r = q__1.r, v3.i = q__1.i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - r_cnjg(&q__1, &v[4]); - v4.r = q__1.r, v4.i = q__1.i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - r_cnjg(&q__1, &v[5]); - v5.r = q__1.r, v5.i = q__1.i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - r_cnjg(&q__1, &v[6]); - v6.r = q__1.r, v6.i = q__1.i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - 
r_cnjg(&q__1, &v[7]); - v7.r = q__1.r, v7.i = q__1.i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - r_cnjg(&q__1, &v[8]); - v8.r = q__1.r, v8.i = q__1.i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - r_cnjg(&q__1, &v[9]); - v9.r = q__1.r, v9.i = q__1.i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - r_cnjg(&q__1, &v[10]); - v10.r = q__1.r, v10.i = q__1.i; - r_cnjg(&q__2, &v10); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t10.r = q__1.r, t10.i = q__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; - i__4 = j * c_dim1 + 3; - q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; - i__5 = j * c_dim1 + 4; - q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; - i__6 = j * c_dim1 + 5; - q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i; - i__7 = j * c_dim1 + 6; - q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; - i__8 = j * c_dim1 + 7; - q__16.r = v7.r * c__[i__8].r - v7.i * 
c__[i__8].i, q__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; - i__9 = j * c_dim1 + 8; - q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; - i__10 = j * c_dim1 + 9; - q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; - i__11 = j * c_dim1 + 10; - q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - q__2.r = sum.r 
* t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j * c_dim1 + 10; - i__3 = j * c_dim1 + 10; - q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + - sum.i * t10.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L200: */ +L20: + --count; + q__1.r = safmx2 * fs.r, q__1.i = safmx2 * fs.i; + fs.r = q__1.r, fs.i = q__1.i; + q__1.r = safmx2 * gs.r, q__1.i = safmx2 * gs.i; + gs.r = q__1.r, gs.i = q__1.i; + scale *= safmx2; + if (scale <= safmn2) { + goto L20; } - goto L410; - } else { - -/* Form C * H, where H has order n. 
*/ - - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; + } +/* Computing 2nd power */ + r__1 = fs.r; +/* Computing 2nd power */ + r__2 = r_imag(&fs); + f2 = r__1 * r__1 + r__2 * r__2; +/* Computing 2nd power */ + r__1 = gs.r; +/* Computing 2nd power */ + r__2 = r_imag(&gs); + g2 = r__1 * r__1 + r__2 * r__2; + if (f2 <= dmax(g2,1.f) * safmin) { + +/* This is a rare case: F is very small. */ + + if (f->r == 0.f && f->i == 0.f) { + *cs = 0.f; + r__2 = g->r; + r__3 = r_imag(g); + r__1 = slapy2_(&r__2, &r__3); + r__->r = r__1, r__->i = 0.f; +/* Do complex/real division explicitly with two real divisions */ + r__1 = gs.r; + r__2 = r_imag(&gs); + d__ = slapy2_(&r__1, &r__2); + r__1 = gs.r / d__; + r__2 = -r_imag(&gs) / d__; + q__1.r = r__1, q__1.i = r__2; + sn->r = q__1.r, sn->i = q__1.i; + return 0; } - + r__1 = fs.r; + r__2 = r_imag(&fs); + f2s = slapy2_(&r__1, &r__2); /* - Code for general N - - w := C * v + G2 and G2S are accurate + G2 is at least SAFMIN, and G2S is at least SAFMN2 */ + g2s = sqrt(g2); +/* + Error in CS from underflow in F2S is at most + UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS + If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, + and so CS .lt. sqrt(SAFMIN) + If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN + and so CS .lt. 
sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) + Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S +*/ + *cs = f2s / g2s; +/* + Make sure abs(FF) = 1 + Do complex/real division explicitly with 2 real divisions + Computing MAX +*/ + r__3 = (r__1 = f->r, dabs(r__1)), r__4 = (r__2 = r_imag(f), dabs(r__2) + ); + if (dmax(r__3,r__4) > 1.f) { + r__1 = f->r; + r__2 = r_imag(f); + d__ = slapy2_(&r__1, &r__2); + r__1 = f->r / d__; + r__2 = r_imag(f) / d__; + q__1.r = r__1, q__1.i = r__2; + ff.r = q__1.r, ff.i = q__1.i; + } else { + dr = safmx2 * f->r; + di = safmx2 * r_imag(f); + d__ = slapy2_(&dr, &di); + r__1 = dr / d__; + r__2 = di / d__; + q__1.r = r__1, q__1.i = r__2; + ff.r = q__1.r, ff.i = q__1.i; + } + r__1 = gs.r / g2s; + r__2 = -r_imag(&gs) / g2s; + q__2.r = r__1, q__2.i = r__2; + q__1.r = ff.r * q__2.r - ff.i * q__2.i, q__1.i = ff.r * q__2.i + ff.i + * q__2.r; + sn->r = q__1.r, sn->i = q__1.i; + q__2.r = *cs * f->r, q__2.i = *cs * f->i; + q__3.r = sn->r * g->r - sn->i * g->i, q__3.i = sn->r * g->i + sn->i * + g->r; + q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; + r__->r = q__1.r, r__->i = q__1.i; + } else { - cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], & - c__1, &c_b55, &work[1], &c__1); - -/* C := C - tau * w * v' */ - - q__1.r = -tau->r, q__1.i = -tau->i; - cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L210: - -/* Special code for 1 x 1 Householder */ - - q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i - + tau->i * v[1].r; - r_cnjg(&q__4, &v[1]); - q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i - + q__3.i * q__4.r; - q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i; - t1.r = q__1.r, t1.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L220: */ - } - goto 
L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L240: */ +/* + This is the most common case. 
+ Neither F2 nor F2/G2 are less than SAFMIN + F2S cannot overflow, and it is accurate +*/ + + f2s = sqrt(g2 / f2 + 1.f); +/* Do the F2S(real)*FS(complex) multiply with two real multiplies */ + r__1 = f2s * fs.r; + r__2 = f2s * r_imag(&fs); + q__1.r = r__1, q__1.i = r__2; + r__->r = q__1.r, r__->i = q__1.i; + *cs = 1.f / f2s; + d__ = f2 + g2; +/* Do complex/real division explicitly with two real divisions */ + r__1 = r__->r / d__; + r__2 = r_imag(r__) / d__; + q__1.r = r__1, q__1.i = r__2; + sn->r = q__1.r, sn->i = q__1.i; + r_cnjg(&q__2, &gs); + q__1.r = sn->r * q__2.r - sn->i * q__2.i, q__1.i = sn->r * q__2.i + + sn->i * q__2.r; + sn->r = q__1.r, sn->i = q__1.i; + if (count != 0) { + if (count > 0) { + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + q__1.r = safmx2 * r__->r, q__1.i = safmx2 * r__->i; + r__->r = q__1.r, r__->i = q__1.i; +/* L30: */ + } + } else { + i__1 = -count; + for (i__ = 1; i__ <= i__1; ++i__) { + q__1.r = safmn2 * r__->r, q__1.i = safmn2 * r__->i; + r__->r = q__1.r, r__->i = q__1.i; +/* L40: */ + } + } } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + 
q__4.i; - i__4 = j + c_dim1 * 3; - q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L260: */ - } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = 
v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i; - i__4 = j + c_dim1 * 3; - q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i; - i__5 = j + (c_dim1 << 2); - q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L280: */ - } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * 
q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i; - i__4 = j + c_dim1 * 3; - q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i; - i__5 = j + (c_dim1 << 2); - q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i; - i__6 = j + c_dim1 * 5; - q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - 
q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L300: */ - } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, 
q__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i; - i__4 = j + c_dim1 * 3; - q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i; - i__5 = j + (c_dim1 << 2); - q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i; - i__6 = j + c_dim1 * 5; - q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i; - i__7 = j + c_dim1 * 6; - q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r 
- q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L320: */ - } - goto L410; -L330: - -/* Special code for 7 x 7 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__6.r = q__7.r + q__8.r, q__6.i = 
q__7.i + q__8.i; - i__4 = j + c_dim1 * 3; - q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i; - i__5 = j + (c_dim1 << 2); - q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i; - i__6 = j + c_dim1 * 5; - q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i; - i__7 = j + c_dim1 * 6; - q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i; - i__8 = j + c_dim1 * 7; - q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; 
- q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; 
- v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i; - i__4 = j + c_dim1 * 3; - q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i; - i__5 = j + (c_dim1 << 2); - q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i; - i__6 = j + c_dim1 * 5; - q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i; - i__7 = j + c_dim1 * 6; - q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i; - i__8 = j + c_dim1 * 7; - q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i; - i__9 = j + (c_dim1 << 3); - q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = 
sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i 
= tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - v9.r = v[9].r, v9.i = v[9].i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i; - i__4 = j + c_dim1 * 3; - q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i; - i__5 = j + (c_dim1 << 2); - q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - 
q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i; - i__6 = j + c_dim1 * 5; - q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i; - i__7 = j + c_dim1 * 6; - q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i; - i__8 = j + c_dim1 * 7; - q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i; - i__9 = j + (c_dim1 << 3); - q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i; - i__10 = j + c_dim1 * 9; - q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - 
i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - r_cnjg(&q__2, &v1); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t1.r = q__1.r, t1.i = q__1.i; - v2.r = v[2].r, v2.i = v[2].i; - r_cnjg(&q__2, &v2); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t2.r = q__1.r, t2.i = q__1.i; - v3.r = v[3].r, v3.i = v[3].i; - r_cnjg(&q__2, &v3); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t3.r = q__1.r, t3.i = q__1.i; - v4.r = v[4].r, v4.i = v[4].i; - r_cnjg(&q__2, &v4); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t4.r = q__1.r, t4.i = 
q__1.i; - v5.r = v[5].r, v5.i = v[5].i; - r_cnjg(&q__2, &v5); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t5.r = q__1.r, t5.i = q__1.i; - v6.r = v[6].r, v6.i = v[6].i; - r_cnjg(&q__2, &v6); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t6.r = q__1.r, t6.i = q__1.i; - v7.r = v[7].r, v7.i = v[7].i; - r_cnjg(&q__2, &v7); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t7.r = q__1.r, t7.i = q__1.i; - v8.r = v[8].r, v8.i = v[8].i; - r_cnjg(&q__2, &v8); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t8.r = q__1.r, t8.i = q__1.i; - v9.r = v[9].r, v9.i = v[9].i; - r_cnjg(&q__2, &v9); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t9.r = q__1.r, t9.i = q__1.i; - v10.r = v[10].r, v10.i = v[10].i; - r_cnjg(&q__2, &v10); - q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i - + tau->i * q__2.r; - t10.r = q__1.r, t10.i = q__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i; - i__4 = j + c_dim1 * 3; - q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i; - i__5 = j + (c_dim1 << 2); - q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i; - i__6 = j + c_dim1 * 5; - q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - q__6.r = q__7.r + q__14.r, q__6.i = q__7.i 
+ q__14.i; - i__7 = j + c_dim1 * 6; - q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i; - i__8 = j + c_dim1 * 7; - q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i; - i__9 = j + (c_dim1 << 3); - q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i; - i__10 = j + c_dim1 * 9; - q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i; - i__11 = j + c_dim1 * 10; - q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i; - sum.r = q__1.r, sum.i = q__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i + - sum.i * t1.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i + - sum.i * t2.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i + - sum.i * t3.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i + - sum.i * t4.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + 
c_dim1 * 5; - q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i + - sum.i * t5.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i + - sum.i * t6.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i + - sum.i * t7.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i + - sum.i * t8.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i + - sum.i * t9.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; - i__2 = j + c_dim1 * 10; - i__3 = j + c_dim1 * 10; - q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i + - sum.i * t10.r; - q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i; - c__[i__2].r = q__1.r, c__[i__2].i = q__1.i; -/* L400: */ - } - goto L410; - } -L410: + } return 0; -/* End of CLARFX */ +/* End of CLARTG */ -} /* clarfx_ */ +} /* clartg_ */ /* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real * cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda, @@ -14865,14 +17421,16 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * extern doublereal slamch_(char *); static real cfromc; extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum, smlnum; + static real bignum; + extern logical sisnan_(real *); + static 
real smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14923,7 +17481,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) COMPLEX array, dimension (LDA,M) + A (input/output) COMPLEX array, dimension (LDA,N) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. @@ -14968,8 +17526,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * if (itype == -1) { *info = -1; - } else if (*cfrom == 0.f) { + } else if (*cfrom == 0.f || sisnan_(cfrom)) { *info = -4; + } else if (sisnan_(cto)) { + *info = -5; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { @@ -15016,18 +17576,36 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * L10: cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (dabs(cto1) > dabs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. +*/ mul = ctoc / cfromc; done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. 
+*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.f; + } else if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } } if (itype == 0) { @@ -15191,10 +17769,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15341,51 +17919,86 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose ======= - CLASR performs the transformation + CLASR applies a sequence of real plane rotations to a complex matrix + A, from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form + + A := A*P**T + + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + When DIRECT = 'F' (Forward sequence), then - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + P = P(z-1) * ... 
* P(2) * P(1) - where A is an m by n complex matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): + and when DIRECT = 'B' (Backward sequence), then - When DIRECT = 'F' or 'f' ( Forward sequence ) then + P = P(1) * P(2) * ... * P(z-1) - P = P( z - 1 )*...*P( 2 )*P( 1 ), + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation - and when DIRECT = 'B' or 'b' ( Backward sequence ) then + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). - P = P( 1 )*P( 2 )*...*P( z - 1 ), + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form - where P( k ) is a plane rotation matrix for the following planes: + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form + where R(k) appears in rows and columns 1 and k+1. - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. 
The rotations are + performed without ever forming P(k) explicitly. Arguments ========= @@ -15394,13 +18007,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Specifies whether the plane rotation matrix P is applied to A on the left or the right. = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. - = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + = 'R': Right, compute A:= A*P**T PIVOT (input) CHARACTER*1 Specifies the plane for which P(k) is a plane rotation @@ -15409,6 +18016,12 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * = 'T': Top pivot, the plane (1,k+1) = 'B': Bottom pivot, the plane (k,z) + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + M (input) INTEGER The number of rows of the matrix A. If m <= 1, an immediate return is effected. @@ -15417,18 +18030,22 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * The number of columns of the matrix A. If n <= 1, an immediate return is effected. - C, S (input) REAL arrays, dimension + C (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The cosines c(k) of the plane rotations. + + S (input) REAL array, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). 
A (input/output) COMPLEX array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). @@ -15885,10 +18502,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15997,10 +18614,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16031,7 +18648,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * The last element of IPIV for which a row interchange will be done. - IPIV (input) INTEGER array, dimension (M*abs(INCX)) + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) The vector of pivot indices. Only the elements in positions K1 through K2 of IPIV are accessed. IPIV(K) = L implies rows K and L are to be interchanged. @@ -16161,10 +18778,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16185,7 +18802,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Arguments ========= - UPLO (input) CHARACTER + UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored: = 'U': Upper triangular @@ -16330,7 +18947,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b56, &a[i__ * a_dim1 + 1], &c__1); + c_b57, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); i__2 = *n - i__; @@ -16339,7 +18956,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b56, &a[i__ * a_dim1 + 1], &c__1); + c_b57, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); i__2 = i__ + i__ * a_dim1; @@ -16367,32 +18984,32 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Compute W(1:i-1,i) */ i__2 = i__ - 1; - chemv_("Upper", &i__2, &c_b56, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b55, &w[iw * w_dim1 + 1], & + chemv_("Upper", &i__2, &c_b57, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b56, &w[iw * w_dim1 + 1], & c__1); if (i__ < *n) { i__2 = i__ - 1; i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[( + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[( iw + 1) * 
w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], - &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[( + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[( i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], - &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1); + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1); + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); } i__2 = i__ - 1; cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); @@ -16432,7 +19049,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda, - &w[i__ + w_dim1], ldw, &c_b56, &a[i__ + i__ * a_dim1], & + &w[i__ + w_dim1], ldw, &c_b57, &a[i__ + i__ * a_dim1], & c__1); i__2 = i__ - 1; clacgv_(&i__2, &w[i__ + w_dim1], ldw); @@ -16442,7 +19059,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw, - &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], & + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], & c__1); i__2 = i__ - 1; clacgv_(&i__2, &a[i__ + a_dim1], lda); @@ -16472,30 +19089,30 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Compute W(i+1:n,i) */ i__2 = *n - i__; - chemv_("Lower", &i__2, 
&c_b56, &a[i__ + 1 + (i__ + 1) * + chemv_("Lower", &i__2, &c_b57, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ + 1 + i__ * w_dim1], &c__1); + c_b56, &w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[i__ + + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[i__ + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ * w_dim1 + 1], &c__1); + c_b56, &w[i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b55, &w[i__ * w_dim1 + 1], &c__1); + c_b56, &w[i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[ + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); @@ -16576,10 +19193,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17196,7 +19813,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Scale x by 1/2. */ - csscal_(n, &c_b1794, &x[1], &c__1); + csscal_(n, &c_b2023, &x[1], &c__1); *scale *= .5f; } @@ -17714,10 +20331,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17813,7 +20430,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__2 = i__ - 1; i__3 = *n - i__; q__1.r = aii, q__1.i = 0.f; - cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) * + cgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & q__1, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; @@ -17844,7 +20461,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__2 = *n - i__; i__3 = i__ - 1; q__1.r = aii, q__1.i = 0.f; - cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + + cgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & q__1, &a[i__ + a_dim1], lda); i__2 = i__ - 1; @@ -17887,10 +20504,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17992,19 +20609,19 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * ib = min(i__3,i__4); i__3 = i__ - 1; ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & - i__3, &ib, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ + i__3, &ib, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ i__ * a_dim1 + 1], lda); clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; cgemm_("No transpose", "Conjugate transpose", &i__3, &ib, - &i__4, &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda, & - a[i__ + (i__ + ib) * a_dim1], lda, &c_b56, &a[i__ + &i__4, &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b57, &a[i__ * a_dim1 + 1], lda); i__3 = *n - i__ - ib + 1; - cherk_("Upper", "No transpose", &ib, &i__3, &c_b871, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b871, &a[i__ + cherk_("Upper", "No transpose", &ib, &i__3, &c_b894, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b894, &a[i__ + i__ * a_dim1], lda); } /* L10: */ @@ -18021,19 +20638,19 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * ib = min(i__3,i__4); i__3 = i__ - 1; ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & - ib, &i__3, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[ + ib, &i__3, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ i__ + a_dim1], lda); clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; cgemm_("Conjugate transpose", "No transpose", &ib, &i__3, - &i__4, &c_b56, &a[i__ + ib + i__ * a_dim1], lda, & - a[i__ + ib + a_dim1], lda, &c_b56, &a[i__ + + &i__4, &c_b57, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b57, &a[i__ + a_dim1], lda); i__3 = *n - i__ - ib + 1; cherk_("Lower", "Conjugate transpose", &ib, &i__3, & - c_b871, &a[i__ + ib + i__ * a_dim1], lda, &c_b871, + c_b894, &a[i__ + ib + i__ * 
a_dim1], lda, &c_b894, &a[i__ + i__ * a_dim1], lda); } /* L20: */ @@ -18071,13 +20688,14 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * extern /* Subroutine */ int clacgv_(integer *, complex *, integer *), csscal_(integer *, real *, complex *, integer *), xerbla_(char *, integer *); + extern logical sisnan_(real *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18176,7 +20794,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * , &c__1); q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; ajj = q__1.r; - if (ajj <= 0.f) { + if (ajj <= 0.f || sisnan_(&ajj)) { i__2 = j + j * a_dim1; a[i__2].r = ajj, a[i__2].i = 0.f; goto L30; @@ -18194,7 +20812,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__3 = *n - j; q__1.r = -1.f, q__1.i = -0.f; cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b56, &a[j + ( + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b57, &a[j + ( j + 1) * a_dim1], lda); i__2 = j - 1; clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); @@ -18219,7 +20837,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); q__1.r = r__1 - q__2.r, q__1.i = -q__2.i; ajj = q__1.r; - if (ajj <= 0.f) { + if (ajj <= 0.f || sisnan_(&ajj)) { i__2 = j + j * a_dim1; a[i__2].r = ajj, a[i__2].i = 0.f; goto L30; @@ -18237,7 +20855,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__3 = j - 1; q__1.r = -1.f, q__1.i = -0.f; cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1] - , lda, &a[j + 
a_dim1], lda, &c_b56, &a[j + 1 + j * + , lda, &a[j + a_dim1], lda, &c_b57, &a[j + 1 + j * a_dim1], &c__1); i__2 = j - 1; clacgv_(&i__2, &a[j + a_dim1], lda); @@ -18286,10 +20904,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18400,8 +21018,8 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1150, & - a[j * a_dim1 + 1], lda, &c_b871, &a[j + j * a_dim1], + cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1136, & + a[j * a_dim1 + 1], lda, &c_b894, &a[j + j * a_dim1], lda); cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { @@ -18416,11 +21034,11 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * q__1.r = -1.f, q__1.i = -0.f; cgemm_("Conjugate transpose", "No transpose", &jb, &i__3, &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b56, &a[j + (j + jb) * + * a_dim1 + 1], lda, &c_b57, &a[j + (j + jb) * a_dim1], lda); i__3 = *n - j - jb + 1; ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b56, &a[j + j * a_dim1], lda, &a[ + &jb, &i__3, &c_b57, &a[j + j * a_dim1], lda, &a[ j + (j + jb) * a_dim1], lda); } /* L10: */ @@ -18443,8 +21061,8 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - cherk_("Lower", "No transpose", &jb, &i__3, &c_b1150, &a[j + - a_dim1], lda, &c_b871, &a[j + j * a_dim1], lda); + 
cherk_("Lower", "No transpose", &jb, &i__3, &c_b1136, &a[j + + a_dim1], lda, &c_b894, &a[j + j * a_dim1], lda); cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; @@ -18458,11 +21076,11 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * q__1.r = -1.f, q__1.i = -0.f; cgemm_("No transpose", "Conjugate transpose", &i__3, &jb, &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b56, &a[j + jb + j * a_dim1], + a_dim1], lda, &c_b57, &a[j + jb + j * a_dim1], lda); i__3 = *n - j - jb + 1; ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b56, &a[j + j * a_dim1], lda, &a[ + , &i__3, &jb, &c_b57, &a[j + j * a_dim1], lda, &a[ j + jb + j * a_dim1], lda); } /* L20: */ @@ -18496,10 +21114,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18600,10 +21218,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18694,11 +21312,11 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * */ ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b56, &a[a_offset], lda, &b[b_offset], ldb); + c_b57, &a[a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ - ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, & + ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & a[a_offset], lda, &b[b_offset], ldb); } else { @@ -18708,13 +21326,13 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Solve L*X = B, overwriting B with X. */ - ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b56, & + ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b57, & a[a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b56, &a[a_offset], lda, &b[b_offset], ldb); + c_b57, &a[a_offset], lda, &b[b_offset], ldb); } return 0; @@ -18723,24 +21341,62 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } /* cpotrs_ */ -/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex * - cy, integer *incy, real *c__, real *s) +/* Subroutine */ int crot_(integer *n, complex *cx, integer *incx, complex * + cy, integer *incy, real *c__, complex *s) { /* System generated locals */ integer i__1, i__2, i__3, i__4; - complex q__1, q__2, q__3; + complex q__1, q__2, q__3, q__4; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); /* Local variables */ static integer i__, ix, iy; - static complex ctemp; + static complex stemp; /* - applies a plane rotation, where the cos and sin (c and s) are real - and the vectors cx and cy are complex. - jack dongarra, linpack, 3/11/78. 
+ -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 - ===================================================================== + + Purpose + ======= + + CROT applies a plane rotation, where the cos (C) is real and the + sin (S) is complex, and the vectors CX and CY are complex. + + Arguments + ========= + + N (input) INTEGER + The number of elements in the vectors CX and CY. + + CX (input/output) COMPLEX array, dimension (N) + On input, the vector X. + On output, CX is overwritten with C*X + S*Y. + + INCX (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + CY (input/output) COMPLEX array, dimension (N) + On input, the vector Y. + On output, CY is overwritten with -CONJG(S)*X + C*Y. + + INCY (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + C (input) REAL + S (input) COMPLEX + C and S define a rotation + [ C S ] + [ -conjg(S) C ] + where C*C + S*CONJG(S) = 1.0. 
+ + ===================================================================== */ @@ -18756,10 +21412,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * goto L20; } -/* - code for unequal increments or equal increments not equal - to 1 -*/ +/* Code for unequal increments or equal increments not equal to 1 */ ix = 1; iy = 1; @@ -18774,25 +21427,28 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__2 = ix; q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; i__3 = iy; - q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, q__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; + stemp.r = q__1.r, stemp.i = q__1.i; i__2 = iy; i__3 = iy; q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + r_cnjg(&q__4, s); i__4 = ix; - q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__3.r = q__4.r * cx[i__4].r - q__4.i * cx[i__4].i, q__3.i = q__4.r * + cx[i__4].i + q__4.i * cx[i__4].r; q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; i__2 = ix; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; ix += *incx; iy += *incy; /* L10: */ } return 0; -/* code for both increments equal to 1 */ +/* Code for both increments equal to 1 */ L20: i__1 = *n; @@ -18800,22 +21456,25 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__2 = i__; q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i; i__3 = i__; - q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i; + q__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, q__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i; - ctemp.r = q__1.r, ctemp.i = q__1.i; + stemp.r = q__1.r, stemp.i = q__1.i; i__2 = i__; i__3 = i__; q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i; + r_cnjg(&q__4, s); 
i__4 = i__; - q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i; + q__3.r = q__4.r * cx[i__4].r - q__4.i * cx[i__4].i, q__3.i = q__4.r * + cx[i__4].i + q__4.i * cx[i__4].r; q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i; cy[i__2].r = q__1.r, cy[i__2].i = q__1.i; i__2 = i__; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; /* L30: */ } return 0; -} /* csrot_ */ +} /* crot_ */ /* Subroutine */ int cstedc_(char *compz, integer *n, real *d__, real *e, complex *z__, integer *ldz, complex *work, integer *lwork, real * @@ -18834,7 +21493,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Local variables */ static integer i__, j, k, m; static real p; - static integer ii, ll, end, lgn; + static integer ii, ll, lgn; static real eps, tiny; extern logical lsame_(char *, char *); extern /* Subroutine */ int cswap_(integer *, complex *, integer *, @@ -18852,6 +21511,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + static integer finish; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *, integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, @@ -18870,10 +21530,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18926,21 +21586,24 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * The leading dimension of the array Z. LDZ >= 1. If eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + Note that for COMPZ = 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be 1. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. - RWORK (workspace/output) REAL array, - dimension (LRWORK) + RWORK (workspace/output) REAL array, dimension (MAX(1,LRWORK)) On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK. LRWORK (input) INTEGER @@ -18952,13 +21615,17 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * that 2**k >= N. If COMPZ = 'I' and N > 1, LRWORK must be at least 1 + 4*N + 2*N**2 . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LRWORK + need only be max(1,2*(N-1)). 
If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -18968,11 +21635,15 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * 6 + 6*N + 5*N*lg N. If COMPZ = 'I' or N > 1, LIWORK must be at least 3 + 5*N . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit. 
@@ -19017,19 +21688,36 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } else { icompz = -1; } - if (*n <= 1 || icompz <= 0) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else if (*n <= smlsiz) { + lwmin = 1; + liwmin = 1; + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } lwmin = *n * *n; /* Computing 2nd power */ i__1 = *n; @@ -19042,25 +21730,17 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); liwmin = *n * 5 + 3; } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { work[1].r = (real) lwmin, work[1].i = 0.f; rwork[1] = (real) lrwmin; iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -12; + } } if (*info != 0) { @@ -19084,9 +21764,6 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * return 0; } - smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - /* If the following conditional clause is removed, then the routine will use the Divide and Conquer routine to compute only the @@ -19094,14 +21771,15 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * (2 + 5N + 2N lg(N)) integer workspace. Since on many architectures SSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here - as the default. + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. If COMPZ = 'N', use SSTERF to compute the eigenvalues. */ if (icompz == 0) { ssterf_(n, &d__[1], &e[1], info); - return 0; + goto L70; } /* @@ -19110,161 +21788,156 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * */ if (*n <= smlsiz) { - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - csteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - csteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } - } -/* If COMPZ = 'I', we simply call SSTEDC instead. */ + csteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); - if (icompz == 2) { - slaset_("Full", n, n, &c_b1101, &c_b871, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; + } else { + +/* If COMPZ = 'I', we simply call SSTEDC instead. 
*/ + + if (icompz == 2) { + slaset_("Full", n, n, &c_b1087, &c_b894, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f; /* L10: */ - } + } /* L20: */ + } + goto L70; } - return 0; - } /* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. - Scale. + Scale. */ - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + goto L70; + } - eps = slamch_("Epsilon"); + eps = slamch_("Epsilon"); - start = 1; + start = 1; -/* while ( START <= N ) */ +/* while ( START <= N ) */ L30: - if (start <= *n) { + if (start <= *n) { /* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. */ - end = start; + finish = start; L40: - if (end < *n) { - tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = - d__[end + 1], dabs(r__2))); - if ((r__1 = e[end], dabs(r__1)) > tiny) { - ++end; - goto L40; + if (finish < *n) { + tiny = eps * sqrt((r__1 = d__[finish], dabs(r__1))) * sqrt(( + r__2 = d__[finish + 1], dabs(r__2))); + if ((r__1 = e[finish], dabs(r__1)) > tiny) { + ++finish; + goto L40; + } } - } -/* (Sub) Problem determined. Compute its size and solve it. 
*/ +/* (Sub) Problem determined. Compute its size and solve it. */ - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; + m = finish - start + 1; + if (m > smlsiz) { -/* Scale. */ +/* Scale. */ - orgnrm = slanst_("M", &m, &d__[start], &e[start]); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - slascl_("G", &c__0, &c__0, &orgnrm, &c_b871, &i__1, &c__1, &e[ - start], &i__2, info); + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b894, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b894, &i__1, &c__1, &e[ + start], &i__2, info); - claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } + claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + + 1], ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L70; + } -/* Scale back. */ +/* Scale back. 
*/ - slascl_("G", &c__0, &c__0, &c_b871, &orgnrm, &m, &c__1, &d__[ - start], &m, info); + slascl_("G", &c__0, &c__0, &c_b894, &orgnrm, &m, &c__1, &d__[ + start], &m, info); - } else { - ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; + } else { + ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, & + rwork[m * m + 1], info); + clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], + ldz); + if (*info > 0) { + *info = start * (*n + 1) + finish; + goto L70; + } } - } - start = end + 1; - goto L30; - } + start = finish + 1; + goto L30; + } /* - endwhile + endwhile - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
*/ - if (m != *n) { + if (m != *n) { -/* Use Selection Sort to minimize swaps of eigenvectors */ +/* Use Selection Sort to minimize swaps of eigenvectors */ - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } /* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } /* L60: */ + } } } +L70: work[1].r = (real) lwmin, work[1].i = 0.f; rwork[1] = (real) lrwmin; iwork[1] = liwmin; @@ -19326,10 +21999,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19462,7 +22135,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * */ if (icompz == 2) { - claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz); + claset_("Full", n, n, &c_b56, &c_b57, &z__[z_offset], ldz); } nmaxit = *n * 30; @@ -19612,7 +22285,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Form shift. 
*/ g = (d__[l + 1] - p) / (e[l] * 2.f); - r__ = slapy2_(&g, &c_b871); + r__ = slapy2_(&g, &c_b894); g = d__[m] - p + e[l] / (g + r_sign(&r__, &g)); s = 1.f; @@ -19738,7 +22411,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Form shift. */ g = (d__[l - 1] - p) / (e[l - 1] * 2.f); - r__ = slapy2_(&g, &c_b871); + r__ = slapy2_(&g, &c_b894); g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g)); s = 1.f; @@ -19917,10 +22590,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19928,20 +22601,23 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * CTREVC computes some or all of the right and/or left eigenvectors of a complex upper triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a complex general matrix: A = Q*T*Q**H, as computed by CHSEQR. The right eigenvector x and the left eigenvector y of T corresponding to an eigenvalue w are defined by: - T*x = w*x, y'*T = w*y' + T*x = w*x, (y**H)*T = w*(y**H) - where y' denotes the conjugate transpose of the vector y. + where y**H denotes the conjugate transpose of the vector y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal of T. - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. 
If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the unitary factor that reduces a matrix A to + Schur form T, then Q*X and Q*Y are the matrices of right and left + eigenvectors of A. Arguments ========= @@ -19954,17 +22630,17 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * HOWMNY (input) CHARACTER*1 = 'A': compute all right and/or left eigenvectors; = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; + backtransformed using the matrices supplied in + VR and/or VL; = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. + as indicated by the logical array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenvectors to be computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. + The eigenvector corresponding to the j-th eigenvalue is + computed if SELECT(j) = .TRUE.. + Not referenced if HOWMNY = 'A' or 'B'. N (input) INTEGER The order of the matrix T. N >= 0. @@ -19982,19 +22658,16 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Schur vectors returned by CHSEQR). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). if HOWMNY = 'B', the matrix Q*Y; if HOWMNY = 'S', the left eigenvectors of T specified by SELECT, stored consecutively in the columns of VL, in the same order as their eigenvalues. - If SIDE = 'R', VL is not referenced. 
+ Not referenced if SIDE = 'R'. LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. VR (input/output) COMPLEX array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must @@ -20002,19 +22675,16 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * Schur vectors returned by CHSEQR). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). if HOWMNY = 'B', the matrix Q*X; if HOWMNY = 'S', the right eigenvectors of T specified by SELECT, stored consecutively in the columns of VR, in the same order as their eigenvalues. - If SIDE = 'L', VR is not referenced. + Not referenced if SIDE = 'L'. LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B'; LDVR >= N. MM (input) INTEGER The number of columns in the arrays VL and/or VR. MM >= M. 
@@ -20232,7 +22902,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * if (ki > 1) { i__1 = ki - 1; q__1.r = scale, q__1.i = 0.f; - cgemv_("N", n, &i__1, &c_b56, &vr[vr_offset], ldvr, &work[ + cgemv_("N", n, &i__1, &c_b57, &vr[vr_offset], ldvr, &work[ 1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1); } @@ -20348,7 +23018,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * if (ki < *n) { i__2 = *n - ki; q__1.r = scale, q__1.i = 0.f; - cgemv_("N", n, &i__2, &c_b56, &vl[(ki + 1) * vl_dim1 + 1], + cgemv_("N", n, &i__2, &c_b57, &vl[(ki + 1) * vl_dim1 + 1], ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki * vl_dim1 + 1], &c__1); } @@ -20382,6 +23052,193 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } /* ctrevc_ */ +/* Subroutine */ int ctrexc_(char *compq, integer *n, complex *t, integer * + ldt, complex *q, integer *ldq, integer *ifst, integer *ilst, integer * + info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3; + complex q__1; + + /* Builtin functions */ + void r_cnjg(complex *, complex *); + + /* Local variables */ + static integer k, m1, m2, m3; + static real cs; + static complex t11, t22, sn, temp; + extern /* Subroutine */ int crot_(integer *, complex *, integer *, + complex *, integer *, real *, complex *); + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int clartg_(complex *, complex *, real *, complex + *, complex *), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CTREXC reorders the Schur factorization of a complex matrix + A = Q*T*Q**H, so that the diagonal element of T with row index IFST + is moved to row ILST. 
+ + The Schur form T is reordered by a unitary similarity transformation + Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + postmultplying it with Z. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX array, dimension (LDT,N) + On entry, the upper triangular matrix T. + On exit, the reordered upper triangular matrix. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) COMPLEX array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + unitary transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input) INTEGER + ILST (input) INTEGER + Specify the reordering of the diagonal elements of T: + The element with row index IFST is moved to row ILST by a + sequence of transpositions between adjacent elements. + 1 <= IFST <= N; 1 <= ILST <= N. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Decode and test the input parameters. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! lsame_(compq, "N") && ! 
wantq) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("CTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 1 || *ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Move the IFST-th diagonal element forward down the diagonal. */ + + m1 = 0; + m2 = -1; + m3 = 1; + } else { + +/* Move the IFST-th diagonal element backward up the diagonal. */ + + m1 = -1; + m2 = 0; + m3 = -1; + } + + i__1 = *ilst + m2; + i__2 = m3; + for (k = *ifst + m1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + +/* Interchange the k-th and (k+1)-th diagonal elements. */ + + i__3 = k + k * t_dim1; + t11.r = t[i__3].r, t11.i = t[i__3].i; + i__3 = k + 1 + (k + 1) * t_dim1; + t22.r = t[i__3].r, t22.i = t[i__3].i; + +/* Determine the transformation to perform the interchange. */ + + q__1.r = t22.r - t11.r, q__1.i = t22.i - t11.i; + clartg_(&t[k + (k + 1) * t_dim1], &q__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (k + 2 <= *n) { + i__3 = *n - k - 1; + crot_(&i__3, &t[k + (k + 2) * t_dim1], ldt, &t[k + 1 + (k + 2) * + t_dim1], ldt, &cs, &sn); + } + i__3 = k - 1; + r_cnjg(&q__1, &sn); + crot_(&i__3, &t[k * t_dim1 + 1], &c__1, &t[(k + 1) * t_dim1 + 1], & + c__1, &cs, &q__1); + + i__3 = k + k * t_dim1; + t[i__3].r = t22.r, t[i__3].i = t22.i; + i__3 = k + 1 + (k + 1) * t_dim1; + t[i__3].r = t11.r, t[i__3].i = t11.i; + + if (wantq) { + +/* Accumulate transformation in the matrix Q. 
*/ + + r_cnjg(&q__1, &sn); + crot_(n, &q[k * q_dim1 + 1], &c__1, &q[(k + 1) * q_dim1 + 1], & + c__1, &cs, &q__1); + } + +/* L10: */ + } + + return 0; + +/* End of CTREXC */ + +} /* ctrexc_ */ + /* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a, integer *lda, integer *info) { @@ -20405,10 +23262,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20494,7 +23351,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * for (j = 1; j <= i__1; ++j) { if (nounit) { i__2 = j + j * a_dim1; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + c_div(&q__1, &c_b57, &a[j + j * a_dim1]); a[i__2].r = q__1.r, a[i__2].i = q__1.i; i__2 = j + j * a_dim1; q__1.r = -a[i__2].r, q__1.i = -a[i__2].i; @@ -20520,7 +23377,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * for (j = *n; j >= 1; --j) { if (nounit) { i__1 = j + j * a_dim1; - c_div(&q__1, &c_b56, &a[j + j * a_dim1]); + c_div(&q__1, &c_b57, &a[j + j * a_dim1]); a[i__1].r = q__1.r, a[i__1].i = q__1.i; i__1 = j + j * a_dim1; q__1.r = -a[i__1].r, q__1.i = -a[i__1].i; @@ -20578,10 +23435,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20712,7 +23569,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__4 = j - 1; ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b56, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + c_b57, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); i__4 = j - 1; q__1.r = -1.f, q__1.i = -0.f; ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & @@ -20740,7 +23597,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * i__1 = *n - j - jb + 1; ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b56, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + &c_b57, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda); i__1 = *n - j - jb + 1; q__1.r = -1.f, q__1.i = -0.f; @@ -20779,10 +23636,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20943,10 +23800,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21012,7 +23869,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * reflector H(i) or G(i), which determines Q or P**H, as returned by CGEBRD in its array argument TAUQ or TAUP. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -21226,10 +24083,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21266,7 +24123,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEHRD. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -21422,10 +24279,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21595,10 +24452,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21638,7 +24495,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGELQF. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -21856,10 +24713,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21900,7 +24757,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQRF. - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -22118,10 +24975,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22333,10 +25190,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22563,10 +25420,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22653,16 +25510,17 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. + if SIDE = 'R', LWORK >= max(1,M); + if N = 0 or M = 0, LWORK >= 1. 
+ For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', + and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the + optimal blocksize. (NB = 0 if M = 0 or N = 0.) If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns @@ -22705,6 +25563,9 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * nq = *n; nw = *m; } + if (*m == 0 || *n == 0) { + nw = 0; + } if (! applyq && ! lsame_(vect, "P")) { *info = -1; } else if (! left && ! lsame_(side, "R")) { @@ -22730,48 +25591,54 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { + if (nw > 0) { + if (applyq) { + if (left) { /* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { /* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, 
&i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } } else { + if (left) { /* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } } +/* Computing MAX */ + i__1 = 1, i__2 = nw * nb; + lwkopt = max(i__1,i__2); + } else { + lwkopt = 1; } - lwkopt = max(1,nw) * nb; work[1].r = (real) lwkopt, work[1].i = 0.f; } @@ -22780,11 +25647,11 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * xerbla_("CUNMBR", &i__1); return 0; } else if (lquery) { + return 0; } /* Quick return if possible */ - work[1].r = 1.f, work[1].i = 0.f; if (*m == 0 || *n == 0) { return 0; } @@ -22861,6 +25728,235 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * } /* cunmbr_ */ +/* Subroutine */ int cunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, complex *a, integer *lda, complex *tau, + complex *c__, integer *ldc, complex *work, integer *lwork, integer * + info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical 
lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *, + integer *, complex *, integer *, complex *, complex *, integer *, + complex *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + CUNMHR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by CGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q**H (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of CGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) COMPLEX array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by CGEHRD. 
+ + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by CGEHRD. + + C (input/output) COMPLEX array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! 
lsame_(trans, + "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CUNMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1].r = (real) lwkopt, work[1].i = 0.f; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("CUNMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1].r = 1.f, work[1].i = 0.f; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + cunmqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1].r = (real) lwkopt, work[1].i = 0.f; + return 0; + +/* End of CUNMHR */ + +} /* cunmhr_ */ + /* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *c__, integer *ldc, complex *work, integer *info) @@ -22886,10 +25982,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23127,10 +26223,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23195,7 +26291,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -23433,10 +26529,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23501,7 +26597,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -23547,10 +26643,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * if (left) { nq = *m; - nw = *n; + nw = max(1,*n); } else { nq = *n; - nw = *m; + nw = max(1,*m); } if (! left && ! lsame_(side, "R")) { *info = -1; @@ -23566,27 +26662,33 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; } if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { /* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. Computing MIN Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } work[1].r = (real) lwkopt, work[1].i = 0.f; + + if (*lwork < nw && ! 
lquery) { + *info = -12; + } } if (*info != 0) { @@ -23599,8 +26701,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* Quick return if possible */ - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1.f, work[1].i = 0.f; + if (*m == 0 || *n == 0) { return 0; } @@ -23729,10 +26830,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23797,7 +26898,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -24025,10 +27126,10 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24094,7 +27195,7 @@ doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real * LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace/output) COMPLEX array, dimension (LWORK) + WORK (workspace/output) COMPLEX array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER diff --git a/numpy/linalg/lapack_lite/f2c_config.c b/numpy/linalg/lapack_lite/f2c_config.c index d0182a557386..1aa11ce5cbf1 100644 --- a/numpy/linalg/lapack_lite/f2c_config.c +++ b/numpy/linalg/lapack_lite/f2c_config.c @@ -62,10 +62,9 @@ doublereal dlamch_(char *cmach) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -107,7 +106,6 @@ doublereal dlamch_(char *cmach) if (first) { - first = FALSE_; dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); base = (doublereal) beta; t = (doublereal) it; @@ -159,6 +157,7 @@ doublereal dlamch_(char *cmach) } ret_val = rmach; + first = FALSE_; return ret_val; /* End of DLAMCH */ @@ -190,10 +189,9 @@ doublereal dlamch_(char *cmach) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -239,7 +237,6 @@ doublereal dlamch_(char *cmach) if (first) { - first = FALSE_; one = 1.; /* @@ -370,6 +367,7 @@ doublereal dlamch_(char *cmach) *t = lt; *rnd = lrnd; *ieee1 = lieee1; + first = FALSE_; return 0; /* End of DLAMC1 */ @@ -431,10 +429,9 @@ doublereal dlamch_(char *cmach) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -492,7 +489,6 @@ doublereal dlamch_(char *cmach) if (first) { - first = FALSE_; zero = 0.; one = 1.; two = 2.; @@ -637,6 +633,7 @@ doublereal dlamch_(char *cmach) /* ( A guess; no known machine ) */ iwarn = TRUE_; } + first = FALSE_; /* ** Comment out this if block if EMIN is ok @@ -703,10 +700,9 @@ doublereal dlamc3_(doublereal *a, doublereal *b) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -719,7 +715,8 @@ doublereal dlamc3_(doublereal *a, doublereal *b) Arguments ========= - A, B (input) DOUBLE PRECISION + A (input) DOUBLE PRECISION + B (input) DOUBLE PRECISION The values A and B. ===================================================================== @@ -751,10 +748,9 @@ doublereal dlamc3_(doublereal *a, doublereal *b) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -765,7 +761,7 @@ doublereal dlamc3_(doublereal *a, doublereal *b) Arguments ========= - EMIN (output) EMIN + EMIN (output) INTEGER The minimum exponent before (gradual) underflow, computed by setting A = START and dividing by BASE until the previous A can not be recovered. 
@@ -851,10 +847,9 @@ doublereal dlamc3_(doublereal *a, doublereal *b) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -1018,10 +1013,9 @@ logical lsame_(char *ca, char *cb) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -1146,10 +1140,9 @@ doublereal slamch_(char *cmach) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -1191,7 +1184,6 @@ doublereal slamch_(char *cmach) if (first) { - first = FALSE_; slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); base = (real) beta; t = (real) it; @@ -1243,6 +1235,7 @@ doublereal slamch_(char *cmach) } ret_val = rmach; + first = FALSE_; return ret_val; /* End of SLAMCH */ @@ -1274,10 +1267,9 @@ doublereal slamch_(char *cmach) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+ November 2006 Purpose @@ -1323,7 +1315,6 @@ doublereal slamch_(char *cmach) if (first) { - first = FALSE_; one = 1.f; /* @@ -1454,6 +1445,7 @@ doublereal slamch_(char *cmach) *t = lt; *rnd = lrnd; *ieee1 = lieee1; + first = FALSE_; return 0; /* End of SLAMC1 */ @@ -1514,10 +1506,9 @@ doublereal slamch_(char *cmach) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -1575,7 +1566,6 @@ doublereal slamch_(char *cmach) if (first) { - first = FALSE_; zero = 0.f; one = 1.f; two = 2.f; @@ -1720,6 +1710,7 @@ doublereal slamch_(char *cmach) /* ( A guess; no known machine ) */ iwarn = TRUE_; } + first = FALSE_; /* ** Comment out this if block if EMIN is ok @@ -1786,10 +1777,9 @@ doublereal slamc3_(real *a, real *b) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose @@ -1802,7 +1792,8 @@ doublereal slamc3_(real *a, real *b) Arguments ========= - A, B (input) REAL + A (input) REAL + B (input) REAL The values A and B. ===================================================================== @@ -1834,10 +1825,9 @@ doublereal slamc3_(real *a, real *b) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. 
+ November 2006 Purpose @@ -1848,7 +1838,7 @@ doublereal slamc3_(real *a, real *b) Arguments ========= - EMIN (output) EMIN + EMIN (output) INTEGER The minimum exponent before (gradual) underflow, computed by setting A = START and dividing by BASE until the previous A can not be recovered. @@ -1934,10 +1924,9 @@ doublereal slamc3_(real *a, real *b) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. + November 2006 Purpose diff --git a/numpy/linalg/lapack_lite/f2c_d_lapack.c b/numpy/linalg/lapack_lite/f2c_d_lapack.c index cb28b686f3a4..b84ef2ec05a2 100644 --- a/numpy/linalg/lapack_lite/f2c_d_lapack.c +++ b/numpy/linalg/lapack_lite/f2c_d_lapack.c @@ -39,16 +39,20 @@ static doublereal c_b151 = -1.; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; -static integer c__8 = 8; -static integer c__4 = 4; static integer c__65 = 65; static integer c__6 = 6; -static integer c__15 = 15; +static integer c__12 = 12; +static integer c__49 = 49; +static integer c__4 = 4; static logical c_false = FALSE_; +static integer c__13 = 13; +static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; +static logical c_true = TRUE_; static integer c__10 = 10; static integer c__11 = 11; -static doublereal c_b2804 = 2.; -static logical c_true = TRUE_; +static doublereal c_b3192 = 2.; /* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal * d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt, @@ -106,10 +110,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -130,7 +134,7 @@ static logical c_true = TRUE_; It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. See DLASD3 for details. - The code currently call DLASDQ if singular values only are desired. + The code currently calls DLASDQ if singular values only are desired. However, it can be slightly modified to compute singular values using the divide and conquer method. @@ -156,7 +160,7 @@ static logical c_true = TRUE_; On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B. - E (input/output) DOUBLE PRECISION array, dimension (N) + E (input/output) DOUBLE PRECISION array, dimension (N-1) On entry, the elements of E contain the offdiagonal elements of the bidiagonal matrix whose SVD is desired. On exit, E has been destroyed. @@ -205,7 +209,7 @@ static logical c_true = TRUE_; bottom of the computation tree (usually about 25). For other values of COMPQ, IQ is not referenced. - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) If COMPQ = 'N' then LWORK >= (4 * N). If COMPQ = 'P' then LWORK >= (6 * N). If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). @@ -215,7 +219,7 @@ static logical c_true = TRUE_; INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. + > 0: The algorithm failed to compute a singular value. The update process of divide and conquer failed. 
Further Details @@ -225,6 +229,9 @@ static logical c_true = TRUE_; Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA + ===================================================================== + Changed dimension statement in comment describing E from (N) to + (N-1). Sven, 17 Feb 05. ===================================================================== @@ -468,9 +475,9 @@ static logical c_true = TRUE_; 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ start + (is + qstart - 2) * *n], &work[wstart], & iwork[1], info); - if (*info != 0) { - return 0; - } + } + if (*info != 0) { + return 0; } start = i__ + 1; } @@ -592,27 +599,39 @@ static logical c_true = TRUE_; integer *); static doublereal sminoa, thresh; static logical rotate; - static doublereal sminlo, tolmul; + static doublereal tolmul; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + January 2007 Purpose ======= - DBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. + DBDSQR computes the singular values and, optionally, the right and/or + left singular vectors from the singular value decomposition (SVD) of + a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + zero-shift QR algorithm. The SVD of B has the form - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. 
+ B = Q * S * P**T + + where S is the diagonal matrix of singular values, Q is an orthogonal + matrix of left singular vectors, and P is an orthogonal matrix of + right singular vectors. If left singular vectors are requested, this + subroutine actually returns U*Q instead of Q, and, if right singular + vectors are requested, this subroutine returns P**T*VT instead of + P**T, for given real input matrices U and VT. When U and VT are the + orthogonal matrices that reduce a general matrix A to bidiagonal + form: A = U*B*VT, as computed by DGEBRD, then + + A = (U*Q) * S * (P**T*VT) + + is the SVD of A. Optionally, the subroutine may also compute Q**T*C + for a given real input matrix C. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, @@ -647,19 +666,18 @@ static logical c_true = TRUE_; On exit, if INFO=0, the singular values of B in decreasing order. - E (input/output) DOUBLE PRECISION array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E + E (input/output) DOUBLE PRECISION array, dimension (N-1) + On entry, the N-1 offdiagonal elements of the bidiagonal + matrix B. + On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E will contain the diagonal and superdiagonal elements of a bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. + as input. VT (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT) On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. + On exit, VT is overwritten by P**T * VT. + Not referenced if NCVT = 0. LDVT (input) INTEGER The leading dimension of the array VT. 
@@ -668,15 +686,15 @@ static logical c_true = TRUE_; U (input/output) DOUBLE PRECISION array, dimension (LDU, N) On entry, an NRU-by-N matrix U. On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. + Not referenced if NRU = 0. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,NRU). C (input/output) DOUBLE PRECISION array, dimension (LDC, NCC) On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. - C is not referenced if NCC = 0. + On exit, C is overwritten by Q**T * C. + Not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. @@ -687,10 +705,18 @@ static logical c_true = TRUE_; INFO (output) INTEGER = 0: successful exit < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. + > 0: + if NCVT = NRU = NCC = 0, + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + else NCVT = NRU = NCC = 0, + the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. 
Internal Parameters =================== @@ -1033,7 +1059,6 @@ static logical c_true = TRUE_; e[lll] = 0.; goto L60; } - sminlo = sminl; mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[ lll], abs(d__1)))); sminl = min(sminl,mu); @@ -1069,7 +1094,6 @@ static logical c_true = TRUE_; e[lll] = 0.; goto L60; } - sminlo = sminl; mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll] , abs(d__1)))); sminl = min(sminl,mu); @@ -1458,10 +1482,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -1669,15 +1693,16 @@ static logical c_true = TRUE_; static doublereal sfmin1, sfmin2, sfmax1, sfmax2; extern integer idamax_(integer *, doublereal *, integer *); + extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); static logical noconv; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -1915,7 +1940,7 @@ static logical c_true = TRUE_; sfmin1 = SAFEMINIMUM / PRECISION; sfmax1 = 1. / sfmin1; - sfmin2 = sfmin1 * 8.; + sfmin2 = sfmin1 * 2.; sfmax2 = 1. / sfmin2; L140: noconv = FALSE_; @@ -1946,7 +1971,7 @@ static logical c_true = TRUE_; if (c__ == 0. || r__ == 0.) 
{ goto L200; } - g = r__ / 8.; + g = r__ / 2.; f = 1.; s = c__ + r__; L160: @@ -1957,28 +1982,38 @@ static logical c_true = TRUE_; if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { goto L170; } - f *= 8.; - c__ *= 8.; - ca *= 8.; - r__ /= 8.; - g /= 8.; - ra /= 8.; + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("DGEBAL", &i__2); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; goto L160; L170: - g = c__ / 8.; + g = c__ / 2.; L180: /* Computing MIN */ d__1 = min(f,c__), d__1 = min(d__1,g); if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { goto L190; } - f /= 8.; - c__ /= 8.; - g /= 8.; - ca /= 8.; - r__ *= 8.; - ra *= 8.; + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; goto L180; /* Now balance. */ @@ -2028,7 +2063,7 @@ static logical c_true = TRUE_; taup, doublereal *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__; @@ -2039,10 +2074,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2204,10 +2239,13 @@ static logical c_true = TRUE_; /* Apply H(i) to A(i:m,i+1:n) from the left */ - i__2 = *m - i__ + 1; - i__3 = *n - i__; - dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + ); + } a[i__ + i__ * a_dim1] = d__[i__]; if (i__ < *n) { @@ -2257,12 +2295,12 @@ static logical c_true = TRUE_; /* Apply G(i) to A(i+1:m,i:n) from the right */ - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } a[i__ + i__ * a_dim1] = d__[i__]; if (i__ < *m) { @@ -2327,10 +2365,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2389,7 +2427,7 @@ static logical c_true = TRUE_; The scalar factors of the elementary reflectors which represent the orthogonal matrix P. See Further Details. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -2619,7 +2657,7 @@ static logical c_true = TRUE_; { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; + i__2, i__3; doublereal d__1, d__2; /* Builtin functions */ @@ -2634,7 +2672,6 @@ static logical c_true = TRUE_; static doublereal dum[1], eps; static integer ibal; static char side[1]; - static integer maxb; static doublereal anrm; static integer ierr, itau; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, @@ -2684,10 +2721,10 @@ static logical c_true = TRUE_; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 8, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2765,7 +2802,7 @@ static logical c_true = TRUE_; The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -2837,69 +2874,66 @@ static logical c_true = TRUE_; the worst case.) */ - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, n, & - c__0, (ftnlen)6, (ftnlen)1); - if (! wantvl && ! 
wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n * 3; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; /* Computing MAX */ - i__1 = 1, i__2 = *n << 2; - minwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = (integer) work[1]; /* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "DOR" - "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; /* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = 
max(i__1,i__2); + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + dhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; /* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *n << 2; - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else { + minwrk = *n * 3; + dhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = (integer) work[1]; +/* Computing MAX */ + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,minwrk); } work[1] = (doublereal) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } } - if (*lwork < minwrk && ! lquery) { - *info = -13; - } + if (*info != 0) { i__1 = -(*info); xerbla_("DGEEV ", &i__1); @@ -3195,10 +3229,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -3349,7 +3383,7 @@ static logical c_true = TRUE_; integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ - static integer i__; + static integer i__, j; static doublereal t[4160] /* was [65][64] */; static integer ib; static doublereal ei; @@ -3358,13 +3392,17 @@ static logical c_true = TRUE_; integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer nbmin, iinfo; - extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *), + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), daxpy_( + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), dgehd2_(integer *, integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *), dlahr2_( + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), dlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *, - doublereal *, integer *, doublereal *, doublereal *, integer *, - doublereal *, integer *), xerbla_(char *, integer *); + doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, lwkopt; @@ -3372,10 +3410,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose @@ -3464,6 +3502,10 @@ static logical c_true = TRUE_; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) + ===================================================================== @@ -3527,7 +3569,7 @@ static logical c_true = TRUE_; } /* - Determine the block size. + Determine the block size Computing MIN */ @@ -3540,7 +3582,7 @@ static logical c_true = TRUE_; /* Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). + (last block is always handled by unblocked code) Computing MAX */ @@ -3549,7 +3591,7 @@ static logical c_true = TRUE_; nx = max(i__1,i__2); if (nx < nh) { -/* Determine if workspace is large enough for blocked code. */ +/* Determine if workspace is large enough for blocked code */ iws = *n * nb; if (*lwork < iws) { @@ -3557,7 +3599,7 @@ static logical c_true = TRUE_; /* Not enough workspace to use optimal NB: determine the minimum value of NB, and reduce NB or force use of - unblocked code. + unblocked code Computing MAX */ @@ -3597,13 +3639,13 @@ static logical c_true = TRUE_; which performs the reduction, and also the matrix Y = A*V*T */ - dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & c__65, &work[1], &ldwork); /* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. 
+ to 1 */ ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; @@ -3614,6 +3656,21 @@ static logical c_true = TRUE_; c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b15, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + daxpy_(&i__, &c_b151, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + /* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the left @@ -3624,7 +3681,7 @@ static logical c_true = TRUE_; dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); -/* L30: */ +/* L40: */ } } @@ -3655,10 +3712,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -3793,10 +3850,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -3829,7 +3886,7 @@ static logical c_true = TRUE_; The scalar factors of the elementary reflectors (see Further Details). 
- WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -4051,17 +4108,17 @@ static logical c_true = TRUE_; extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); - static integer minwrk, maxwrk; + static integer liwork, minwrk, maxwrk; static doublereal smlnum; static logical lquery; static integer smlsiz; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK driver routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -4140,7 +4197,7 @@ static logical c_true = TRUE_; The effective rank of A, i.e., the number of singular values which are greater than RCOND*S(1). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -4162,9 +4219,10 @@ static logical c_true = TRUE_; this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, + IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) + LIWORK >= max(1, 3 * MINMN * NLVL + 11 * MINMN), where MINMN = MIN( M,N ). + On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. 
INFO (output) INTEGER = 0: successful exit @@ -4230,6 +4288,7 @@ static logical c_true = TRUE_; */ minwrk = 1; + liwork = 1; minmn = max(1,minmn); /* Computing MAX */ i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) / @@ -4238,6 +4297,7 @@ static logical c_true = TRUE_; if (*info == 0) { maxwrk = 0; + liwork = minmn * 3 * nlvl + minmn * 11; mm = *m; if (*m >= *n && *m >= mnthr) { @@ -4328,6 +4388,16 @@ static logical c_true = TRUE_; /* Computing MAX */ i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + wlalsd; maxwrk = max(i__1,i__2); +/* + XXX: Ensure the Path 2a case below is triggered. The workspace + calculation should use queries for all routines eventually. + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = + max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4); + maxwrk = max(i__1,i__2); } else { /* Path 2 - remaining underdetermined cases. */ @@ -4353,6 +4423,7 @@ static logical c_true = TRUE_; } minwrk = min(minwrk,maxwrk); work[1] = (doublereal) maxwrk; + iwork[1] = liwork; if (*lwork < minwrk && ! 
lquery) { *info = -12; } @@ -4520,8 +4591,8 @@ static logical c_true = TRUE_; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = *m, i__2 = (*m << 1) - 4, i__1 = max(i__1,i__2), i__1 = max( - i__1,*nrhs), i__2 = *n - *m * 3; - if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,i__2)) { + i__1,*nrhs), i__2 = *n - *m * 3, i__1 = max(i__1,i__2); + if (*n >= mnthr && *lwork >= (*m << 2) + *m * *m + max(i__1,wlalsd)) { /* Path 2a - underdetermined, with many more columns than rows @@ -4536,7 +4607,8 @@ static logical c_true = TRUE_; i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; i__1 = (*m << 2) + *m * *lda + max(i__3,i__4), i__2 = *m * *lda + - *m + *m * *nrhs; + *m + *m * *nrhs, i__1 = max(i__1,i__2), i__2 = (*m << 2) + + *m * *lda + wlalsd; if (*lwork >= max(i__1,i__2)) { ldwork = *lda; } @@ -4680,6 +4752,7 @@ static logical c_true = TRUE_; L10: work[1] = (doublereal) maxwrk; + iwork[1] = liwork; return 0; /* End of DGELSD */ @@ -4702,10 +4775,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -4840,10 +4913,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -4877,7 +4950,7 @@ static logical c_true = TRUE_; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -5111,10 +5184,10 @@ static logical c_true = TRUE_; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK driver routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + March 2009 Purpose @@ -5160,7 +5233,7 @@ static logical c_true = TRUE_; the array VT; otherwise, all columns of U are returned in the array U and the first M rows of V**T are overwritten - in the array VT; + in the array A; = 'N': no columns of U or rows of V**T are computed. M (input) INTEGER @@ -5211,21 +5284,21 @@ static logical c_true = TRUE_; JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; if JOBZ = 'S', LDVT >= min(M,N). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK; LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 1. If JOBZ = 'N', - LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). + LWORK >= 3*min(M,N) + max(max(M,N),7*min(M,N)). If JOBZ = 'O', - LWORK >= 3*min(M,N)*min(M,N) + + LWORK >= 3*min(M,N) + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). If JOBZ = 'S' or 'A' - LWORK >= 3*min(M,N)*min(M,N) + + LWORK >= 3*min(M,N) + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). For good performance, LWORK should generally be larger. 
- If LWORK < 0 but other input arguments are legal, WORK(1) + If LWORK = -1 but other input arguments are legal, WORK(1) returns the optimal LWORK. IWORK (workspace) INTEGER array, dimension (8*min(M,N)) @@ -5265,14 +5338,11 @@ static logical c_true = TRUE_; /* Function Body */ *info = 0; minmn = min(*m,*n); - mnthr = (integer) (minmn * 11. / 6.); wntqa = lsame_(jobz, "A"); wntqs = lsame_(jobz, "S"); wntqas = wntqa || wntqs; wntqo = lsame_(jobz, "O"); wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; lquery = *lwork == -1; if (! (wntqa || wntqs || wntqo || wntqn)) { @@ -5300,11 +5370,14 @@ static logical c_true = TRUE_; following subroutine, as returned by ILAENV.) */ - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { /* Compute space needed for DBDSDC */ + mnthr = (integer) (minmn * 11. / 6.); if (wntqn) { bdspac = *n * 7; } else { @@ -5466,10 +5539,11 @@ static logical c_true = TRUE_; minwrk = *n * 3 + max(*m,bdspac); } } - } else { + } else if (minmn > 0) { /* Compute space needed for DBDSDC */ + mnthr = (integer) (minmn * 11. / 6.); if (wntqn) { bdspac = *m * 7; } else { @@ -5632,12 +5706,14 @@ static logical c_true = TRUE_; } } } + maxwrk = max(maxwrk,minwrk); work[1] = (doublereal) maxwrk; - } - if (*lwork < minwrk && ! lquery) { - *info = -12; + if (*lwork < minwrk && ! 
lquery) { + *info = -12; + } } + if (*info != 0) { i__1 = -(*info); xerbla_("DGESDD", &i__1); @@ -5649,9 +5725,6 @@ static logical c_true = TRUE_; /* Quick return if possible */ if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1] = 1.; - } return 0; } @@ -6195,10 +6268,12 @@ static logical c_true = TRUE_; /* Set the right corner of U to identity matrix */ - i__1 = *m - *n; - i__2 = *m - *n; - dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } /* Overwrite U by left singular vectors of A and VT @@ -6733,10 +6808,12 @@ static logical c_true = TRUE_; /* Set the right corner of VT to identity matrix */ - i__1 = *n - *m; - i__2 = *n - *m; - dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + dlaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + } /* Overwrite U by left singular vectors of A and VT @@ -6793,10 +6870,10 @@ static logical c_true = TRUE_; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6906,21 +6983,24 @@ static logical c_true = TRUE_; doublereal d__1; /* Local variables */ - static integer j, jp; + static integer i__, j, jp; extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *), dscal_(integer *, doublereal *, doublereal *, integer - *), dswap_(integer *, doublereal *, integer *, doublereal *, - integer *); + *); + static doublereal sfmin; + extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *, + doublereal *, integer *); + extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int xerbla_(char *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6999,6 +7079,10 @@ static logical c_true = TRUE_; return 0; } +/* Compute machine safe minimum */ + + sfmin = SAFEMINIMUM; + i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { @@ -7018,9 +7102,17 @@ static logical c_true = TRUE_; /* Compute elements J+1:M of J-th column. */ if (j < *m) { - i__2 = *m - j; - d__1 = 1. / a[j + j * a_dim1]; - dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + if ((d__1 = a[j + j * a_dim1], abs(d__1)) >= sfmin) { + i__2 = *m - j; + d__1 = 1. / a[j + j * a_dim1]; + dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } } } else if (*info == 0) { @@ -7070,10 +7162,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7253,10 +7345,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7400,169 +7492,270 @@ static logical c_true = TRUE_; { /* System generated locals */ address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, - i__5; - doublereal d__1, d__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + doublereal d__1; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ - static integer i__, j, k, l; - static doublereal s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static doublereal vv[16]; - static integer itn; - static doublereal tau; - static integer its; - static doublereal ulp, tst1; - static integer maxb; - static doublereal absw; - static integer ierr; - static doublereal unfl, temp, ovfl; - extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *); + static integer i__; + static doublereal hl[2401] /* was [49][49] */; + static integer kbot, nmin; extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); - static integer itemp; - 
extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *); - static logical initz, wantt, wantz; - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, - integer *, doublereal *); - extern integer idamax_(integer *, doublereal *, integer *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + static logical initz; + static doublereal workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int dlaqr0_(logical *, logical *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, - integer *), dlacpy_(char *, integer *, integer *, doublereal *, - integer *, doublereal *, integer *), dlaset_(char *, - integer *, integer *, doublereal *, doublereal *, doublereal *, - integer *); + doublereal *, integer *, integer *), dlahqr_(logical *, logical *, + integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlaset_(char *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_( - char *, integer *, integer *, doublereal *, doublereal *, - doublereal *, integer *, doublereal *); - static doublereal smlnum; + extern /* Subroutine */ int xerbla_(char *, integer *); static logical lquery; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H - and, optionally, the matrices T and Z from the Schur decomposition - H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur - form), and Z is the orthogonal matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input orthogonal matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the orthogonal - matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an orthogonal matrix Q on entry, and - the product Q*Z is returned. - - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to DGEBAL, and then passed to SGEHRD - when the matrix output by DGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) DOUBLE PRECISION array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper quasi-triangular - matrix T from the Schur decomposition (the Schur form); - 2-by-2 diagonal blocks (corresponding to complex conjugate - pairs of eigenvalues) are returned in standard form, with - H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', - the contents of H are unspecified on exit. 
- - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues. If two eigenvalues are computed as a complex - conjugate pair, they are stored in consecutive elements of - WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and - WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the - same order as on the diagonal of the Schur form returned in - H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 - diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and - WI(i+1) = -WI(i). - - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the orthogonal matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the orthogonal matrix generated by DORGHR after - the call to DGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ June 2010 + + Purpose + ======= + + DHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to DGEBAL, and then passed to DGEHRD + when the matrix output by DGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', then H contains the + upper quasi-triangular matrix T from the Schur decomposition + (the Schur form); 2-by-2 diagonal blocks (corresponding to + complex conjugate pairs of eigenvalues) are returned in + standard form, with H(i,i) = H(i+1,i+1) and + H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the + contents of H are unspecified on exit. 
(The output value of + H when INFO.GT.0 is given under the description of INFO + below.) + + Unlike earlier versions of DHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and + WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in + the same order as on the diagonal of the Schur form returned + in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the orthogonal matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the orthogonal matrix generated by DORGHR + after the call to DGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. + + WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. 
max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then DHSEQR does a workspace query. + In this case, DHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, DHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'DHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). 
+ It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The DLAHQR vs DLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + DLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. 
Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, DHSEQR failed to compute all of the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of WR and WI contain those - eigenvalues which have been successfully computed. + ================================================================ - ===================================================================== + ==== Matrices of order NTINY or smaller must be processed by + . DLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + ==== NL allocates some local workspace to help small matrices + . through a rare DLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== - Decode and test the input parameters + ==== Decode and check the input parameters. ==== */ /* Parameter adjustments */ @@ -7580,10 +7773,10 @@ static logical c_true = TRUE_; wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); wantz = initz || lsame_(compz, "V"); - - *info = 0; work[1] = (doublereal) max(1,*n); lquery = *lwork == -1; + + *info = 0; if (! lsame_(job, "E") && ! wantt) { *info = -1; } else if (! lsame_(compz, "N") && ! wantz) { @@ -7601,401 +7794,195 @@ static logical c_true = TRUE_; } else if (*lwork < max(1,*n) && ! lquery) { *info = -13; } + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. 
==== */ + i__1 = -(*info); xerbla_("DHSEQR", &i__1); return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - if (initz) { - dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by DGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.; -/* L20: */ - } + } else if (*n == 0) { -/* Quick return if possible. */ +/* ==== Quick return in case N = 0; nothing to do. ==== */ - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.; return 0; - } -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ + } else if (lquery) { - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; +/* ==== Quick return in case of a workspace query ==== */ + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); /* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. 
==== + Computing MAX */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 2 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ - 1], ilo, ihi, &z__[z_offset], ldz, info); + d__1 = (doublereal) max(1,*n); + work[1] = max(d__1,work[1]); return 0; - } - maxb = max(3,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); -/* - Now 2 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. 
-*/ - - i__ = *ihi; -L50: - l = *ilo; - if (i__ < *ilo) { - goto L170; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + } else { -/* Look for a single small subdiagonal element. */ +/* ==== copy eigenvalues isolated by DGEBAL ==== */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) { - i__4 = i__ - l + 1; - tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] - ); - } -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L70; - } -/* L60: */ + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.; +/* L20: */ } -L70: - l = k; - if (l > *ilo) { -/* H(L,L-1) is negligible. */ +/* ==== Initialize Z, if requested ==== */ - h__[l + (l - 1) * h_dim1] = 0.; + if (initz) { + dlaset_("A", n, n, &c_b29, &c_b15, &z__[z_offset], ldz) + ; } -/* Exit from loop if a submatrix of order <= MAXB has split off. */ +/* ==== Quick return if possible ==== */ - if (l >= i__ - maxb + 1) { - goto L160; + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.; + return 0; } /* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ + ==== DLAHQR/DLAQR0 crossover point ==== - if (! 
wantt) { - i1 = l; - i2 = i__; - } - - if (its == 20 || its == 30) { + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "DHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); -/* Exceptional shifts. */ +/* ==== DLAQR0 for big matrices; DLAHQR for small ones ==== */ - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + ( - d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5; - wi[ii] = 0.; -/* L80: */ - } + if (*n > nmin) { + dlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, + info); } else { -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ +/* ==== Small matrix ==== */ - dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], - ldz, &ierr); - if (ierr > 0) { + dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, info); + + if (*info > 0) { /* - If DLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. + ==== A rare DLAHQR failure! DLAQR0 sometimes succeeds + . when DLAHQR fails. ==== */ - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.; -/* L90: */ - } - } - } + kbot = *info; + + if (*n >= 49) { /* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in WR and WI). The result is - stored in the local array V. + ==== Larger matrices have enough subdiagonal scratch + . space to call DLAQR0 directly. 
==== */ - v[0] = 1.; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.; -/* L100: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.) { - if (wi[j] == 0.) { - -/* real shift */ - - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = -wr[j]; - dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * - h_dim1], ldh, vv, &c__1, &d__1, v, &c__1); - ++nv; - } else if (wi[j] > 0.) { - -/* complex conjugate pair of shifts */ - - i__4 = nv + 1; - dcopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - d__1 = wr[j] * -2.; - dgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * - h_dim1], ldh, v, &c__1, &d__1, vv, &c__1); - i__4 = nv + 1; - itemp = idamax_(&i__4, vv, &c__1); -/* Computing MAX */ - d__2 = (d__1 = vv[itemp - 1], abs(d__1)); - temp = 1. / max(d__2,smlnum); - i__4 = nv + 1; - dscal_(&i__4, &temp, vv, &c__1); - absw = dlapy2_(&wr[j], &wi[j]); - temp = temp * absw * absw; - i__4 = nv + 2; - i__5 = nv + 1; - dgemv_("No transpose", &i__4, &i__5, &c_b15, &h__[l + l * - h_dim1], ldh, vv, &c__1, &temp, v, &c__1); - nv += 2; - } + dlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], + ldz, &work[1], lwork, info); + + } else { /* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from DLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling DLAQR0. ==== */ - itemp = idamax_(&nv, v, &c__1); - temp = (d__1 = v[itemp - 1], abs(d__1)); - if (temp == 0.) 
{ - v[0] = 1.; - i__4 = nv; - for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.; -/* L110: */ + dlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + hl[*n + 1 + *n * 49 - 50] = 0.; + i__1 = 49 - *n; + dlaset_("A", &c__49, &i__1, &c_b29, &c_b29, &hl[(*n + 1) * + 49 - 49], &c__49); + dlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, + workl, &c__49, info); + if (wantt || *info != 0) { + dlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); } - } else { - temp = max(temp,smlnum); - d__1 = 1. / temp; - dscal_(&nv, &d__1, v, &c__1); } } -/* L120: */ } -/* Multiple-shift QR step */ +/* ==== Clear out the trash, if necessary. ==== */ - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + dlaset_("L", &i__1, &i__3, &c_b29, &c_b29, &h__[h_dim1 + 3], ldh); + } /* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__4 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__4,i__5); - if (k > l) { - dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - dlarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - h__[k + (k - 1) * h_dim1] = v[0]; - i__4 = i__; - for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.; -/* L130: */ - } - } - v[0] = 1.; + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== -/* - Apply G from the left to transform the rows of the matrix in - columns K to I2. 
+ Computing MAX */ + d__1 = (doublereal) max(1,*n); + work[1] = max(d__1,work[1]); + } - i__4 = i2 - k + 1; - dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). +/* ==== End of DHSEQR ==== */ - Computing MIN -*/ - i__5 = k + nr; - i__4 = min(i__5,i__) - i1 + 1; - dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); + return 0; +} /* dhseqr_ */ - if (wantz) { +logical disnan_(doublereal *din) +{ + /* System generated locals */ + logical ret_val; -/* Accumulate transformations in the matrix Z */ + /* Local variables */ + extern logical dlaisnan_(doublereal *, doublereal *); - dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L140: */ - } -/* L150: */ - } +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 -/* Failure to converge in remaining number of iterations */ - *info = i__; - return 0; + Purpose + ======= -L160: + DISNAN returns .TRUE. if its argument is NaN, and .FALSE. + otherwise. To be replaced by the Fortran 2003 intrinsic in the + future. -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ + Arguments + ========= - dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], - ilo, ihi, &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } + DIN (input) DOUBLE PRECISION + Input to test for NaN. -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. 
+ ===================================================================== */ - itn -= its; - i__ = l - 1; - goto L50; - -L170: - work[1] = (doublereal) max(1,*n); - return 0; - -/* End of DHSEQR */ - -} /* dhseqr_ */ + ret_val = dlaisnan_(din, din); + return ret_val; +} /* disnan_ */ /* Subroutine */ int dlabad_(doublereal *small, doublereal *large) { @@ -8004,10 +7991,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8072,10 +8059,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8151,7 +8138,7 @@ static logical c_true = TRUE_; The n-by-nb matrix Y required to update the unreduced part of A. - LDY (output) INTEGER + LDY (input) INTEGER The leading dimension of the array Y. LDY >= N. Further Details @@ -8468,10 +8455,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8566,10 +8553,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8633,10 +8620,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8792,10 +8779,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9191,10 +9178,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9425,10 +9412,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9936,10 +9923,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10250,10 +10237,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - December 23, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10292,10 +10279,10 @@ static logical c_true = TRUE_; The components of the updating vector. DELTA (output) DOUBLE PRECISION array, dimension (N) - If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - eigenvectors. + If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. If N = 2, see DLAED5 + for detail. The vector DELTA contains the information necessary + to construct the eigenvectors by DLAED3 and DLAED9. RHO (input) DOUBLE PRECISION The scalar in the symmetric updating formula. 
@@ -10911,7 +10898,6 @@ static logical c_true = TRUE_; prew = w; -/* L170: */ i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; @@ -11177,10 +11163,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11293,10 +11279,6 @@ static logical c_true = TRUE_; rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal * tau, integer *info) { - /* Initialized data */ - - static logical first = TRUE_; - /* System generated locals */ integer i__1; doublereal d__1, d__2, d__3, d__4; @@ -11307,7 +11289,7 @@ static logical c_true = TRUE_; /* Local variables */ static doublereal a, b, c__, f; static integer i__; - static doublereal fc, df, ddf, eta, eps, base; + static doublereal fc, df, ddf, lbd, eta, ubd, eps, base; static integer iter; static doublereal temp, temp1, temp2, temp3, temp4; static logical scale; @@ -11318,10 +11300,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + February 2007 Purpose @@ -11377,21 +11359,40 @@ static logical c_true = TRUE_; Further Details =============== - Based on contributions by + 30/06/99: Based on contributions by Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA + 10/02/03: This version has a few statements commented out for thread + safety (machine parameters are computed on each entry). SJH. + + 05/10/06: Modified from a new version of Ren-Cang Li, use + Gragg-Thornton-Warner cubic convergent scheme for better stability. + ===================================================================== */ + /* Parameter adjustments */ --z__; --d__; /* Function Body */ - *info = 0; + if (*orgati) { + lbd = d__[2]; + ubd = d__[3]; + } else { + lbd = d__[1]; + ubd = d__[2]; + } + if (*finit < 0.) { + lbd = 0.; + } else { + ubd = 0.; + } + niter = 1; *tau = 0.; if (*kniter == 2) { @@ -11421,28 +11422,41 @@ static logical c_true = TRUE_; *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)) )); } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (abs(*finit) <= abs(temp)) { + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } + if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { *tau = 0.; + } else { + temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau + * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( + d__[3] * (d__[3] - *tau)); + if (temp <= 0.) 
{ + lbd = *tau; + } else { + ubd = *tau; + } + if (abs(*finit) <= abs(temp)) { + *tau = 0.; + } } } /* - On first call to routine, get machine parameters for - possible scaling to avoid overflow + get machine parameters for possible scaling to avoid overflow + + modified by Sven: parameters SMALL1, SMINV1, SMALL2, + SMINV2, EPS are not SAVEd anymore between one call to the + others but recomputed at each call */ - if (first) { - eps = EPSILON; - base = BASE; - i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); - small1 = pow_di(&base, &i__1); - sminv1 = 1. / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } + eps = EPSILON; + base = BASE; + i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.); + small1 = pow_di(&base, &i__1); + sminv1 = 1. / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; /* Determine if scaling of inputs necessary to avoid overflow @@ -11485,6 +11499,8 @@ static logical c_true = TRUE_; /* L10: */ } *tau *= sclfac; + lbd *= sclfac; + ubd *= sclfac; } else { /* Copy D and Z to DSCALE and ZSCALE */ @@ -11514,9 +11530,15 @@ static logical c_true = TRUE_; if (abs(f) <= 0.) { goto L60; } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } /* - Iteration begins + Iteration begins -- Use Gragg-Thornton-Warner cubic convergent + scheme It is not hard to see that @@ -11529,7 +11551,7 @@ static logical c_true = TRUE_; iter = niter + 1; - for (niter = iter; niter <= 20; ++niter) { + for (niter = iter; niter <= 40; ++niter) { if (*orgati) { temp1 = dscale[1] - *tau; @@ -11560,23 +11582,10 @@ static logical c_true = TRUE_; eta = -f / df; } - temp = eta + *tau; - if (*orgati) { - if (eta > 0. && temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.; - } - if (eta < 0. && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - } else { - if (eta > 0. && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.; - } - if (eta < 0. 
&& temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.; - } - } *tau += eta; + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.; + } fc = 0.; erretm = 0.; @@ -11599,6 +11608,11 @@ static logical c_true = TRUE_; if (abs(f) <= eps * erretm) { goto L60; } + if (f <= 0.) { + lbd = *tau; + } else { + ubd = *tau; + } /* L50: */ } *info = 1; @@ -11652,10 +11666,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11976,10 +11990,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -12151,6 +12165,15 @@ static logical c_true = TRUE_; return 0; } +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). +*/ + + *givptr = 0; + /* Quick return if possible */ if (*n == 0) { @@ -12242,7 +12265,6 @@ static logical c_true = TRUE_; */ *k = 0; - *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -12421,10 +12443,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -12669,10 +12691,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -12825,7 +12847,7 @@ static logical c_true = TRUE_; } /* - Loop thru remaining levels 1 -> CURLVL applying the Givens + Loop through remaining levels 1 -> CURLVL applying the Givens rotations and permutation and then multiplying the center matrices against the current Z. */ @@ -12924,10 +12946,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -13080,78 +13102,512 @@ static logical c_true = TRUE_; } /* dlaev2_ */ +/* Subroutine */ int dlaexc_(logical *wantq, integer *n, doublereal *t, + integer *ldt, doublereal *q, integer *ldq, integer *j1, integer *n1, + integer *n2, doublereal *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + doublereal d__1, d__2, d__3; + + /* Local variables */ + static doublereal d__[16] /* was [4][4] */; + static integer k; + static doublereal u[3], x[4] /* was [2][2] */; + static integer j2, j3, j4; + static doublereal u1[3], u2[3]; + static integer nd; + static doublereal cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, + tau1, tau2; + static integer ierr; + static doublereal temp; + extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, + doublereal *, integer *, doublereal *, doublereal *); + static doublereal scale, dnorm, xnorm; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlasy2_( + logical *, logical *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *); + extern doublereal dlamch_(char *), dlange_(char *, integer *, + integer *, doublereal *, integer *, doublereal *); + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *), + dlartg_(doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *), dlarfx_(char *, integer *, integer *, doublereal *, + doublereal *, doublereal *, integer *, doublereal *); + static doublereal thresh, smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package 
provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + DLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + an upper quasi-triangular matrix T by an orthogonal similarity + transformation. + + T must be in Schur canonical form, that is, block upper triangular + with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + has its diagonal elemnts equal and its off-diagonal elements of + opposite sign. + + Arguments + ========= + + WANTQ (input) LOGICAL + = .TRUE. : accumulate the transformation in the matrix Q; + = .FALSE.: do not accumulate the transformation. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) DOUBLE PRECISION array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + canonical form. + On exit, the updated matrix T, again in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, if WANTQ is .TRUE., the orthogonal matrix Q. + On exit, if WANTQ is .TRUE., the updated matrix Q. + If WANTQ is .FALSE., Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. + LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. + + J1 (input) INTEGER + The index of the first row of the first block T11. + + N1 (input) INTEGER + The order of the first block T11. N1 = 0, 1 or 2. + + N2 (input) INTEGER + The order of the second block T22. N2 = 0, 1 or 2. + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + = 1: the transformed matrix T would be too far from Schur + form; the blocks are not swapped and T and Q are + unchanged. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + + if (*n1 == 1 && *n2 == 1) { + +/* Swap two 1-by-1 blocks. */ + + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + +/* Determine the transformation to perform the interchange. */ + + d__1 = t22 - t11; + dlartg_(&t[*j1 + j2 * t_dim1], &d__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], + ldt, &cs, &sn); + } + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, + &cs, &sn); + + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, + &cs, &sn); + } + + } else { + +/* + Swapping involves at least one 2-by-2 block. + + Copy the diagonal block of order N1+N2 to the local array D + and compute its norm. +*/ + + nd = *n1 + *n2; + dlacpy_("Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4); + dnorm = dlange_("Max", &nd, &nd, d__, &c__4, &work[1]); + +/* + Compute machine-dependent threshold for test for accepting + swap. +*/ + + eps = PRECISION; + smlnum = SAFEMINIMUM / eps; +/* Computing MAX */ + d__1 = eps * 10. * dnorm; + thresh = max(d__1,smlnum); + +/* Solve T11*X - X*T22 = scale*T12 for X. 
*/ + + dlasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + + (*n1 + 1 << 2) - 5], &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, & + scale, x, &c__2, &xnorm, &ierr); + +/* Swap the adjacent diagonal blocks. */ + + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + } + +L10: + +/* + N1 = 1, N2 = 2: generate elementary reflector H so that: + + ( scale, X11, X12 ) H = ( 0, 0, * ) +*/ + + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + dlarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.; + t11 = t[*j1 + *j1 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + d__2 = abs(d__[2]), d__3 = abs(d__[6]), d__2 = max(d__2,d__3), d__3 = + (d__1 = d__[10] - t11, abs(d__1)); + if (max(d__2,d__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + dlarfx_("R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j3 + j3 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L20: + +/* + N1 = 2, N2 = 1: generate elementary reflector H so that: + + H ( -X11 ) = ( * ) + ( -X21 ) = ( 0 ) + ( scale ) = ( 0 ) +*/ + + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + dlarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.; + t33 = t[j3 + j3 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + dlarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + dlarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. 
+ + Computing MAX +*/ + d__2 = abs(d__[1]), d__3 = abs(d__[2]), d__2 = max(d__2,d__3), d__3 = + (d__1 = d__[0] - t33, abs(d__1)); + if (max(d__2,d__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + dlarfx_("R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + i__1 = *n - *j1; + dlarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[ + 1]); + + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.; + t[j3 + *j1 * t_dim1] = 0.; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + dlarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L30: + +/* + N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so + that: + + H(2) H(1) ( -X11 -X12 ) = ( * * ) + ( -X21 -X22 ) ( 0 * ) + ( scale 0 ) ( 0 0 ) + ( 0 scale ) ( 0 0 ) +*/ + + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + dlarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.; + + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + dlarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.; + +/* Perform swap provisionally on diagonal block in D. */ + + dlarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) + ; + dlarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) + ; + dlarfx_("L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1]); + dlarfx_("R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + d__1 = abs(d__[2]), d__2 = abs(d__[6]), d__1 = max(d__1,d__2), d__2 = + abs(d__[3]), d__1 = max(d__1,d__2), d__2 = abs(d__[7]); + if (max(d__1,d__2) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. 
*/ + + i__1 = *n - *j1 + 1; + dlarfx_("L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + dlarfx_("R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[ + 1]); + i__1 = *n - *j1 + 1; + dlarfx_("L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, & + work[1]); + dlarfx_("R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1] + ); + + t[j3 + *j1 * t_dim1] = 0.; + t[j3 + j2 * t_dim1] = 0.; + t[j4 + *j1 * t_dim1] = 0.; + t[j4 + j2 * t_dim1] = 0.; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + dlarfx_("R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, & + work[1]); + dlarfx_("R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[ + 1]); + } + +L40: + + if (*n2 == 2) { + +/* Standardize new 2-by-2 block T11 */ + + dlanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + * + j1 * t_dim1], &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, & + wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + drot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) + * t_dim1], ldt, &cs, &sn); + i__1 = *j1 - 1; + drot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + if (*n1 == 2) { + +/* Standardize new 2-by-2 block T22 */ + + j3 = *j1 + *n2; + j4 = j3 + 1; + dlanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * + t_dim1], &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, & + cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + drot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) + * t_dim1], ldt, &cs, &sn); + } + i__1 = j3 - 1; + drot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + drot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + } + return 0; + +/* Exit with INFO = 1 if swap was rejected. 
*/ + +L50: + *info = 1; + return 0; + +/* End of DLAEXC */ + +} /* dlaexc_ */ + /* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, integer *info) { /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; + doublereal d__1, d__2, d__3, d__4; /* Builtin functions */ - double sqrt(doublereal), d_sign(doublereal *, doublereal *); + double sqrt(doublereal); /* Local variables */ static integer i__, j, k, l, m; static doublereal s, v[3]; static integer i1, i2; - static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, - h33, h44; + static doublereal t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, + cs; static integer nh; - static doublereal cs; - static integer nr; static doublereal sn; + static integer nr; + static doublereal tr; static integer nz; - static doublereal ave, h33s, h44s; - static integer itn, its; - static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl; + static doublereal det, h21s; + static integer its; + static doublereal ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, - doublereal *, integer *, doublereal *, doublereal *); - static doublereal work[1]; - extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, - doublereal *, integer *), dlanv2_(doublereal *, doublereal *, + doublereal *, integer *, doublereal *, doublereal *), dcopy_( + integer *, doublereal *, integer *, doublereal *, integer *), + dlanv2_(doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( - doublereal *, doublereal *); + doublereal *, doublereal *), 
dlabad_(doublereal *, doublereal *); extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, integer *, doublereal *); - extern doublereal dlanhs_(char *, integer *, doublereal *, integer *, - doublereal *); - static doublereal smlnum; + static doublereal safmin, safmax, rtdisc, smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 - Purpose - ======= + Purpose + ======= - DLAHQR is an auxiliary routine called by DHSEQR to update the - eigenvalues and Schur decomposition already computed by DHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + DLAHQR is an auxiliary routine called by DHSEQR to update the + eigenvalues and Schur decomposition already computed by DHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. - Arguments - ========= + Arguments + ========= - WANTT (input) LOGICAL + WANTT (input) LOGICAL = .TRUE. : the full Schur form T is required; = .FALSE.: only eigenvalues are required. - WANTZ (input) LOGICAL + WANTZ (input) LOGICAL = .TRUE. : the matrix of Schur vectors Z is required; = .FALSE.: Schur vectors are not required. - N (input) INTEGER + N (input) INTEGER The order of the matrix H. N >= 0. - ILO (input) INTEGER - IHI (input) INTEGER + ILO (input) INTEGER + IHI (input) INTEGER It is assumed that H is already upper quasi-triangular in rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). DLAHQR works primarily with the Hessenberg @@ -13159,18 +13615,20 @@ static logical c_true = TRUE_; transformations to all of H if WANTT is .TRUE.. 1 <= ILO <= max(1,IHI); IHI <= N. 
- H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. - - LDH (input) INTEGER + On exit, if INFO is zero and if WANTT is .TRUE., H is upper + quasi-triangular in rows and columns ILO:IHI, with any + 2-by-2 diagonal blocks in standard form. If INFO is zero + and WANTT is .FALSE., the contents of H are unspecified on + exit. The output state of H if INFO is nonzero is given + below under the description of INFO. + + LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). - WR (output) DOUBLE PRECISION array, dimension (N) - WI (output) DOUBLE PRECISION array, dimension (N) + WR (output) DOUBLE PRECISION array, dimension (N) + WI (output) DOUBLE PRECISION array, dimension (N) The real and imaginary parts, respectively, of the computed eigenvalues ILO to IHI are stored in the corresponding elements of WR and WI. If two eigenvalues are computed as a @@ -13182,36 +13640,61 @@ static logical c_true = TRUE_; H(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - ILOZ (input) INTEGER - IHIZ (input) INTEGER + ILOZ (input) INTEGER + IHIZ (input) INTEGER Specify the rows of Z to which transformations must be applied if WANTZ is .TRUE.. 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) If WANTZ is .TRUE., on entry Z must contain the current matrix Z of transformations accumulated by DHSEQR, and on exit Z has been updated; transformations are applied only to the submatrix Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not referenced. 
- LDZ (input) INTEGER + LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). - INFO (output) INTEGER - = 0: successful exit - > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. - - Further Details - =============== - - 2-96 Based on modifications by + INFO (output) INTEGER + = 0: successful exit + .GT. 0: If INFO = i, DLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of WR and WI + contain those eigenvalues which have been + successfully computed. + + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix rows + and columns ILO thorugh INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by David Day, Sandia National Laboratory, USA - ===================================================================== + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of DLAHQR from LAPACK version 3.0. + It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). 
+ + ========================================================= */ @@ -13239,19 +13722,27 @@ static logical c_true = TRUE_; return 0; } +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.; + h__[j + 3 + j * h_dim1] = 0.; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.; + } + nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ +/* Set machine-dependent constants for the stopping criterion. */ - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); ulp = PRECISION; - smlnum = unfl * (nh / ulp); + smlnum = safmin * ((doublereal) nh / ulp); /* I1 and I2 are the indices of the first row and last column of H @@ -13264,10 +13755,6 @@ static logical c_true = TRUE_; i2 = *n; } -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of 1 or 2. Each iteration of the loop works @@ -13277,10 +13764,10 @@ static logical c_true = TRUE_; */ i__ = *ihi; -L10: +L20: l = *ilo; if (i__ < *ilo) { - goto L150; + goto L160; } /* @@ -13289,28 +13776,60 @@ static logical c_true = TRUE_; subdiagonal element has become negligible. */ - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + for (its = 0; its <= 30; ++its) { /* Look for a single small subdiagonal element. */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = - h__[k + k * h_dim1], abs(d__2)); - if (tst1 == 0.) 
{ - i__3 = i__ - l + 1; - tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= smlnum) { + goto L40; + } + tst = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 = + h__[k + k * h_dim1], abs(d__2)); + if (tst == 0.) { + if (k - 2 >= *ilo) { + tst += (d__1 = h__[k - 1 + (k - 2) * h_dim1], abs(d__1)); + } + if (k + 1 <= *ihi) { + tst += (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)); + } } +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some cases. ==== +*/ + if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= ulp * tst) { /* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2, - smlnum)) { - goto L30; + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( + d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ab = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)), d__4 = ( + d__2 = h__[k - 1 + k * h_dim1], abs(d__2)); + ba = min(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = + h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + abs(d__2)); + aa = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + k * h_dim1], abs(d__1)), d__4 = (d__2 = + h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + abs(d__2)); + bb = min(d__3,d__4); + s = aa + ab; +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1,d__2)) { + goto L40; + } } -/* L20: */ +/* L30: */ } -L30: +L40: l = k; if (l > *ilo) { @@ -13322,7 +13841,7 @@ static logical c_true = TRUE_; /* Exit from loop if a submatrix of order 1 or 2 has split off. 
*/ if (l >= i__ - 1) { - goto L140; + goto L150; } /* @@ -13336,15 +13855,26 @@ static logical c_true = TRUE_; i2 = i__; } - if (its == 10 || its == 20) { + if (its == 10) { + +/* Exceptional shift. */ + + s = (d__1 = h__[l + 1 + l * h_dim1], abs(d__1)) + (d__2 = h__[l + + 2 + (l + 1) * h_dim1], abs(d__2)); + h11 = s * .75 + h__[l + l * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; + } else if (its == 20) { /* Exceptional shift. */ s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2)); - h44 = s * .75 + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375 * s; + h11 = s * .75 + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375; + h21 = s; + h22 = h11; } else { /* @@ -13352,74 +13882,95 @@ static logical c_true = TRUE_; (i.e. 2nd degree generalized Rayleigh quotient) */ - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5; - disc = disc * disc + h43h34; - if (disc > 0.) { + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = abs(h11) + abs(h12) + abs(h21) + abs(h22); + if (s == 0.) { + rt1r = 0.; + rt1i = 0.; + rt2r = 0.; + rt2i = 0.; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((abs(det))); + if (det >= 0.) { + +/* ==== complex conjugate shifts ==== */ + + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { -/* Real roots: use Wilkinson's shift twice */ +/* ==== real shifts (use only one of them) ==== */ - disc = sqrt(disc); - ave = (h33 + h44) * .5; - if (abs(h33) - abs(h44) > 0.) 
{ - h33 = h33 * h44 - h43h34; - h44 = h33 / (d_sign(&disc, &ave) + ave); + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((d__1 = rt1r - h22, abs(d__1)) <= (d__2 = rt2r - h22, abs( + d__2))) { + rt1r *= s; + rt2r = rt1r; } else { - h44 = d_sign(&disc, &ave) + ave; + rt2r *= s; + rt1r = rt2r; } - h33 = h44; - h43h34 = 0.; + rt1i = 0.; + rt2i = 0.; } } /* Look for two consecutive small subdiagonal elements. */ - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { + i__1 = l; + for (m = i__ - 2; m >= i__1; --m) { /* Determine the effect of starting the double-shift QR iteration at row M, and see if this would make H(M,M-1) - negligible. -*/ - - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = abs(v1) + abs(v2) + abs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; + negligible. (The following uses scaling to avoid + overflows and most underflows.) 
+*/ + + h21s = h__[m + 1 + m * h_dim1]; + s = (d__1 = h__[m + m * h_dim1] - rt2r, abs(d__1)) + abs(rt2i) + + abs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - + rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i + / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] + - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = abs(v[0]) + abs(v[1]) + abs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; if (m == l) { - goto L50; + goto L60; } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22)); - if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) { - goto L50; + if ((d__1 = h__[m + (m - 1) * h_dim1], abs(d__1)) * (abs(v[1]) + + abs(v[2])) <= ulp * abs(v[0]) * ((d__2 = h__[m - 1 + (m - + 1) * h_dim1], abs(d__2)) + (d__3 = h__[m + m * h_dim1], + abs(d__3)) + (d__4 = h__[m + 1 + (m + 1) * h_dim1], abs( + d__4)))) { + goto L60; } -/* L40: */ +/* L50: */ } -L50: +L60: /* Double-shift QR step */ - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { /* The first iteration of this loop determines a reflection G @@ -13433,8 +13984,8 @@ static logical c_true = TRUE_; Computing MIN */ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); + i__2 = 3, i__3 = i__ - k + 1; + nr = min(i__2,i__3); if (k > m) { dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); } @@ -13446,7 +13997,13 @@ static logical c_true = TRUE_; h__[k + 2 + (k - 1) * h_dim1] = 0.; } } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; +/* + ==== Use the following instead of + . H( K, K-1 ) = -H( K, K-1 ) to + . avoid a bug when v(2) and v(3) + . underflow. ==== +*/ + h__[k + (k - 1) * h_dim1] *= 1. - t1; } v2 = v[1]; t2 = t1 * v2; @@ -13459,14 +14016,14 @@ static logical c_true = TRUE_; in columns K to I2. 
*/ - i__3 = i2; - for (j = k; j <= i__3; ++j) { + i__2 = i2; + for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + v3 * h__[k + 2 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ +/* L70: */ } /* @@ -13475,29 +14032,29 @@ static logical c_true = TRUE_; Computing MIN */ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { + i__3 = k + 3; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + v3 * h__[j + (k + 2) * h_dim1]; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ +/* L80: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ +/* L90: */ } } } else if (nr == 2) { @@ -13507,12 +14064,12 @@ static logical c_true = TRUE_; in columns K to I2. */ - i__3 = i2; - for (j = k; j <= i__3; ++j) { + i__2 = i2; + for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ +/* L100: */ } /* @@ -13520,33 +14077,33 @@ static logical c_true = TRUE_; matrix in rows I1 to min(K+3,I). 
*/ - i__3 = i__; - for (j = i1; j <= i__3; ++j) { + i__2 = i__; + for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] ; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ +/* L110: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ +/* L120: */ } } } -/* L120: */ +/* L130: */ } -/* L130: */ +/* L140: */ } /* Failure to converge in remaining number of iterations */ @@ -13554,7 +14111,7 @@ static logical c_true = TRUE_; *info = i__; return 0; -L140: +L150: if (l == i__) { @@ -13598,23 +14155,19 @@ static logical c_true = TRUE_; } } -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ +/* return to start of the main loop with new value of I. 
*/ - itn -= its; i__ = l - 1; - goto L10; + goto L20; -L150: +L160: return 0; /* End of DLAHQR */ } /* dlahqr_ */ -/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal * +/* Subroutine */ int dlahr2_(integer *n, integer *k, integer *nb, doublereal * a, integer *lda, doublereal *tau, doublereal *t, integer *ldt, doublereal *y, integer *ldy) { @@ -13627,27 +14180,33 @@ static logical c_true = TRUE_; static integer i__; static doublereal ei; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, - integer *), dgemv_(char *, integer *, integer *, doublereal *, - doublereal *, integer *, doublereal *, integer *, doublereal *, - doublereal *, integer *), dcopy_(integer *, doublereal *, - integer *, doublereal *, integer *), daxpy_(integer *, doublereal - *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char - *, char *, char *, integer *, doublereal *, integer *, doublereal - *, integer *), dlarfg_(integer *, - doublereal *, doublereal *, integer *, doublereal *); + integer *), dgemm_(char *, char *, integer *, integer *, integer * + , doublereal *, doublereal *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *), dgemv_( + char *, integer *, integer *, doublereal *, doublereal *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *, doublereal *, + integer *), dtrmm_(char *, char *, char *, char *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *), daxpy_(integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *), + dtrmv_(char *, char *, char *, integer *, doublereal *, integer *, + doublereal *, integer *), dlarfg_( + integer *, doublereal *, doublereal *, integer *, doublereal *), + dlacpy_(char *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose ======= - DLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + DLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) matrix A so that elements below the k-th subdiagonal are zero. The reduction is performed by an orthogonal similarity transformation Q' * A * Q. The routine returns the matrices V and T which determine @@ -13664,6 +14223,7 @@ static logical c_true = TRUE_; K (input) INTEGER The offset for the reduction. Elements below the k-th subdiagonal in the first NB columns are reduced to zero. + K < N. NB (input) INTEGER The number of columns to be reduced. @@ -13719,9 +14279,9 @@ static logical c_true = TRUE_; The contents of A on exit are illustrated by the following example with n = 7, k = 3 and nb = 2: - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) ( h h a a a ) ( v1 h a a a ) ( v1 v2 a a a ) @@ -13731,6 +14291,19 @@ static logical c_true = TRUE_; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. 
+ ===================================================================== @@ -13759,15 +14332,16 @@ static logical c_true = TRUE_; if (i__ > 1) { /* - Update A(1:n,i) + Update A(K+1:N,I) - Compute i-th column of A - Y * V' + Update I-th column of A - Y * V' */ - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], - &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b15, &a[*k + 1 + + i__ * a_dim1], &c__1); /* Apply I - V * T' * V' to this column (call it b) from the @@ -13785,7 +14359,7 @@ static logical c_true = TRUE_; dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; - dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + dtrmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); /* w := w + V2'*b2 */ @@ -13799,21 +14373,21 @@ static logical c_true = TRUE_; /* w := T'*w */ i__2 = i__ - 1; - dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + dtrmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); /* b2 := b2 - V2*w */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; - dgemv_("No transpose", &i__2, &i__3, &c_b151, &a[*k + i__ + + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &a[*k + i__ + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + i__ + i__ * a_dim1], &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; - dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + dtrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] , lda, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; daxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + @@ -13823,8 +14397,8 @@ static logical c_true = TRUE_; } /* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) + Generate the elementary reflector 
H(I) to annihilate + A(K+I+1:N,I) */ i__2 = *n - *k - i__ + 1; @@ -13835,29 +14409,33 @@ static logical c_true = TRUE_; ei = a[*k + i__ + i__ * a_dim1]; a[*k + i__ + i__ * a_dim1] = 1.; -/* Compute Y(1:n,i) */ +/* Compute Y(K+1:N,I) */ - i__2 = *n - *k - i__ + 1; - dgemv_("No transpose", n, &i__2, &c_b15, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * - y_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b15, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[* + k + 1 + i__ * y_dim1], &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; dgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - dgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b15, &y[i__ * y_dim1 + 1], &c__1); - dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + dgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &t[i__ * t_dim1 + 1], &c__1, &c_b15, &y[*k + 1 + i__ * + y_dim1], &c__1); + i__2 = *n - *k; + dscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); -/* Compute T(1:i,i) */ +/* Compute T(1:I,I) */ i__2 = i__ - 1; d__1 = -tau[i__]; dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; - dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + dtrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1) ; t[i__ + i__ * t_dim1] = tau[i__]; @@ -13866,11 +14444,70 @@ static logical c_true = TRUE_; } a[*k + *nb + *nb * a_dim1] = ei; +/* Compute Y(1:K,1:NB) */ + + dlacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + dtrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b15, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; 
+ dgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b15, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b15, &y[y_offset], ldy); + } + dtrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b15, &t[ + t_offset], ldt, &y[y_offset], ldy); + return 0; -/* End of DLAHRD */ +/* End of DLAHR2 */ + +} /* dlahr2_ */ + +logical dlaisnan_(doublereal *din1, doublereal *din2) +{ + /* System generated locals */ + logical ret_val; -} /* dlahrd_ */ + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + This routine is not for general use. It exists solely to avoid + over-optimization in DISNAN. + + DLAISNAN checks for NaNs by comparing its two arguments for + inequality. NaN is the only floating-point value where NaN != NaN + returns .TRUE. To check for NaNs, pass the same variable as both + arguments. + + A compiler must assume that the two arguments are + not the same variable, and the test will not be optimized away. + Interprocedural or whole-program optimization may delete this + test. The ISNAN functions will be replaced by the correct + Fortran 03 intrinsic once the intrinsic is widely available. + + Arguments + ========= + + DIN1 (input) DOUBLE PRECISION + + DIN2 (input) DOUBLE PRECISION + Two numbers to compare for inequality. + + ===================================================================== +*/ + + ret_val = *din1 != *din2; + return ret_val; +} /* dlaisnan_ */ /* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw, doublereal *smin, doublereal *ca, doublereal *a, integer *lda, @@ -13909,10 +14546,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14464,10 +15101,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14909,10 +15546,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14949,10 +15586,10 @@ static logical c_true = TRUE_; NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. - B (input) DOUBLE PRECISION array, dimension ( LDB, NRHS ) + B (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. + squares problem in rows 1 through M. + On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. 
@@ -15342,7 +15979,9 @@ static logical c_true = TRUE_; static doublereal eps; static integer iwk; static doublereal tol; - static integer difl, difr, perm, nsub; + static integer difl, difr; + static doublereal rcnd; + static integer perm, nsub; extern /* Subroutine */ int drot_(integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); static integer nlvl, sqre, bxst; @@ -15384,10 +16023,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -15431,7 +16070,7 @@ static logical c_true = TRUE_; On entry D contains the main diagonal of the bidiagonal matrix. On exit, if INFO = 0, D contains its singular values. - E (input) DOUBLE PRECISION array, dimension (N-1) + E (input/output) DOUBLE PRECISION array, dimension (N-1) Contains the super-diagonal entries of the bidiagonal matrix. On exit, E has been destroyed. @@ -15468,7 +16107,7 @@ static logical c_true = TRUE_; INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while + > 0: The algorithm failed to compute a singular value while working on the submatrix lying in rows and columns INFO/(N+1) through MOD(INFO,N+1). @@ -15516,7 +16155,9 @@ static logical c_true = TRUE_; /* Set up the tolerance. */ if (*rcond <= 0. || *rcond >= 1.) 
{ - *rcond = eps; + rcnd = eps; + } else { + rcnd = *rcond; } *rank = 0; @@ -15597,7 +16238,7 @@ static logical c_true = TRUE_; if (*info != 0) { return 0; } - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] <= tol) { @@ -15759,7 +16400,7 @@ static logical c_true = TRUE_; /* Apply the singular values and treat the tiny ones as zero. */ - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { @@ -15833,10 +16474,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15955,10 +16596,10 @@ doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15984,7 +16625,7 @@ doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. 
+ squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -16007,7 +16648,7 @@ doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer LDA (input) INTEGER The leading dimension of the array A. LDA >= max(M,1). - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), where LWORK >= M when NORM = 'I'; otherwise, WORK is not referenced. @@ -16104,181 +16745,6 @@ doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer } /* dlange_ */ -doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda, - doublereal *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal ret_val, d__1, d__2, d__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *, - doublereal *, doublereal *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - DLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - DLANHS returns the value - - DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). 
Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in DLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, DLANHS is - set to zero. - - A (input) DOUBLE PRECISION array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1)); - value = max(d__2,d__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). 
*/ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). */ - - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of DLANHS */ - -} /* dlanhs_ */ - doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) { /* System generated locals */ @@ -16298,10 +16764,10 @@ doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16327,7 +16793,7 @@ doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e) where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. 
Arguments ========= @@ -16435,10 +16901,10 @@ doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16464,7 +16930,7 @@ doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -16495,7 +16961,7 @@ doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer LDA (input) INTEGER The leading dimension of the array A. LDA >= max(N,1). - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, WORK is not referenced. @@ -16641,10 +17107,10 @@ doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -16854,10 +17320,10 @@ doublereal dlapy2_(doublereal *x, doublereal *y) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16907,10 +17373,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16938,7 +17404,12 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) d__1 = max(xabs,yabs); w = max(d__1,zabs); if (w == 0.) { - ret_val = 0.; +/* + W can be zero for max(0,nan,0) + adding all three entries together will make sure + NaN will not disappear. 
+*/ + ret_val = xabs + yabs + zabs; } else { /* Computing 2nd power */ d__1 = xabs / w; @@ -16948,11 +17419,4062 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) d__3 = zabs / w; ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3); } - return ret_val; + return ret_val; + +/* End of DLAPY3 */ + +} /* dlapy3_ */ + +/* Subroutine */ int dlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static integer i__, k; + static doublereal aa, bb, cc, dd; + static integer ld; + static doublereal cs; + static integer nh, it, ks, kt; + static doublereal sn; + static integer ku, kv, ls, ns; + static doublereal ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static doublereal swap; + static integer ktop; + static doublereal zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlaqr3_( + logical *, logical *, integer *, integer *, integer *, integer *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *), + dlaqr4_(logical *, logical *, integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *), dlaqr5_(logical *, logical 
*, integer *, integer *, + integer *, integer *, integer *, doublereal *, doublereal *, + doublereal *, integer *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *, doublereal *, integer *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *); + static integer nibble; + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + DLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. 
+ + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to DGEBAL, and then passed to DGEHRD when the + matrix output by DGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (IHI) + WI (output) DOUBLE PRECISION array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). 
+ + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then DLAQR0 does a workspace query. + In this case, DLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, DLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 
0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . DLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. 
==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use DLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. 
==== +*/ + + nsr = ilaenv_(&c__15, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to DLAQR3 ==== +*/ + + i__1 = nwr + 1; + dlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "DLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. 
==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) + > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + dlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if DLAQR3 + . did not provide that many shifts. 
==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . DLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use DLAQR4 or + . DLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + dlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. 
==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ + i__], abs(d__2)) < (d__3 = wr[i__ + 1] + , abs(d__3)) + (d__4 = wi[i__ + 1], + abs(d__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( + d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . 
then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR0 ==== */ + + return 0; +} /* dlaqr0_ */ + +/* Subroutine */ int dlaqr1_(integer *n, doublereal *h__, integer *ldh, + doublereal *sr1, doublereal *si1, doublereal *sr2, doublereal *si2, + doublereal *v) +{ + /* System generated locals */ + integer h_dim1, h_offset; + doublereal d__1, d__2, d__3; + + /* Local variables */ + static doublereal s, h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, DLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + + scaling to avoid overflows and most underflows. It + is assumed that either + + 1) sr1 = sr2 and si1 = -si2 + or + 2) si1 = si2 = 0. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) DOUBLE PRECISION array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + SR1 (input) DOUBLE PRECISION + SI1 The shifts in (*). + SR2 + SI2 + + V (output) DOUBLE PRECISION array of dimension N + A scalar multiple of the first column of the + matrix K in (*). + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = + h__[h_dim1 + 2], abs(d__2)); + if (s == 0.) { + v[1] = 0.; + v[2] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + (h__[h_dim1 + 1] - *sr1) * + ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2); + } + } else { + s = (d__1 = h__[h_dim1 + 1] - *sr2, abs(d__1)) + abs(*si2) + (d__2 = + h__[h_dim1 + 2], abs(d__2)) + (d__3 = h__[h_dim1 + 3], abs( + d__3)); + if (s == 0.) 
{ + v[1] = 0.; + v[2] = 0.; + v[3] = 0.; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) + - *si1 * (*si2 / s) + h__[(h_dim1 << 1) + 1] * h21s + h__[ + h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2) + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - * + sr2) + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} /* dlaqr1_ */ + +/* Subroutine */ int dlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static doublereal s, aa, bb, cc, dd, cs, sn; + static integer jw; + static doublereal evi, evk, foo; + static integer kln; + static doublereal tau, ulp; + static integer lwk1, lwk2; + static doublereal beta; + static integer kend, kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dgemm_(char *, char *, integer *, integer * + , integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static logical bulge; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer infqr, 
kwtop; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlabad_( + doublereal *, doublereal *); + + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmax; + extern /* Subroutine */ int dtrexc_(char *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, + doublereal *, integer *), dormhr_(char *, char *, integer + *, integer *, integer *, integer *, doublereal *, integer *, + doublereal *, doublereal *, integer *, doublereal *, integer *, + integer *); + static logical sorted; + static doublereal smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- June 2010 -- + + + This subroutine is identical to DLAQR3 except that it avoids + recursion by calling DLAHQR instead of DLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. 
On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. 
N. + + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) DOUBLE PRECISION array, dimension (KBOT) + SI (output) DOUBLE PRECISION array, dimension (KBOT) + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. 
LDV + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; DLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to DGEHRD ==== */ + + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to DORMHR ==== */ + + i__1 = jw - 1; + dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. 
==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( + d__1)); + if (abs(s) <= max(d__2,d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + dlaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], + &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== DTREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) + { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (DTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* + ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* + ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) 
{ + foo = abs(s); + } +/* Computing MAX */ + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = + s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3,d__4) <= max(d__5,d__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, DTREXC does the right thing with + . ILST in case of a rare exchange failure. ==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. ==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = + t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = + t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) 
{ + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ + k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + + (k + 1) * t_dim1], abs(d__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) 
{ + +/* ==== Reflect spike back into lower triangle ==== */ + + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + dgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR2 ==== */ + + return 0; +} /* dlaqr2_ */ + +/* Subroutine */ int dlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublereal *h__, integer * + ldh, integer *iloz, integer *ihiz, doublereal *z__, integer *ldz, + integer *ns, integer *nd, doublereal *sr, doublereal *si, doublereal * + v, integer *ldv, integer *nh, doublereal *t, integer *ldt, integer * + nv, doublereal *wv, integer *ldwv, doublereal *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static doublereal s, aa, bb, cc, dd, cs, sn; + static integer jw; + static doublereal evi, evk, foo; + static integer kln; + static doublereal tau, ulp; + static integer lwk1, lwk2, lwk3; + static doublereal beta; + static integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *), dgemm_(char *, char *, integer *, integer * + , integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static logical bulge; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *); + static integer infqr, kwtop; + extern /* Subroutine */ int dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlaqr4_( + logical *, logical *, integer *, integer *, integer *, doublereal + *, integer *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, doublereal *, integer *, integer 
*), + dlabad_(doublereal *, doublereal *); + + extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + integer *), dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal safmax; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *), + dtrexc_(char *, integer *, doublereal *, integer *, doublereal *, + integer *, integer *, integer *, doublereal *, integer *), + dormhr_(char *, char *, integer *, integer *, integer *, integer + *, doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *, integer *); + static logical sorted; + static doublereal smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- June 2010 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. 
+ + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. 
+ + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) DOUBLE PRECISION array, dimension (KBOT) + SI (output) DOUBLE PRECISION array, dimension (KBOT) + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) DOUBLE PRECISION array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) DOUBLE PRECISION array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. 
LDV + + WORK (workspace) DOUBLE PRECISION array, dimension (LWORK) + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; DLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to DGEHRD ==== */ + + i__1 = jw - 1; + dgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to DORMHR ==== */ + + i__1 = jw - 1; + dormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Workspace query call to DLAQR4 ==== */ + + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], + &si[1], &c__1, &jw, &v[v_offset], ldv, 
&work[1], &c_n1, & + infqr); + lwk3 = (integer) work[1]; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.; + *ns = 1; + *nd = 0; +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * (d__1 = h__[kwtop + kwtop * h_dim1], abs( + d__1)); + if (abs(s) <= max(d__2,d__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.; + } + } + work[1] = 1.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + dlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + dcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + dlaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "DLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + dlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], + lwork, &infqr); + } else { + dlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== DTREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.; + t[j + 3 + j * t_dim1] = 0.; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (d__1 = t[*ns + *ns * t_dim1], abs(d__1)); + if (foo == 0.) { + foo = abs(s); + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * foo; + if ((d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)) <= max(d__2,d__3)) + { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (DTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (d__3 = t[*ns + *ns * t_dim1], abs(d__3)) + sqrt((d__1 = t[* + ns + (*ns - 1) * t_dim1], abs(d__1))) * sqrt((d__2 = t[* + ns - 1 + *ns * t_dim1], abs(d__2))); + if (foo == 0.) 
{ + foo = abs(s); + } +/* Computing MAX */ + d__3 = (d__1 = s * v[*ns * v_dim1 + 1], abs(d__1)), d__4 = (d__2 = + s * v[(*ns - 1) * v_dim1 + 1], abs(d__2)); +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (max(d__3,d__4) <= max(d__5,d__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, DTREXC does the right thing with + . ILST in case of a rare exchange failure. ==== +*/ + + ifst = *ns; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. ==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (d__1 = t[i__ + i__ * t_dim1], abs(d__1)); + } else { + evi = (d__3 = t[i__ + i__ * t_dim1], abs(d__3)) + sqrt((d__1 = + t[i__ + 1 + i__ * t_dim1], abs(d__1))) * sqrt((d__2 = + t[i__ + (i__ + 1) * t_dim1], abs(d__2))); + } + + if (k == kend) { + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else if (t[k + 1 + k * t_dim1] == 0.) 
{ + evk = (d__1 = t[k + k * t_dim1], abs(d__1)); + } else { + evk = (d__3 = t[k + k * t_dim1], abs(d__3)) + sqrt((d__1 = t[ + k + 1 + k * t_dim1], abs(d__1))) * sqrt((d__2 = t[k + + (k + 1) * t_dim1], abs(d__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + dtrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 0.) { + if (*ns > 1 && s != 0.) 
{ + +/* ==== Reflect spike back into lower triangle ==== */ + + dcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + dlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.; + + i__1 = jw - 2; + i__2 = jw - 2; + dlaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + dlarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + dlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + dgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + dlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + dcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.) { + i__1 = *lwork - jw; + dormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + dgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + dlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + dgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + dlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR3 ==== */ + + return 0; +} /* dlaqr3_ */ + +/* Subroutine */ int dlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal + *wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__, + integer *ldz, doublereal *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4; + + /* Local variables */ + static integer i__, k; + static doublereal aa, bb, cc, dd; + static integer ld; + static doublereal cs; + static integer nh, it, ks, kt; + static doublereal sn; + static integer ku, kv, ls, ns; + static doublereal ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static doublereal swap; + static integer ktop; + static doublereal zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int dlaqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, doublereal *, integer *, integer + *, integer *, doublereal *, integer *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *, integer *, + doublereal *, integer *, integer *, doublereal *, integer *, + doublereal *, integer *), dlanv2_(doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, doublereal *), dlaqr5_( + logical *, logical *, integer *, integer *, integer *, integer *, + integer *, doublereal *, doublereal *, doublereal *, integer *, + integer *, integer *, doublereal *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, doublereal *, + integer *, integer *, doublereal *, integer *); + static integer nibble; + extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *, + integer *, integer *, doublereal *, integer *, 
doublereal *, + doublereal *, integer *, integer *, doublereal *, integer *, + integer *), dlacpy_(char *, integer *, integer *, doublereal *, + integer *, doublereal *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This subroutine implements one level of recursion for DLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by DLAQR0 and, for large enough + deflation window size, it may be called by DLAQR3. This + subroutine is identical to DLAQR0 except that it calls DLAQR2 + instead of DLAQR3. + + Purpose + ======= + + DLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. 
ILO and IHI are normally set by a + previous call to DGEBAL, and then passed to DGEHRD when the + matrix output by DGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) DOUBLE PRECISION array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) DOUBLE PRECISION array, dimension (IHI) + WI (output) DOUBLE PRECISION array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. 
+ + Z (input/output) DOUBLE PRECISION array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) DOUBLE PRECISION array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then DLAQR4 does a workspace query. + In this case, DLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, DLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 
0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . DLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use DLAHQR. 
==== */ + + lwkopt = 1; + if (*lwork != -1) { + dlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. 
==== +*/ + + nsr = ilaenv_(&c__15, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to DLAQR2 ==== +*/ + + i__1 = nwr + 1; + dlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(DLAQR5, DLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (doublereal) lwkopt; + return 0; + } + +/* ==== DLAHQR/DLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "DLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. 
==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((d__1 = h__[kwtop + (kwtop - 1) * h_dim1], abs(d__1)) + > (d__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + abs(d__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + dlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if DLAQR2 + . did not provide that many shifts. 
==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . DLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + + (d__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + abs(d__2)); + aa = ss * .75 + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375; + dd = aa; + dlanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use DLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + dlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &wr[ks], &wi[ks], &c__1, & + c__1, zdum, &c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. 
==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + dlanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((d__1 = wr[i__], abs(d__1)) + (d__2 = wi[ + i__], abs(d__2)) < (d__3 = wr[i__ + 1] + , abs(d__3)) + (d__4 = wi[i__ + 1], + abs(d__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.) { + if ((d__1 = wr[kbot] - h__[kbot + kbot * h_dim1], abs( + d__1)) < (d__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], abs(d__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . 
then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + dlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + work[1] = (doublereal) lwkopt; + +/* ==== End of DLAQR4 ==== */ + + return 0; +} /* dlaqr4_ */ + +/* Subroutine */ int dlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, doublereal + *sr, doublereal *si, doublereal *h__, integer *ldh, integer *iloz, + integer *ihiz, doublereal *z__, integer *ldz, doublereal *v, integer * + ldv, doublereal *u, integer *ldu, integer *nv, doublereal *wv, + integer *ldwv, integer *nh, doublereal *wh, integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7; + doublereal d__1, d__2, d__3, d__4, d__5; + + /* Local variables */ + static integer i__, j, k, m, i2, j2, i4, j4, k1; + static doublereal h11, h12, h21, h22; + static integer m22, ns, nu; + static doublereal vt[3], scl; + static integer kdu, kms; + static doublereal ulp; + static integer knz, kzs; + static doublereal tst1, tst2, beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot; + static doublereal swap; + static integer jtop, jrow, mtop; + static doublereal alpha; + static logical accum; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + static integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, + integer *, integer *, doublereal *, doublereal *, integer *, + doublereal *, integer *), dlaqr1_( + integer *, doublereal *, integer *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *), dlabad_(doublereal *, + doublereal *); + + extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *, + integer *, doublereal *), dlacpy_(char *, integer *, integer *, + doublereal *, integer *, 
doublereal *, integer *); + static doublereal safmin; + extern /* Subroutine */ int dlaset_(char *, integer *, integer *, + doublereal *, doublereal *, doublereal *, integer *); + static doublereal safmax, refsum; + static integer mstart; + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + This auxiliary subroutine called by DLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the quasi-triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the orthogonal Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: DLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: DLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. + = 2: DLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. + + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. 
+ + SR (input/output) DOUBLE PRECISION array of size (NSHFTS) + SI (input/output) DOUBLE PRECISION array of size (NSHFTS) + SR contains the real parts and SI contains the imaginary + parts of the NSHFTS shifts of origin that define the + multi-shift QR sweep. On output SR and SI may be + reordered. + + H (input/output) DOUBLE PRECISION array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) DOUBLE PRECISION array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep orthogonal + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDA is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) DOUBLE PRECISION array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + in the calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) DOUBLE PRECISION array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. + + NV (input) integer scalar + NV is the number of rows in WV agailable for workspace. + NV.GE.1. 
+ + WV (workspace) DOUBLE PRECISION array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + in the calling subroutine. LDWV.GE.NV. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. ==== +*/ + + /* Parameter adjustments */ + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== Shuffle shifts into pairs of real shifts and pairs + . of complex conjugate shifts assuming complex + . conjugate shifts are already adjacent to one + . another. ==== +*/ + + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } +/* L10: */ + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . 
then simply reduce it by one. The shuffle above + . ensures that the dropped shift is real and that + . the remaining shifts are paired. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.; + } -/* End of DLAPY3 */ +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ -} /* dlapy3_ */ + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + dlaset_("ALL", &kdu, &kdu, &c_b29, &c_b15, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) 
==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. ==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) ==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + dlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m + << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * + 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + dlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + dlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + if (h__[k + 3 + k * h_dim1] != 0. || h__[k + 3 + (k + 1) * + h_dim1] != 0. || h__[k + 3 + (k + 2) * h_dim1] == + 0.) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . 
reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + dlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * + 2], &si[m * 2], vt); + alpha = vt[0]; + dlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * + h__[k + 2 + k * h_dim1]); + + if ((d__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], + abs(d__1)) + (d__2 = refsum * vt[2], abs(d__2) + ) > ulp * ((d__3 = h__[k + k * h_dim1], abs( + d__3)) + (d__4 = h__[k + 1 + (k + 1) * h_dim1] + , abs(d__4)) + (d__5 = h__[k + 2 + (k + 2) * + h_dim1], abs(d__5)))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. ==== +*/ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.; + h__[k + 3 + k * h_dim1] = 0.; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } +/* L20: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. 
==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + dlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[( + m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], + &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + dlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[ + m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * + v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L30: */ + } +/* L40: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L50: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. 
==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * + h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) + * h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + + 3]; +/* L60: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) ==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j + + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * + v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L70: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. ==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[ + j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * + v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L80: */ + } + } + } +/* L90: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22 && v[m22 * v_dim1 + 1] != 0.) 
{ +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]) + ; + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L100: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L110: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L120: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. ==== +*/ + + if (h__[k + 1 + k * h_dim1] != 0.) { + tst1 = (d__1 = h__[k + k * h_dim1], abs(d__1)) + (d__2 = + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + if (tst1 == 0.) 
{ + if (k >= *ktop + 1) { + tst1 += (d__1 = h__[k + (k - 1) * h_dim1], abs( + d__1)); + } + if (k >= *ktop + 2) { + tst1 += (d__1 = h__[k + (k - 2) * h_dim1], abs( + d__1)); + } + if (k >= *ktop + 3) { + tst1 += (d__1 = h__[k + (k - 3) * h_dim1], abs( + d__1)); + } + if (k <= *kbot - 2) { + tst1 += (d__1 = h__[k + 2 + (k + 1) * h_dim1], + abs(d__1)); + } + if (k <= *kbot - 3) { + tst1 += (d__1 = h__[k + 3 + (k + 1) * h_dim1], + abs(d__1)); + } + if (k <= *kbot - 4) { + tst1 += (d__1 = h__[k + 4 + (k + 1) * h_dim1], + abs(d__1)); + } + } +/* Computing MAX */ + d__2 = smlnum, d__3 = ulp * tst1; + if ((d__1 = h__[k + 1 + k * h_dim1], abs(d__1)) <= max( + d__2,d__3)) { +/* Computing MAX */ + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( + d__2)); + h12 = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + 1 + k * h_dim1], abs(d__1)), + d__4 = (d__2 = h__[k + (k + 1) * h_dim1], abs( + d__2)); + h21 = min(d__3,d__4); +/* Computing MAX */ + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( + d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h11 = max(d__3,d__4); +/* Computing MIN */ + d__3 = (d__1 = h__[k + 1 + (k + 1) * h_dim1], abs( + d__1)), d__4 = (d__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], abs(d__2)); + h22 = min(d__3,d__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1,d__2)) + { + h__[k + 1 + k * h_dim1] = 0.; + } + } + } +/* L130: */ + } + +/* + ==== Fill in the last row of each bulge. 
==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + ( + k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L140: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L150: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. ==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + dgemm_("C", "N", &nu, &jlen, &nu, &c_b15, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b29, &wh[wh_offset], ldwh); + dlacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L160: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? 
jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + dgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b29, &wv[wv_offset], ldwv); + dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L170: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + dgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b29, &wv[wv_offset], ldwv); + dlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L180: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) 
==== +*/ + + dlacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + dlaset_("ALL", &kzs, &jlen, &c_b29, &c_b29, &wh[wh_offset] + , ldwh); + dtrmm_("L", "U", "C", "N", &knz, &jlen, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + dgemm_("C", "N", &i2, &jlen, &j2, &c_b15, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b15, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + dlacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + dtrmm_("L", "L", "C", "N", &j2, &jlen, &c_b15, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_("C", "N", &i__5, &jlen, &i__7, &c_b15, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b15, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + dlacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L190: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . 
columns get multiplied by zero) ==== +*/ + + dlacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + dlaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[wv_offset] + , ldwv); + dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + dgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b15, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + dlacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b15, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L200: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . 
KZS columns get multiplied by zero) ==== +*/ + + dlacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + dlaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[ + wv_offset], ldwv); + dtrmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + dgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b15, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + dlacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + dtrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + dgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b15, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + dlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L210: */ + } + } + } + } +/* L220: */ + } + +/* ==== End of DLAQR5 ==== */ + + return 0; +} /* dlaqr5_ */ /* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v, integer *incv, doublereal *tau, doublereal *c__, integer *ldc, @@ -16963,6 +21485,8 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) doublereal d__1; /* Local variables */ + static integer i__; + static logical applyleft; extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *); @@ -16970,13 +21494,16 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) extern /* 
Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); + static integer lastc, lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17040,39 +21567,74 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) --work; /* Function Body */ - if (lsame_(side, "L")) { + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (*tau != 0.) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 0 && v[i__] == 0.) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { /* Form H * C */ - if (*tau != 0.) 
{ + if (lastv > 0) { -/* w := C' * v */ +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ - dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], - incv, &c_b29, &work[1], &c__1); + dgemv_("Transpose", &lastv, &lastc, &c_b15, &c__[c_offset], ldc, & + v[1], incv, &c_b29, &work[1], &c__1); -/* C := C - v * w' */ +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); + dger_(&lastv, &lastc, &d__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); } } else { /* Form C * H */ - if (*tau != 0.) { + if (lastv > 0) { -/* w := C * v */ +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ - dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], - incv, &c_b29, &work[1], &c__1); + dgemv_("No transpose", &lastc, &lastv, &c_b15, &c__[c_offset], + ldc, &v[1], incv, &c_b29, &work[1], &c__1); -/* C := C - w * v' */ +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); + dger_(&lastc, &lastv, &d__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); } } return 0; @@ -17096,18 +21658,22 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); + static integer lastc; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); + static integer lastv; + extern integer iladlc_(integer *, integer *, doublereal *, integer *), + iladlr_(integer *, integer *, doublereal *, integer *); static char transt[1]; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17229,6 +21795,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C1' @@ -17236,52 +21809,53 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); + dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); /* L10: */ } /* W := W * V1 */ - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2 */ - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & - c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b15, &work[work_offset], ldwork); + i__1 = lastv - *k; + dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ - if (*m > *k) { + if (lastv > *k) { /* C2 := C2 - V2 * W' */ - i__1 = *m - 
*k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); } /* W := W * V1' */ - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L20: */ @@ -17294,6 +21868,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C1 @@ -17301,21 +21882,21 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2 */ - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 1 + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); @@ -17323,31 
+21904,32 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* W := W * T or W * T' */ - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2' */ - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], + ldc); } /* W := W * V1' */ - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L50: */ @@ -17370,6 +21952,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C2' @@ -17377,57 +21966,56 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); + dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, - &v[*m - *k + 1 + 
v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1 */ - i__1 = *m - *k; - dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); + i__1 = lastv - *k; + dgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1 * W' */ - i__1 = *m - *k; - dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b15, &c__[c_offset], ldc) - ; + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); } /* W := W * V2' */ - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & - v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L80: */ } @@ -17439,6 +22027,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlr_(n, k, &v[v_offset], ldv); + 
lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C2 @@ -17446,58 +22041,57 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + dcopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & + work[j * work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1 */ - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, k, &i__1, & + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1' */ - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b15, &c__[c_offset], ldc) - ; + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); } /* W := W * V2' */ - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & - v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], 
ldwork); /* C2 := C2 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; /* L110: */ } /* L120: */ @@ -17520,6 +22114,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C1' @@ -17527,52 +22128,53 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); + dcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); /* L130: */ } /* W := W * V1' */ - dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2' */ - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & - c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b15, &work[work_offset], ldwork); + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C2 := C2 - V2' * W' */ - i__1 = *m - *k; - dgemm_("Transpose", 
"Transpose", &i__1, n, k, &c_b151, &v[ - (*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[(*k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); } /* W := W * V1 */ - dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L140: */ @@ -17585,6 +22187,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C1 @@ -17592,39 +22201,39 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + dcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ - dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + dtrmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2' */ - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & - c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b15, &work[work_offset], - ldwork); + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[(*k + 1) * 
c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b15, &work[work_offset], + ldwork); } /* W := W * T or W * T' */ - dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2 */ - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b151, &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); @@ -17632,14 +22241,14 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* W := W * V1 */ - dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); + dtrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L170: */ @@ -17662,6 +22271,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = iladlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C2' @@ -17669,56 +22285,56 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); + dcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ - dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & - v[(*m - *k + 
1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*m > *k) { + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1' */ - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & work[work_offset], ldwork); } /* W := W * T' or W * T */ - dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1' * W' */ - i__1 = *m - *k; - dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ - v_offset], ldv, &work[work_offset], ldwork, & - c_b15, &c__[c_offset], ldc); + i__1 = lastv - *k; + dgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); } /* W := W * V2 */ - dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L200: */ } @@ -17730,6 +22346,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = iladlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = 
iladlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C2 @@ -17737,57 +22360,57 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + dcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ - dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & - v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*n > *k) { + dtrmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1' */ - i__1 = *n - *k; - dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); + i__1 = lastv - *k; + dgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + dtrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1 */ - i__1 = *n - *k; - dgemm_("No transpose", "No transpose", m, &i__1, k, & + i__1 = lastv - *k; + dgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b151, &work[work_offset], ldwork, &v[v_offset], ldv, &c_b15, &c__[c_offset], ldc); } /* W := W * V2 */ - dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + dtrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ i__1 = 
*k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; /* L230: */ } /* L240: */ @@ -17826,10 +22449,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17904,12 +22527,12 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) d__1 = dlapy2_(alpha, &xnorm); beta = -d_sign(&d__1, alpha); safmin = SAFEMINIMUM / EPSILON; + knt = 0; if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ rsafmn = 1. / safmin; - knt = 0; L10: ++knt; i__1 = *n - 1; @@ -17926,26 +22549,20 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) xnorm = dnrm2_(&i__1, &x[1], incx); d__1 = dlapy2_(alpha, &xnorm); beta = -d_sign(&d__1, alpha); - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. / (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + d__1 = 1. / (*alpha - beta); + dscal_(&i__1, &d__1, &x[1], incx); -/* If ALPHA is subnormal, it may lose relative accuracy */ +/* If ALPHA is subnormal, it may lose relative accuracy */ - *alpha = beta; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - *alpha *= safmin; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; /* L20: */ - } - } else { - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - d__1 = 1. 
/ (*alpha - beta); - dscal_(&i__1, &d__1, &x[1], incx); - *alpha = beta; } + *alpha = beta; } return 0; @@ -17963,21 +22580,22 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) doublereal d__1; /* Local variables */ - static integer i__, j; + static integer i__, j, prevlastv; static doublereal vii; extern logical lsame_(char *, char *); extern /* Subroutine */ int dgemv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *), dtrmv_(char *, - char *, char *, integer *, doublereal *, integer *, doublereal *, - integer *); + doublereal *, doublereal *, integer *); + static integer lastv; + extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *, + doublereal *, integer *, doublereal *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18089,8 +22707,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } if (lsame_(direct, "F")) { + prevlastv = *n; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); if (tau[i__] == 0.) { /* H(i) = I */ @@ -18107,21 +22727,39 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) vii = v[i__ + i__ * v_dim1]; v[i__ + i__ * v_dim1] = 1.; if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.) 
{ + goto L15; + } + } +L15: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ - i__2 = *n - i__ + 1; + i__2 = j - i__ + 1; i__3 = i__ - 1; d__1 = -tau[i__]; dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ i__ * t_dim1 + 1], &c__1); } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ i__2 = i__ - 1; - i__3 = *n - i__ + 1; + i__3 = j - i__ + 1; d__1 = -tau[i__]; dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & @@ -18135,10 +22773,16 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } } /* L20: */ } } else { + prevlastv = 1; for (i__ = *k; i__ >= 1; --i__) { if (tau[i__] == 0.) { @@ -18157,35 +22801,54 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) if (lsame_(storev, "C")) { vii = v[*n - *k + i__ + i__ * v_dim1]; v[*n - *k + i__ + i__ * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.) 
{ + goto L35; + } + } +L35: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ - i__1 = *n - *k + i__; + i__1 = *n - *k + i__ - j + 1; i__2 = *k - i__; d__1 = -tau[i__]; - dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & + dgemv_("Transpose", &i__1, &i__2, &d__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & c__1); v[*n - *k + i__ + i__ * v_dim1] = vii; } else { vii = v[i__ + (*n - *k + i__) * v_dim1]; v[i__ + (*n - *k + i__) * v_dim1] = 1.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.) { + goto L36; + } + } +L36: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ i__1 = *k - i__; - i__2 = *n - *k + i__; + i__2 = *n - *k + i__ - j + 1; d__1 = -tau[i__]; dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b29, &t[i__ + 1 + i__ * t_dim1], &c__1); + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); v[i__ + (*n - *k + i__) * v_dim1] = vii; } @@ -18196,6 +22859,11 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } } t[i__ + i__ * t_dim1] = tau[i__]; } @@ -18213,26 +22881,22 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) { /* System generated locals */ integer c_dim1, c_offset, i__1; - doublereal d__1; /* Local variables */ static integer j; static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int dger_(integer *, integer *, doublereal *, - 
doublereal *, integer *, doublereal *, integer *, doublereal *, - integer *); + extern /* Subroutine */ int dlarf_(char *, integer *, integer *, + doublereal *, integer *, doublereal *, doublereal *, integer *, + doublereal *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int dgemv_(char *, integer *, integer *, - doublereal *, doublereal *, integer *, doublereal *, integer *, - doublereal *, doublereal *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18315,20 +22979,9 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) case 10: goto L190; } -/* - Code for general M - - w := C'*v -*/ - - dgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], &c__1, & - c_b29, &work[1], &c__1); +/* Code for general M */ -/* C := C - tau * v * w' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) - ; + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); goto L410; L10: @@ -18630,20 +23283,9 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) case 10: goto L390; } -/* - Code for general N - - w := C * v -*/ - - dgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], & - c__1, &c_b29, &work[1], &c__1); +/* Code for general N */ -/* C := C - tau * w * v' */ - - d__1 = -(*tau); - dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) - ; + dlarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); goto L410; L210: @@ -18939,10 +23581,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Subroutine */ int dlartg_(doublereal *f, doublereal 
*g, doublereal *cs, doublereal *sn, doublereal *r__) { - /* Initialized data */ - - static logical first = TRUE_; - /* System generated locals */ integer i__1; doublereal d__1, d__2; @@ -18960,10 +23598,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19002,20 +23640,27 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) R (output) DOUBLE PRECISION The nonzero component of the rotated vector. + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. + ===================================================================== -*/ + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / - if (first) { - first = FALSE_; - safmin = SAFEMINIMUM; - eps = EPSILON; - d__1 = BASE; - i__1 = (integer) (log(safmin / eps) / log(BASE) / - 2.); - safmn2 = pow_di(&d__1, &i__1); - safmx2 = 1. / safmn2; - } + IF( FIRST ) THEN +*/ + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; +/* + FIRST = .FALSE. + END IF +*/ if (*g == 0.) { *cs = 1.; *sn = 0.; @@ -19113,10 +23758,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19242,15 +23887,16 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) static doublereal cfrom1; static doublereal cfromc; + extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum, smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19301,7 +23947,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) DOUBLE PRECISION array, dimension (LDA,M) + A (input/output) DOUBLE PRECISION array, dimension (LDA,N) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. @@ -19346,8 +23992,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) if (itype == -1) { *info = -1; - } else if (*cfrom == 0.) { + } else if (*cfrom == 0. || disnan_(cfrom)) { *info = -4; + } else if (disnan_(cto)) { + *info = -5; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { @@ -19394,18 +24042,36 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) L10: cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (abs(cto1) > abs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. 
+*/ mul = ctoc / cfromc; done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. +*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } } if (itype == 0) { @@ -19566,10 +24232,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -19621,16 +24287,16 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) On entry, maximum size of the subproblems at the bottom of the computation tree. - IWORK INTEGER work array. + IWORK (workspace) INTEGER work array. Dimension must be at least (8 * N) - WORK DOUBLE PRECISION work array. + WORK (workspace) DOUBLE PRECISION work array. Dimension must be at least (3 * M**2 + 2 * M) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -19840,10 +24506,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -19911,10 +24577,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) the lower block. On exit D(1:N) contains the singular values of the modified matrix. - ALPHA (input) DOUBLE PRECISION + ALPHA (input/output) DOUBLE PRECISION Contains the diagonal element associated with the added row. - BETA (input) DOUBLE PRECISION + BETA (input/output) DOUBLE PRECISION Contains the off-diagonal element associated with the added row. @@ -19950,7 +24616,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -20105,10 +24771,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20149,6 +24815,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) singular values (those which were deflated) sorted into increasing order. + Z (output) DOUBLE PRECISION array, dimension(N) + On exit Z contains the updating row vector in the secular + equation. + ALPHA (input) DOUBLE PRECISION Contains the diagonal element associated with the added row. 
@@ -20166,9 +24836,17 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - Z (output) DOUBLE PRECISION array, dimension(N) - On exit Z contains the updating row vector in the secular - equation. + VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. + In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. DSIGMA (output) DOUBLE PRECISION array, dimension (N) Contains a copy of the diagonal elements (K-1 singular values @@ -20186,18 +24864,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M) - On entry VT' contains the right singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL+1, NL+1), and (NL+2, NL+2), (M,M). - On exit VT' contains the trailing (N-K) updated right singular - vectors (those which were deflated) in its last N-K columns. - In case SQRE =1, the last row of VT spans the right null - space. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= M. - VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N) VT2' contains a copy of the first K right singular vectors which will be used by DLASD3 in a matrix multiply (DGEMM) to @@ -20210,24 +24876,31 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDVT2 (input) INTEGER The leading dimension of the array VT2. LDVT2 >= M. 
- IDXP (workspace) INTEGER array, dimension(N) + IDXP (workspace) INTEGER array dimension(N) This will contain the permutation used to place deflated values of D at the end of the array. On output IDXP(2:K) points to the nondeflated D-values and IDXP(K+1:N) points to the deflated singular values. - IDX (workspace) INTEGER array, dimension(N) + IDX (workspace) INTEGER array dimension(N) This will contain the permutation used to sort the contents of D into ascending order. - IDXC (output) INTEGER array, dimension(N) + IDXC (output) INTEGER array dimension(N) This will contain the permutation used to arrange the columns of the deflated U matrix into three groups: the first group contains non-zero entries only at and above NL, the second contains non-zero entries only below NL+2, and the third is dense. - COLTYP (workspace/output) INTEGER array, dimension(N) + IDXQ (input/output) INTEGER array dimension(N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + COLTYP (workspace/output) INTEGER array dimension(N) As workspace, this will contain a label which will indicate which of the following types a column in the U2 matrix or a row in the VT2 matrix is: @@ -20239,13 +24912,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) On exit, it is an array of dimension 4, with COLTYP(I) being the dimension of the I-th type columns. - IDXQ (input) INTEGER array, dimension(N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first hlaf of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - INFO (output) INTEGER = 0: successful exit. 
< 0: if INFO = -i, the i-th argument had an illegal value. @@ -20702,10 +25368,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -20759,28 +25425,28 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) of the deflated updating problem. These are the poles of the secular equation. - U (input) DOUBLE PRECISION array, dimension (LDU, N) + U (output) DOUBLE PRECISION array, dimension (LDU, N) The last N - K columns of this matrix contain the deflated left singular vectors. LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - U2 (input) DOUBLE PRECISION array, dimension (LDU2, N) + U2 (input/output) DOUBLE PRECISION array, dimension (LDU2, N) The first K columns of this matrix contain the non-deflated left singular vectors for the split problem. LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input) DOUBLE PRECISION array, dimension (LDVT, M) + VT (output) DOUBLE PRECISION array, dimension (LDVT, M) The last M - K columns of VT' contain the deflated right singular vectors. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= N. - VT2 (input) DOUBLE PRECISION array, dimension (LDVT2, N) + VT2 (input/output) DOUBLE PRECISION array, dimension (LDVT2, N) The first K columns of VT2' contain the non-deflated right singular vectors for the split problem. @@ -20811,7 +25477,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -20915,7 +25581,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) changes the bottommost bits of DSIGMA(I). It does not account for hexadecimal or decimal machines without guard digits (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + 2*DSIGMA(I) to prevent optimizing compilers from eliminating this code. */ @@ -21127,10 +25793,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21179,7 +25845,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) The scalar in the symmetric updating formula. SIGMA (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. + The computed sigma_I, the I-th updated eigenvalue. WORK (workspace) DOUBLE PRECISION array, dimension ( N ) If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th @@ -22113,10 +26779,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22148,7 +26814,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) The components of the updating vector. DELTA (output) DOUBLE PRECISION array, dimension ( 2 ) - Contains (D(j) - lambda_I) in its j-th component. + Contains (D(j) - sigma_I) in its j-th component. The vector DELTA contains the information necessary to construct the eigenvectors. @@ -22156,7 +26822,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) The scalar in the symmetric updating formula. DSIGMA (output) DOUBLE PRECISION - The computed lambda_I, the I-th updated eigenvalue. + The computed sigma_I, the I-th updated eigenvalue. WORK (workspace) DOUBLE PRECISION array, dimension ( 2 ) WORK contains (D(j) + sigma_I) in its j-th component. @@ -22309,10 +26975,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -22405,10 +27071,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) the lower block. On exit, VL contains the last components of all right singular vectors of the bidiagonal matrix. - ALPHA (input) DOUBLE PRECISION + ALPHA (input/output) DOUBLE PRECISION Contains the diagonal element associated with the added row. - BETA (input) DOUBLE PRECISION + BETA (input/output) DOUBLE PRECISION Contains the off-diagonal element associated with the added row. @@ -22485,7 +27151,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. 
- > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -22646,10 +27312,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23150,10 +27816,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -23184,9 +27850,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) D (output) DOUBLE PRECISION array, dimension ( K ) On output, D contains the updated singular values. - Z (input) DOUBLE PRECISION array, dimension ( K ) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. + Z (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, the first K elements of this array contain the + components of the deflation-adjusted updating row vector. + On exit, Z is updated. VF (input/output) DOUBLE PRECISION array, dimension ( K ) On entry, VF contains information passed through DBEDE8. @@ -23215,17 +27882,19 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDDIFR (input) INTEGER The leading dimension of DIFR, must be at least K. 
- DSIGMA (input) DOUBLE PRECISION array, dimension ( K ) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles + DSIGMA (input/output) DOUBLE PRECISION array, dimension ( K ) + On entry, the first K elements of this array contain the old + roots of the deflated updating problem. These are the poles of the secular equation. + On exit, the elements of DSIGMA may be very slightly altered + in value. WORK (workspace) DOUBLE PRECISION array, dimension at least 3 * K INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -23453,10 +28122,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -23598,7 +28267,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -23903,10 +28572,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24249,10 +28918,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -24283,7 +28952,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) NDIMR (output) INTEGER array, dimension ( N ) On exit, row dimensions of right children. - MSUB (input) INTEGER. + MSUB (input) INTEGER On entry, the maximum row dimension each subproblem at the bottom of the tree can be of. @@ -24361,10 +29030,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24515,10 +29184,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -24676,25 +29350,32 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) double sqrt(doublereal); /* Local variables */ - static doublereal d__, e; + static doublereal d__, e, g; static integer k; static doublereal s, t; - static integer i0, i4, n0, pp; - static doublereal eps, tol; + static integer i0, i4, n0; + static doublereal dn; + static integer pp; + static doublereal dn1, dn2, dee, eps, tau, tol; static integer ipn4; static doublereal tol2; static logical ieee; static integer nbig; static doublereal dmin__, emin, emax; - static integer ndiv, iter; + static integer kmin, ndiv, iter; static doublereal qmin, temp, qmax, zmax; - static integer splt, nfail; + static integer splt; + static doublereal dmin1, dmin2; + static integer nfail; static doublereal desig, trace, sigma; - static integer iinfo; + static integer iinfo, ttype; extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, - integer *, integer *, integer *, logical *); + integer *, integer *, integer *, logical *, integer *, + doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *); + static doublereal deemin; static integer iwhila, iwhilb; static doublereal oldemn, safmin; extern /* Subroutine */ int xerbla_(char *, integer *); @@ -24705,10 +29386,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. 
of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -24736,7 +29422,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) N (input) INTEGER The number of rows and columns in the matrix. N >= 0. - Z (workspace) DOUBLE PRECISION array, dimension ( 4*N ) + Z (input/output) DOUBLE PRECISION array, dimension ( 4*N ) On entry Z holds the qd array. On exit, entries 1 to N hold the eigenvalues in decreasing order, Z( 2*N+1 ) holds the trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If @@ -24993,6 +29679,17 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* L80: */ } +/* Initialise variables to pass to DLASQ3. */ + + ttype = 0; + dmin1 = 0.; + dmin2 = 0.; + dn = 0.; + dn1 = 0.; + dn2 = 0.; + g = 0.; + tau = 0.; + iter = 2; nfail = 0; ndiv = n0 - i0 << 1; @@ -25000,7 +29697,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__1 = *n + 1; for (iwhila = 1; iwhila <= i__1; ++iwhila) { if (n0 < 1) { - goto L150; + goto L170; } /* @@ -25058,10 +29755,43 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) L100: i0 = i4 / 4; + pp = 0; -/* Store EMIN for passing to DLASQ3. 
*/ - - z__[(n0 << 2) - 1] = emin; + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } +/* L110: */ + } + if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5) { + ipn4 = i0 + n0 << 2; + pp = 2; + i__2 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; +/* L120: */ + } + } + } /* Put -(initial shift) into DMIN. @@ -25071,21 +29801,26 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax); dmin__ = -max(d__1,d__2); -/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ - - pp = 0; +/* + Now I0:N0 is unreduced. + PP = 0 for ping, PP = 1 for pong. + PP = 2 indicates that flipping was applied to the Z array and + and that the tests for deflation upon entry in DLASQ3 + should not be performed. +*/ nbig = (n0 - i0 + 1) * 30; i__2 = nbig; for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { if (i0 > n0) { - goto L130; + goto L150; } /* While submatrix unfinished take a good dqds step. 
*/ dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & - nfail, &iter, &ndiv, &ieee); + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); pp = 1 - pp; @@ -25118,7 +29853,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) d__1 = oldemn, d__2 = z__[i4]; oldemn = min(d__1,d__2); } -/* L110: */ +/* L130: */ } z__[(n0 << 2) - 1] = emin; z__[n0 * 4] = oldemn; @@ -25126,7 +29861,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } } -/* L120: */ +/* L140: */ } *info = 2; @@ -25134,9 +29869,9 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* end IWHILB */ -L130: +L150: -/* L140: */ +/* L160: */ ; } @@ -25145,14 +29880,14 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* end IWHILA */ -L150: +L170: /* Move q's to the front. */ i__1 = *n; for (k = 2; k <= i__1; ++k) { z__[k] = z__[(k << 2) - 3]; -/* L160: */ +/* L180: */ } /* Sort and compute sum of eigenvalues. */ @@ -25162,7 +29897,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) e = 0.; for (k = *n; k >= 1; --k) { e += z__[k]; -/* L170: */ +/* L190: */ } /* Store trace, sum(eigenvalues) and information on performance. 
*/ @@ -25183,18 +29918,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__, integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig, doublereal *qmax, integer *nfail, integer *iter, integer *ndiv, - logical *ieee) + logical *ieee, integer *ttype, doublereal *dmin1, doublereal *dmin2, + doublereal *dn, doublereal *dn1, doublereal *dn2, doublereal *g, + doublereal *tau) { - /* Initialized data */ - - static integer ttype = 0; - static doublereal dmin1 = 0.; - static doublereal dmin2 = 0.; - static doublereal dn = 0.; - static doublereal dn1 = 0.; - static doublereal dn2 = 0.; - static doublereal tau = 0.; - /* System generated locals */ integer i__1; doublereal d__1, d__2; @@ -25210,22 +29937,27 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) static doublereal tol2, temp; extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *, integer *, integer *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, doublereal *, integer *) - , dlasq5_(integer *, integer *, doublereal *, integer *, - doublereal *, doublereal *, doublereal *, doublereal *, - doublereal *, doublereal *, doublereal *, logical *), dlasq6_( + doublereal *, doublereal *, doublereal *, doublereal *, integer *, + doublereal *), dlasq5_(integer *, integer *, doublereal *, + integer *, doublereal *, doublereal *, doublereal *, doublereal *, + doublereal *, doublereal *, doublereal *, logical *), dlasq6_( integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *); - static doublereal safmin; + extern logical disnan_(doublereal *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 + -- LAPACK routine (version 3.2.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -25241,14 +29973,17 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) I0 (input) INTEGER First index. - N0 (input) INTEGER + N0 (input/output) INTEGER Last index. Z (input) DOUBLE PRECISION array, dimension ( 4*N ) Z holds the qd array. - PP (input) INTEGER + PP (input/output) INTEGER PP=0 for ping, PP=1 for pong. + PP=2 indicates that flipping was applied to the Z array + and that the initial tests for deflation should not be + performed. DMIN (output) DOUBLE PRECISION Minimum value of d. @@ -25271,23 +30006,39 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) NDIV (output) INTEGER Number of divisions. - TTYPE (output) INTEGER - Shift type. - IEEE (input) LOGICAL Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). + TTYPE (input/output) INTEGER + Shift type. + + DMIN1 (input/output) DOUBLE PRECISION + + DMIN2 (input/output) DOUBLE PRECISION + + DN (input/output) DOUBLE PRECISION + + DN1 (input/output) DOUBLE PRECISION + + DN2 (input/output) DOUBLE PRECISION + + G (input/output) DOUBLE PRECISION + + TAU (input/output) DOUBLE PRECISION + + These are passed as arguments in order to save their values + between calls to DLASQ3. 
+ ===================================================================== */ + /* Parameter adjustments */ --z__; /* Function Body */ - n0in = *n0; eps = PRECISION; - safmin = SAFEMINIMUM; tol = eps * 100.; /* Computing 2nd power */ d__1 = tol; @@ -25355,6 +30106,9 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) goto L10; L50: + if (*pp == 2) { + *pp = 0; + } /* Reverse the qd-array, if warranted. */ @@ -25382,8 +30136,8 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; } /* Computing MIN */ - d__1 = dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; - dmin2 = min(d__1,d__2); + d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = min(d__1,d__2); /* Computing MIN */ d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1] , d__1 = min(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3]; @@ -25400,100 +30154,94 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } } -/* - L70: +/* Choose a shift. */ - Computing MIN -*/ - d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*n0 << 2) + *pp - 9], d__1 = - min(d__1,d__2), d__2 = dmin2 + z__[(*n0 << 2) - *pp]; - if (*dmin__ < 0. || safmin * *qmax < min(d__1,d__2)) { - -/* Choose a shift. */ - - dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, &tau, &ttype); + dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); -/* Call dqds until DMIN > 0. */ +/* Call dqds until DMIN > 0. */ -L80: +L70: - dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, ieee); + dlasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, + ieee); - *ndiv += *n0 - *i0 + 2; - ++(*iter); + *ndiv += *n0 - *i0 + 2; + ++(*iter); -/* Check status. */ +/* Check status. */ - if (*dmin__ >= 0. && dmin1 > 0.) { + if (*dmin__ >= 0. && *dmin1 > 0.) { -/* Success. */ +/* Success. */ - goto L100; + goto L90; - } else if (*dmin__ < 0. && dmin1 > 0. 
&& z__[(*n0 - 1 << 2) - *pp] < - tol * (*sigma + dn1) && abs(dn) < tol * *sigma) { + } else if (*dmin__ < 0. && *dmin1 > 0. && z__[(*n0 - 1 << 2) - *pp] < tol + * (*sigma + *dn1) && abs(*dn) < tol * *sigma) { -/* Convergence hidden by negative DN. */ +/* Convergence hidden by negative DN. */ - z__[(*n0 - 1 << 2) - *pp + 2] = 0.; - *dmin__ = 0.; - goto L100; - } else if (*dmin__ < 0.) { + z__[(*n0 - 1 << 2) - *pp + 2] = 0.; + *dmin__ = 0.; + goto L90; + } else if (*dmin__ < 0.) { -/* TAU too big. Select new TAU and try again. */ +/* TAU too big. Select new TAU and try again. */ - ++(*nfail); - if (ttype < -22) { + ++(*nfail); + if (*ttype < -22) { -/* Failed twice. Play it safe. */ +/* Failed twice. Play it safe. */ - tau = 0.; - } else if (dmin1 > 0.) { + *tau = 0.; + } else if (*dmin1 > 0.) { -/* Late failure. Gives excellent shift. */ +/* Late failure. Gives excellent shift. */ - tau = (tau + *dmin__) * (1. - eps * 2.); - ttype += -11; - } else { + *tau = (*tau + *dmin__) * (1. - eps * 2.); + *ttype += -11; + } else { -/* Early failure. Divide by 4. */ +/* Early failure. Divide by 4. */ - tau *= .25; - ttype += -12; - } - goto L80; - } else if (*dmin__ != *dmin__) { + *tau *= .25; + *ttype += -12; + } + goto L70; + } else if (disnan_(dmin__)) { -/* NaN. */ +/* NaN. */ - tau = 0.; + if (*tau == 0.) { goto L80; } else { + *tau = 0.; + goto L70; + } + } else { -/* Possible underflow. Play it safe. */ +/* Possible underflow. Play it safe. */ - goto L90; - } + goto L80; } /* Risk of underflow. 
*/ -L90: - dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); +L80: + dlasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); *ndiv += *n0 - *i0 + 2; ++(*iter); - tau = 0.; + *tau = 0.; -L100: - if (tau < *sigma) { - *desig += tau; +L90: + if (*tau < *sigma) { + *desig += *tau; t = *sigma + *desig; *desig -= t - *sigma; } else { - t = *sigma + tau; - *desig = *sigma - (t - tau) + *desig; + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; } *sigma = t; @@ -25506,12 +30254,8 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__, integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2, - doublereal *tau, integer *ttype) + doublereal *tau, integer *ttype, doublereal *g) { - /* Initialized data */ - - static doublereal g = 0.; - /* System generated locals */ integer i__1; doublereal d__1, d__2; @@ -25526,10 +30270,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -25577,23 +30326,25 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) TTYPE (output) INTEGER Shift type. + G (input/output) REAL + G is passed as an argument in order to save its value between + calls to DLASQ4. 
+ Further Details =============== CNST1 = 9/16 ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - /* Function Body */ -/* A negative DMIN forces the shift to take that absolute value TTYPE records the type of shift. */ + /* Parameter adjustments */ + --z__; + + /* Function Body */ if (*dmin__ <= 0.) { *tau = -(*dmin__); *ttype = -1; @@ -25748,13 +30499,13 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Case 6, no information to guide us. */ if (*ttype == -6) { - g += (1. - g) * .333; + *g += (1. - *g) * .333; } else if (*ttype == -18) { - g = .083250000000000005; + *g = .083250000000000005; } else { - g = .25; + *g = .25; } - s = g * *dmin__; + s = *g * *dmin__; *ttype = -6; } @@ -25898,10 +30649,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -26111,10 +30867,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -26292,53 +31053,86 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose ======= - DLASR performs the transformation + DLASR applies a sequence of plane rotations to a real matrix A, + from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + A := A*P**T - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. - where A is an m by n real matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): + When DIRECT = 'F' (Forward sequence), then - When DIRECT = 'F' or 'f' ( Forward sequence ) then + P = P(z-1) * ... * P(2) * P(1) - P = P( z - 1 )*...*P( 2 )*P( 1 ), + and when DIRECT = 'B' (Backward sequence), then - and when DIRECT = 'B' or 'b' ( Backward sequence ) then + P = P(1) * P(2) * ... * P(z-1) - P = P( 1 )*P( 2 )*...*P( z - 1 ), + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation - where P( k ) is a plane rotation matrix for the following planes: + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). 
- when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) - This version vectorises across rows of the array A when SIDE = 'L'. + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. Arguments ========= @@ -26347,13 +31141,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) Specifies whether the plane rotation matrix P is applied to A on the left or the right. = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. 
- = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + = 'R': Right, compute A:= A*P**T PIVOT (input) CHARACTER*1 Specifies the plane for which P(k) is a plane rotation @@ -26362,6 +31150,12 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) = 'T': Top pivot, the plane (1,k+1) = 'B': Bottom pivot, the plane (k,z) + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + M (input) INTEGER The number of rows of the matrix A. If m <= 1, an immediate return is effected. @@ -26370,18 +31164,22 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) The number of columns of the matrix A. If n <= 1, an immediate return is effected. - C, S (input) DOUBLE PRECISION arrays, dimension + C (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + The cosines c(k) of the plane rotations. + + S (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). A (input/output) DOUBLE PRECISION array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). @@ -26687,10 +31485,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26941,10 +31739,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27044,10 +31842,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27225,7 +32023,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Note that M is very tiny */ if (l == 0.) { - t = d_sign(&c_b2804, &ft) * d_sign(&c_b15, >); + t = d_sign(&c_b3192, &ft) * d_sign(&c_b15, >); } else { t = gt / d_sign(&d__, &ft) + m / t; } @@ -27284,10 +32082,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27318,7 +32116,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) The last element of IPIV for which a row interchange will be done. - IPIV (input) INTEGER array, dimension (M*abs(INCX)) + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) The vector of pivot indices. Only the elements in positions K1 through K2 of IPIV are accessed. IPIV(K) = L implies rows K and L are to be interchanged. @@ -27412,6 +32210,459 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } /* dlaswp_ */ +/* Subroutine */ int dlasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, doublereal *tl, integer *ldtl, doublereal * + tr, integer *ldtr, doublereal *b, integer *ldb, doublereal *scale, + doublereal *x, integer *ldx, doublereal *xnorm, integer *info) +{ + /* Initialized data */ + + static integer locu12[4] = { 3,4,1,2 }; + static integer locl21[4] = { 2,1,4,3 }; + static integer locu22[4] = { 4,3,2,1 }; + static logical xswpiv[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical bswpiv[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + + /* System generated locals */ + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, + x_offset; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + + /* Local variables */ + static integer i__, j, k; + static doublereal x2[2], l21, u11, u12; + static integer ip, jp; + static doublereal u22, t16[16] /* was [4][4] */, gam, bet, eps, sgn, + tmp[4], tau1, btmp[4], smin; + static integer ipiv; + static doublereal temp; + static integer jpiv[4]; + static doublereal xmax; + static integer ipsv, jpsv; + static logical bswap; + extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, + doublereal *, integer *), dswap_(integer *, doublereal *, integer + *, doublereal *, integer *); + static logical xswap; + + extern integer idamax_(integer *, 
doublereal *, integer *); + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in + + op(TL)*X + ISGN*X*op(TR) = SCALE*B, + + where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or + -1. op(T) = T or T', where T' denotes the transpose of T. + + Arguments + ========= + + LTRANL (input) LOGICAL + On entry, LTRANL specifies the op(TL): + = .FALSE., op(TL) = TL, + = .TRUE., op(TL) = TL'. + + LTRANR (input) LOGICAL + On entry, LTRANR specifies the op(TR): + = .FALSE., op(TR) = TR, + = .TRUE., op(TR) = TR'. + + ISGN (input) INTEGER + On entry, ISGN specifies the sign of the equation + as described before. ISGN may only be 1 or -1. + + N1 (input) INTEGER + On entry, N1 specifies the order of matrix TL. + N1 may only be 0, 1 or 2. + + N2 (input) INTEGER + On entry, N2 specifies the order of matrix TR. + N2 may only be 0, 1 or 2. + + TL (input) DOUBLE PRECISION array, dimension (LDTL,2) + On entry, TL contains an N1 by N1 matrix. + + LDTL (input) INTEGER + The leading dimension of the matrix TL. LDTL >= max(1,N1). + + TR (input) DOUBLE PRECISION array, dimension (LDTR,2) + On entry, TR contains an N2 by N2 matrix. + + LDTR (input) INTEGER + The leading dimension of the matrix TR. LDTR >= max(1,N2). + + B (input) DOUBLE PRECISION array, dimension (LDB,2) + On entry, the N1 by N2 matrix B contains the right-hand + side of the equation. + + LDB (input) INTEGER + The leading dimension of the matrix B. LDB >= max(1,N1). + + SCALE (output) DOUBLE PRECISION + On exit, SCALE contains the scale factor. SCALE is chosen + less than or equal to 1 to prevent the solution overflowing. + + X (output) DOUBLE PRECISION array, dimension (LDX,2) + On exit, X contains the N1 by N2 solution. 
+ + LDX (input) INTEGER + The leading dimension of the matrix X. LDX >= max(1,N1). + + XNORM (output) DOUBLE PRECISION + On exit, XNORM is the infinity-norm of the solution. + + INFO (output) INTEGER + On exit, INFO is set to + 0: successful exit. + 1: TL and TR have too close eigenvalues, so TL or + TR is perturbed to get a nonsingular equation. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Do not check the input parameters for errors */ + + *info = 0; + +/* Quick return if possible */ + + if (*n1 == 0 || *n2 == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = PRECISION; + smlnum = SAFEMINIMUM / eps; + sgn = (doublereal) (*isgn); + + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + case 4: goto L50; + } + +/* 1 by 1: TL11*X + SGN*X*TR11 = B11 */ + +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = abs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + + *scale = 1.; + gam = (d__1 = b[b_dim1 + 1], abs(d__1)); + if (smlnum * gam > bet) { + *scale = 1. 
/ gam; + } + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)); + return 0; + +/* + 1 by 2: + TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] + [TR21 TR22] +*/ + +L20: + +/* + Computing MAX + Computing MAX +*/ + d__7 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__8 = (d__2 = tr[tr_dim1 + 1] + , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tr[(tr_dim1 << + 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tr[ + tr_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = + tr[(tr_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7,d__8); + smin = max(d__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; + +/* + 2 by 1: + op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] + [TL21 TL22] [X21] [X21] [B21] +*/ + +L30: +/* + Computing MAX + Computing MAX +*/ + d__7 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__8 = (d__2 = tl[tl_dim1 + 1] + , abs(d__2)), d__7 = max(d__7,d__8), d__8 = (d__3 = tl[(tl_dim1 << + 1) + 1], abs(d__3)), d__7 = max(d__7,d__8), d__8 = (d__4 = tl[ + tl_dim1 + 2], abs(d__4)), d__7 = max(d__7,d__8), d__8 = (d__5 = + tl[(tl_dim1 << 1) + 2], abs(d__5)); + d__6 = eps * max(d__7,d__8); + smin = max(d__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + +/* + Solve 2 by 2 system using complete pivoting. + Set pivots less than SMIN to SMIN. 
+*/ + + ipiv = idamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (abs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (abs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.; + if (smlnum * 2. * abs(btmp[1]) > abs(u22) || smlnum * 2. * abs(btmp[0]) > + abs(u11)) { +/* Computing MAX */ + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]); + *scale = .5 / max(d__1,d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[(x_dim1 << 1) + + 1], abs(d__2)); + } else { + x[x_dim1 + 2] = x2[1]; +/* Computing MAX */ + d__3 = (d__1 = x[x_dim1 + 1], abs(d__1)), d__4 = (d__2 = x[x_dim1 + 2] + , abs(d__2)); + *xnorm = max(d__3,d__4); + } + return 0; + +/* + 2 by 2: + op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] + [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] + + Solve equivalent 4 by 4 system using complete pivoting. + Set pivots less than SMIN to SMIN. 
+*/ + +L50: +/* Computing MAX */ + d__5 = (d__1 = tr[tr_dim1 + 1], abs(d__1)), d__6 = (d__2 = tr[(tr_dim1 << + 1) + 1], abs(d__2)), d__5 = max(d__5,d__6), d__6 = (d__3 = tr[ + tr_dim1 + 2], abs(d__3)), d__5 = max(d__5,d__6), d__6 = (d__4 = + tr[(tr_dim1 << 1) + 2], abs(d__4)); + smin = max(d__5,d__6); +/* Computing MAX */ + d__5 = smin, d__6 = (d__1 = tl[tl_dim1 + 1], abs(d__1)), d__5 = max(d__5, + d__6), d__6 = (d__2 = tl[(tl_dim1 << 1) + 1], abs(d__2)), d__5 = + max(d__5,d__6), d__6 = (d__3 = tl[tl_dim1 + 2], abs(d__3)), d__5 = + max(d__5,d__6), d__6 = (d__4 = tl[(tl_dim1 << 1) + 2], abs(d__4)) + ; + smin = max(d__5,d__6); +/* Computing MAX */ + d__1 = eps * smin; + smin = max(d__1,smlnum); + btmp[0] = 0.; + dcopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + +/* Perform elimination */ + + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((d__1 = t16[ip + (jp << 2) - 5], abs(d__1)) >= xmax) { + xmax = (d__1 = t16[ip + (jp << 2) - 5], 
abs(d__1)); + ipsv = ip; + jpsv = jp; + } +/* L60: */ + } +/* L70: */ + } + if (ipsv != i__) { + dswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + dswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], + &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((d__1 = t16[i__ + (i__ << 2) - 5], abs(d__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + ( + k << 2) - 5]; +/* L80: */ + } +/* L90: */ + } +/* L100: */ + } + if (abs(t16[15]) < smin) { + t16[15] = smin; + } + *scale = 1.; + if (smlnum * 8. * abs(btmp[0]) > abs(t16[0]) || smlnum * 8. * abs(btmp[1]) + > abs(t16[5]) || smlnum * 8. * abs(btmp[2]) > abs(t16[10]) || + smlnum * 8. * abs(btmp[3]) > abs(t16[15])) { +/* Computing MAX */ + d__1 = abs(btmp[0]), d__2 = abs(btmp[1]), d__1 = max(d__1,d__2), d__2 + = abs(btmp[2]), d__1 = max(d__1,d__2), d__2 = abs(btmp[3]); + *scale = .125 / max(d__1,d__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1. 
/ t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; +/* L110: */ + } +/* L120: */ + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } +/* L130: */ + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; +/* Computing MAX */ + d__1 = abs(tmp[0]) + abs(tmp[2]), d__2 = abs(tmp[1]) + abs(tmp[3]); + *xnorm = max(d__1,d__2); + return 0; + +/* End of DLASY2 */ + +} /* dlasy2_ */ + /* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal * a, integer *lda, doublereal *e, doublereal *tau, doublereal *w, integer *ldw) @@ -27437,10 +32688,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27461,7 +32712,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) Arguments ========= - UPLO (input) CHARACTER + UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = 'U': Upper triangular @@ -27760,10 +33011,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27916,10 +33167,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28091,10 +33342,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28247,10 +33498,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28316,7 +33567,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) reflector H(i) or G(i), which determines Q or P**T, as returned by DGEBRD in its array argument TAUQ or TAUP. 
- WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -28521,10 +33772,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28561,7 +33812,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEHRD. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -28704,10 +33955,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28862,10 +34113,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28905,7 +34156,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGELQF. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -29119,10 +34370,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29163,7 +34414,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by DGEQRF. - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -29375,10 +34626,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29576,10 +34827,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29796,10 +35047,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29886,7 +35137,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -30095,6 +35346,235 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } /* dormbr_ */ +/* Subroutine */ int dormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublereal *a, integer *lda, doublereal * + tau, doublereal *c__, integer *ldc, doublereal *work, integer *lwork, + integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, + integer *, doublereal *, integer *, doublereal *, doublereal *, + integer *, doublereal *, integer *, integer *); + static integer lwkopt; + static logical lquery; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DORMHR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by DGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. 
+ + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of DGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) DOUBLE PRECISION array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by DGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by DGEHRD. + + C (input/output) DOUBLE PRECISION array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. 
+ + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1] = (doublereal) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("DORMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + dormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1] = (doublereal) lwkopt; + return 0; + +/* End of DORMHR */ + +} /* dormhr_ */ + /* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal * c__, integer *ldc, doublereal *work, integer *info) @@ -30115,10 +35595,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -30337,10 +35817,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -30405,7 +35885,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -30642,10 +36122,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -30710,7 +36190,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -30756,10 +36236,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) if (left) { nq = *m; - nw = *n; + nw = max(1,*n); } else { nq = *n; - nw = *m; + nw = max(1,*m); } if (! left && ! 
lsame_(side, "R")) { *info = -1; @@ -30775,27 +36255,33 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; } if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { /* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. Computing MIN Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } work[1] = (doublereal) lwkopt; + + if (*lwork < nw && ! lquery) { + *info = -12; + } } if (*info != 0) { @@ -30808,8 +36294,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Quick return if possible */ - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.; + if (*m == 0 || *n == 0) { return 0; } @@ -30937,10 +36422,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31005,7 +36490,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -31234,10 +36719,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31303,7 +36788,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -31493,14 +36978,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static logical upper; + extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31595,7 +37081,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__2 = j - 1; ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.) { + if (ajj <= 0. || disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L30; } @@ -31628,7 +37114,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) i__2 = j - 1; ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - if (ajj <= 0.) { + if (ajj <= 0. || disnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L30; } @@ -31688,10 +37174,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31894,10 +37380,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31999,10 +37485,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -32138,7 +37624,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* Local variables */ static integer i__, j, k, m; static doublereal p; - static integer ii, end, lgn; + static integer ii, lgn; static doublereal eps, tiny; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, @@ -32161,6 +37647,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); + static integer finish; extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, integer *), dlasrt_(char *, integer *, doublereal *, integer *); @@ -32169,14 +37656,14 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal orgnrm; static logical lquery; - static integer smlsiz, dtrtrw, storez; + static integer smlsiz, storez, strtrw; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -32243,13 +37730,16 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) that 2**k >= N. If COMPZ = 'I' and N > 1 then LWORK must be at least ( 1 + 4*N + N**2 ). + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be max(1,2*(N-1)). 
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -32259,6 +37749,9 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) ( 6 + 6*N + 5*N*lg N ). If COMPZ = 'I' and N > 1 then LIWORK must be at least ( 3 + 5*N ). + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, @@ -32308,44 +37801,54 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } else { icompz = -1; } - if (*n <= 1 || icompz <= 0) { - liwmin = 1; - lwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = (*n << 2) + 1 + i__1 * i__1; - liwmin = *n * 5 + 3; - } - } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! 
lquery) { - *info = -10; } if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + liwmin = 1; + lwmin = 1; + } else if (*n <= smlsiz) { + liwmin = 1; + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } work[1] = (doublereal) lwmin; iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } } if (*info != 0) { @@ -32368,9 +37871,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) return 0; } - smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - /* If the following conditional clause is removed, then the routine will use the Divide and Conquer routine to compute only the @@ -32378,14 +37878,15 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) (2 + 5N + 2N lg(N)) integer workspace. Since on many architectures DSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here - as the default. + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. If COMPZ = 'N', use DSTERF to compute the eigenvalues. 
*/ if (icompz == 0) { dsterf_(n, &d__[1], &e[1], info); - return 0; + goto L50; } /* @@ -32394,182 +37895,176 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) */ if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } else { - dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } - } + + dsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); + + } else { /* - If COMPZ = 'V', the Z matrix must be stored elsewhere for later - use. + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. */ - if (icompz == 1) { - storez = *n * *n + 1; - } else { - storez = 1; - } + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } - if (icompz == 2) { - dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); - } + if (icompz == 2) { + dlaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } -/* Scale. */ +/* Scale. */ - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + goto L50; + } - eps = EPSILON; + eps = EPSILON; - start = 1; + start = 1; -/* while ( START <= N ) */ +/* while ( START <= N ) */ L10: - if (start <= *n) { + if (start <= *n) { /* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. 
*/ - end = start; + finish = start; L20: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L20; + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L20; + } } - } -/* (Sub) Problem determined. Compute its size and solve it. */ +/* (Sub) Problem determined. Compute its size and solve it. */ - m = end - start + 1; - if (m == 1) { - start = end + 1; - goto L10; - } - if (m > smlsiz) { - *info = smlsiz; + m = finish - start + 1; + if (m == 1) { + start = finish + 1; + goto L10; + } + if (m > smlsiz) { -/* Scale. */ +/* Scale. */ - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[start] - , &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ - start], &i__2, info); + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + start], &i__2, info); - if (icompz == 1) { - dtrtrw = 1; - } else { - dtrtrw = start; - } - dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw + - start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ - 1], info); - if (*info != 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], & + iwork[1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L50; + } -/* Scale back. */ +/* Scale back. 
*/ - dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[start] - , &m, info); + dlascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[ + start], &m, info); - } else { - if (icompz == 1) { + } else { + if (icompz == 1) { /* - Since QR won't update a Z matrix which is larger than the - length of D, we must solve the sub-problem in a workspace and - then multiply back into Z. + Since QR won't update a Z matrix which is larger than + the length of D, we must solve the sub-problem in a + workspace and then multiply back into Z. */ - dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ - m * m + 1], info); - dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ - storez], n); - dgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], ldz, &work[ - 1], &m, &c_b29, &z__[start * z_dim1 + 1], ldz); - } else if (icompz == 2) { - dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start * - z_dim1], ldz, &work[1], info); - } else { - dsterf_(&m, &d__[start], &e[start], info); - } - if (*info != 0) { - *info = start * (*n + 1) + end; - return 0; + dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, & + work[m * m + 1], info); + dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + dgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], n, & + work[1], &m, &c_b29, &z__[start * z_dim1 + 1], + ldz); + } else if (icompz == 2) { + dsteqr_("I", &m, &d__[start], &e[start], &z__[start + + start * z_dim1], ldz, &work[1], info); + } else { + dsterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + finish; + goto L50; + } } - } - start = end + 1; - goto L10; - } + start = finish + 1; + goto L10; + } /* - endwhile + endwhile - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. + If the problem split any number of times, then the eigenvalues + will not be properly ordered. 
Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. */ - if (m != *n) { - if (icompz == 0) { + if (m != *n) { + if (icompz == 0) { -/* Use Quick Sort */ +/* Use Quick Sort */ - dlasrt_("I", n, &d__[1], info); + dlasrt_("I", n, &d__[1], info); - } else { + } else { -/* Use Selection Sort to minimize swaps of eigenvectors */ +/* Use Selection Sort to minimize swaps of eigenvectors */ - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } /* L30: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 - + 1], &c__1); - } + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * + z_dim1 + 1], &c__1); + } /* L40: */ + } } } } +L50: work[1] = (doublereal) lwmin; iwork[1] = liwmin; @@ -32631,10 +38126,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -33220,10 +38715,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -33654,6 +39149,8 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; static integer indtau; @@ -33673,10 +39170,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -33740,11 +39237,12 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) 1 + 6*N + 2*N**2. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + only calculates the optimal sizes of the WORK and IWORK + arrays, returns these values as the first entries of the WORK + and IWORK arrays, and no error message related to LWORK or + LIWORK is issued by XERBLA. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -33754,16 +39252,21 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. 
If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK and + IWORK arrays, returns these values as the first entries of + the WORK and IWORK arrays, and no error message related to + LWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). Further Details =============== @@ -33773,6 +39276,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) at Berkeley, USA Modified by Francoise Tisseur, University of Tennessee. + Modified description of INFO. Sven, 16 Feb 05. ===================================================================== @@ -33793,24 +39297,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) lquery = *lwork == -1 || *liwork == -1; *info = 0; - if (*n <= 1) { - liwmin = 1; - lwmin = 1; - lopt = lwmin; - liopt = liwmin; - } else { - if (wantz) { - liwmin = *n * 5 + 3; -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); - } else { - liwmin = 1; - lwmin = (*n << 1) + 1; - } - lopt = lwmin; - liopt = liwmin; - } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! 
(lower || lsame_(uplo, "U"))) { @@ -33819,15 +39305,38 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) *info = -3; } else if (*lda < max(1,*n)) { *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; } if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = (*n << 1) + ilaenv_(&c__1, "DSYTRD", uplo, n, + &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + liopt = liwmin; + } work[1] = (doublereal) lopt; iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } } if (*info != 0) { @@ -33957,10 +39466,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -34239,10 +39748,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -34296,7 +39805,7 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) + WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -34616,10 +40125,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -34627,28 +40136,23 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) DTREVC computes some or all of the right and/or left eigenvectors of a real upper quasi-triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a real general matrix: A = Q*T*Q**T, as computed by DHSEQR. The right eigenvector x and the left eigenvector y of T corresponding to an eigenvalue w are defined by: - T*x = w*x, y'*T = w*y' + T*x = w*x, (y**H)*T = w*(y**H) - where y' denotes the conjugate transpose of the vector y. + where y**H denotes the conjugate transpose of y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal blocks of T. - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input orthogonal - matrix. If T was obtained from the real-Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. 
- - T must be in Schur canonical form (as returned by DHSEQR), that is, - block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each - 2-by-2 diagonal block has its diagonal elements equal and its - off-diagonal elements of opposite sign. Corresponding to each 2-by-2 - diagonal block is a complex conjugate pair of eigenvalues and - eigenvectors; only one eigenvector of the pair is computed, namely - the one corresponding to the eigenvalue with positive imaginary part. + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the orthogonal factor that reduces a matrix + A to Schur form T, then Q*X and Q*Y are the matrices of right and + left eigenvectors of A. Arguments ========= @@ -34661,21 +40165,21 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) HOWMNY (input) CHARACTER*1 = 'A': compute all right and/or left eigenvectors; = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; + backtransformed by the matrices in VR and/or VL; = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. + as indicated by the logical array SELECT. SELECT (input/output) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenvectors to be computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the real eigenvector corresponding to a real - eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select - the complex eigenvector corresponding to a complex conjugate - pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be - set to .TRUE.; then on exit SELECT(j) is .TRUE. and - SELECT(j+1) is .FALSE.. + If w(j) is a real eigenvalue, the corresponding real + eigenvector is computed if SELECT(j) is .TRUE.. 
+ If w(j) and w(j+1) are the real and imaginary parts of a + complex eigenvalue, the corresponding complex eigenvector is + computed if either SELECT(j) or SELECT(j+1) is .TRUE., and + on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to + .FALSE.. + Not referenced if HOWMNY = 'A' or 'B'. N (input) INTEGER The order of the matrix T. N >= 0. @@ -34692,15 +40196,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) of Schur vectors returned by DHSEQR). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL has the same quasi-lower triangular form - as T'. If T(i,i) is a real eigenvalue, then - the i-th column VL(i) of VL is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VL(i)+sqrt(-1)*VL(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. if HOWMNY = 'B', the matrix Q*Y; if HOWMNY = 'S', the left eigenvectors of T specified by SELECT, stored consecutively in the columns @@ -34709,11 +40204,11 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part, and the second the imaginary part. - If SIDE = 'R', VL is not referenced. + Not referenced if SIDE = 'R'. LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. VR (input/output) DOUBLE PRECISION array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must @@ -34721,15 +40216,6 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) of Schur vectors returned by DHSEQR). 
On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR has the same quasi-upper triangular form - as T. If T(i,i) is a real eigenvalue, then - the i-th column VR(i) of VR is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VR(i)+sqrt(-1)*VR(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. if HOWMNY = 'B', the matrix Q*X; if HOWMNY = 'S', the right eigenvectors of T specified by SELECT, stored consecutively in the columns @@ -34738,11 +40224,11 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part and the second the imaginary part. - If SIDE = 'L', VR is not referenced. + Not referenced if SIDE = 'L'. LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B', LDVR >= N. MM (input) INTEGER The number of columns in the arrays VL and/or VR. MM >= M. @@ -35719,11 +41205,8 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) ; } -/* - Copy the vector x or Q*x to VL and normalize. +/* Copy the vector x or Q*x to VL and normalize. */ - L210: -*/ if (! 
over) { i__2 = *n - ki + 1; dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * @@ -35814,6 +41297,386 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) } /* dtrevc_ */ +/* Subroutine */ int dtrexc_(char *compq, integer *n, doublereal *t, integer * + ldt, doublereal *q, integer *ldq, integer *ifst, integer *ilst, + doublereal *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + + /* Local variables */ + static integer nbf, nbl, here; + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int dlaexc_(logical *, integer *, doublereal *, + integer *, doublereal *, integer *, integer *, integer *, integer + *, doublereal *, integer *), xerbla_(char *, integer *); + static integer nbnext; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + DTREXC reorders the real Schur factorization of a real matrix + A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + moved to row ILST. + + The real Schur form T is reordered by an orthogonal similarity + transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + is updated by postmultiplying it with Z. + + T must be in Schur canonical form (as returned by DHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) DOUBLE PRECISION array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + Schur canonical form. 
+ On exit, the reordered upper quasi-triangular matrix, again + in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) DOUBLE PRECISION array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + orthogonal transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input/output) INTEGER + ILST (input/output) INTEGER + Specify the reordering of the diagonal blocks of T. + The block with row index IFST is moved to row ILST, by a + sequence of transpositions between adjacent blocks. + On exit, if IFST pointed on entry to the second row of a + 2-by-2 block, it is changed to point to the first row; ILST + always points to the first row of the block in its final + position (which may differ from its input value by +1 or -1). + 1 <= IFST <= N; 1 <= ILST <= N. + + WORK (workspace) DOUBLE PRECISION array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + = 1: two adjacent blocks were too close to swap (the problem + is very ill-conditioned); T may have been partially + reordered, and ILST points to the first row of the + current position of the block being moved. + + ===================================================================== + + + Decode and test the input arguments. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! wantq && ! 
lsame_(compq, "N")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("DTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* + Determine the first row of specified block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.) { + nbf = 2; + } + } + +/* + Determine the first row of the final block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.) { + nbl = 2; + } + } + + if (*ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Update ILST */ + + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + + here = *ifst; + +L10: + +/* Swap block with next one below */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.) { + nbnext = 2; + } + } + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & + nbf, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.) 
{ + nbnext = 2; + } + } + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + ++here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + 2 + (here + 1) * t_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + +/* 2 by 2 Block did split */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here + 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + + } else { + + here = *ifst; +L20: + +/* Swap block with next one above */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) { + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &nbf, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.) 
{ + nbnext = 2; + } + } + i__1 = here - nbnext; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &nbnext, &c__1, &work[1], info); + --here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + (here - 1) * t_dim1] == 0.) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__2, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + +/* 2 by 2 Block did split */ + + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here - 1; + dlaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + + return 0; + +/* End of DTREXC */ + +} /* dtrexc_ */ + /* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal * a, integer *lda, integer *info) { @@ -35833,10 +41696,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -35996,10 +41859,10 @@ doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose diff --git a/numpy/linalg/lapack_lite/f2c_lapack.c b/numpy/linalg/lapack_lite/f2c_lapack.c index 7a0dd491d678..e1aa860fac57 100644 --- a/numpy/linalg/lapack_lite/f2c_lapack.c +++ b/numpy/linalg/lapack_lite/f2c_lapack.c @@ -29,10 +29,10 @@ them. /* Table of constant values */ -static integer c__0 = 0; -static real c_b163 = 0.f; -static real c_b164 = 1.f; static integer c__1 = 1; +static real c_b172 = 0.f; +static real c_b173 = 1.f; +static integer c__0 = 0; integer ieeeck_(integer *ispec, real *zero, real *one) { @@ -45,10 +45,10 @@ integer ieeeck_(integer *ispec, real *zero, real *one) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1998 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -148,7 +148,7 @@ integer ieeeck_(integer *ispec, real *zero, real *one) nan5 = neginf * negzro; - nan6 = nan5 * 0.f; + nan6 = nan5 * *zero; if (nan1 == nan1) { ret_val = 0; @@ -183,6 +183,292 @@ integer ieeeck_(integer *ispec, real *zero, real *one) return ret_val; } /* ieeeck_ */ +integer ilaclc_(integer *m, integer *n, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILACLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ + i__2].i != 0.f)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* ilaclc_ */ + +integer ilaclr_(integer *m, integer *n, complex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILACLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX array, dimension (LDA,N) + The m by n matrix A. 
+ + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0.f || a[i__1].i != 0.f || (a[i__2].r != 0.f || a[ + i__2].i != 0.f)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * a_dim1; + if (a[i__2].r != 0.f || a[i__2].i != 0.f) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* ilaclr_ */ + +integer iladlc_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILADLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. 
+*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.) { + return ret_val; + } + } + } + } + return ret_val; +} /* iladlc_ */ + +integer iladlr_(integer *m, integer *n, doublereal *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILADLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) DOUBLE PRECISION array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0. || a[*m + *n * a_dim1] != 0.) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.) 
{ + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* iladlr_ */ + integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen opts_len) @@ -198,17 +484,22 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, static integer i__; static char c1[1], c2[2], c3[3], c4[2]; static integer ic, nb, iz, nx; - static logical cname, sname; + static logical cname; static integer nbmin; + static logical sname; extern integer ieeeck_(integer *, real *, real *); static char subnam[6]; + extern integer iparmq_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.1) -- + + -- April 2009 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -218,6 +509,10 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, parameters for the local environment. See ISPEC for a description of the parameters. + ILAENV returns an INTEGER + if ILAENV >= 0: ILAENV returns the value of the parameter specified by ISPEC + if ILAENV < 0: if ILAENV = -k, the k-th argument had an illegal value. + This version provides a set of parameters which should give good, but not optimal, performance on many of the currently available computers. 
Users are encouraged to modify this subroutine to set @@ -241,7 +536,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, = 3: the crossover point (in a block routine, for N less than this value, an unblocked routine should be used) = 4: the number of shifts, used in the nonsymmetric - eigenvalue routines + eigenvalue routines (DEPRECATED) = 5: the minimum column dimension for blocking to be used; rectangular blocks must have dimension at least k by m, where k is given by ILAENV(2,...) and m by ILAENV(5,...) @@ -250,13 +545,16 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, this value, a QR factorization is used first to reduce the matrix to a triangular form.) = 7: the number of processors - = 8: the crossover point for the multishift QR and QZ methods - for nonsymmetric eigenvalue problems. + = 8: the crossover point for the multishift QR method + for nonsymmetric eigenvalue problems (DEPRECATED) = 9: maximum size of the subproblems at the bottom of the computation tree in the divide-and-conquer algorithm (used by xGELSD and xGESDD) =10: ieee NaN arithmetic can be trusted not to trap =11: infinity arithmetic can be trusted not to trap + 12 <= ISPEC <= 16: + xHSEQR or one of its subroutines, + see IPARMQ for detailed explanation NAME (input) CHARACTER*(*) The name of the calling subroutine, in either upper case or @@ -275,10 +573,6 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, Problem dimensions for the subroutine NAME; these may not all be required. - (ILAENV) (output) INTEGER - >= 0: the value of the parameter specified by ISPEC - < 0: if ILAENV = -k, the k-th argument had an illegal value. 
- Further Details =============== @@ -304,17 +598,22 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, switch (*ispec) { - case 1: goto L100; - case 2: goto L100; - case 3: goto L100; - case 4: goto L400; - case 5: goto L500; - case 6: goto L600; - case 7: goto L700; - case 8: goto L800; - case 9: goto L900; - case 10: goto L1000; - case 11: goto L1100; + case 1: goto L10; + case 2: goto L10; + case 3: goto L10; + case 4: goto L80; + case 5: goto L90; + case 6: goto L100; + case 7: goto L110; + case 8: goto L120; + case 9: goto L130; + case 10: goto L140; + case 11: goto L150; + case 12: goto L160; + case 13: goto L160; + case 14: goto L160; + case 15: goto L160; + case 16: goto L160; } /* Invalid value for ISPEC */ @@ -322,7 +621,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, ret_val = -1; return ret_val; -L100: +L10: /* Convert NAME to upper case if the first character is lower case. */ @@ -341,7 +640,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, if (ic >= 97 && ic <= 122) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); } -/* L10: */ +/* L20: */ } } @@ -358,7 +657,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, 162 && ic <= 169) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); } -/* L20: */ +/* L30: */ } } @@ -373,7 +672,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, if (ic >= 225 && ic <= 250) { *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); } -/* L30: */ +/* L40: */ } } } @@ -389,12 +688,12 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); switch (*ispec) { - case 1: goto L110; - case 2: goto L200; - case 3: goto L300; + case 1: goto L50; + case 2: goto L60; + case 3: goto L70; } -L110: +L50: /* ISPEC = 1: block size @@ -565,7 +864,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, ret_val = nb; return 
ret_val; -L200: +L60: /* ISPEC = 2: minimum block size */ @@ -657,7 +956,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, ret_val = nbmin; return ret_val; -L300: +L70: /* ISPEC = 3: crossover point */ @@ -719,42 +1018,42 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, ret_val = nx; return ret_val; -L400: +L80: /* ISPEC = 4: number of shifts (used by xHSEQR) */ ret_val = 6; return ret_val; -L500: +L90: /* ISPEC = 5: minimum column dimension (not used) */ ret_val = 2; return ret_val; -L600: +L100: /* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); return ret_val; -L700: +L110: /* ISPEC = 7: number of processors (not used) */ ret_val = 1; return ret_val; -L800: +L120: /* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ ret_val = 50; return ret_val; -L900: +L130: /* ISPEC = 9: maximum size of the subproblems at the bottom of the @@ -765,7 +1064,7 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, ret_val = 25; return ret_val; -L1000: +L140: /* ISPEC = 10: ieee NaN arithmetic can be trusted not to trap @@ -774,11 +1073,11 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, */ ret_val = 1; if (ret_val == 1) { - ret_val = ieeeck_(&c__0, &c_b163, &c_b164); + ret_val = ieeeck_(&c__1, &c_b172, &c_b173); } return ret_val; -L1100: +L150: /* ISPEC = 11: infinity arithmetic can be trusted not to trap @@ -787,11 +1086,574 @@ integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, */ ret_val = 1; if (ret_val == 1) { - ret_val = ieeeck_(&c__1, &c_b163, &c_b164); + ret_val = ieeeck_(&c__0, &c_b172, &c_b173); } return ret_val; +L160: + +/* 12 <= ISPEC <= 16: xHSEQR or one of its subroutines. 
*/ + + ret_val = iparmq_(ispec, name__, opts, n1, n2, n3, n4, name_len, opts_len) + ; + return ret_val; + /* End of ILAENV */ } /* ilaenv_ */ +integer ilaslc_(integer *m, integer *n, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILASLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else if (a[*n * a_dim1 + 1] != 0.f || a[*m + *n * a_dim1] != 0.f) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + if (a[i__ + ret_val * a_dim1] != 0.f) { + return ret_val; + } + } + } + } + return ret_val; +} /* ilaslc_ */ + +integer ilaslr_(integer *m, integer *n, real *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILASLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) REAL array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else if (a[*m + a_dim1] != 0.f || a[*m + *n * a_dim1] != 0.f) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + if (a[i__ + j * a_dim1] != 0.f) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + return ret_val; +} /* ilaslr_ */ + +integer ilazlc_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILAZLC scans A for its last non-zero column. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). 
+ + ===================================================================== + + + Quick test for the common case where one corner is non-zero. +*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*n == 0) { + ret_val = *n; + } else /* if(complicated condition) */ { + i__1 = *n * a_dim1 + 1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *n; + } else { +/* Now scan each column from the end, returning with the first non-zero. */ + for (ret_val = *n; ret_val >= 1; --ret_val) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__ + ret_val * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + return ret_val; + } + } + } + } + } + return ret_val; +} /* ilazlc_ */ + +integer ilazlr_(integer *m, integer *n, doublecomplex *a, integer *lda) +{ + /* System generated locals */ + integer a_dim1, a_offset, ret_val, i__1, i__2; + + /* Local variables */ + static integer i__, j; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + + + Purpose + ======= + + ILAZLR scans A for its last non-zero row. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A. + + N (input) INTEGER + The number of columns of the matrix A. + + A (input) COMPLEX*16 array, dimension (LDA,N) + The m by n matrix A. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,M). + + ===================================================================== + + + Quick test for the common case where one corner is non-zero. 
+*/ + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + if (*m == 0) { + ret_val = *m; + } else /* if(complicated condition) */ { + i__1 = *m + a_dim1; + i__2 = *m + *n * a_dim1; + if (a[i__1].r != 0. || a[i__1].i != 0. || (a[i__2].r != 0. || a[i__2] + .i != 0.)) { + ret_val = *m; + } else { +/* Scan up each column tracking the last zero row seen. */ + ret_val = 0; + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + i__2 = i__ + j * a_dim1; + if (a[i__2].r != 0. || a[i__2].i != 0.) { + goto L10; + } + } +L10: + ret_val = max(ret_val,i__); + } + } + } + return ret_val; +} /* ilazlr_ */ + +integer iparmq_(integer *ispec, char *name__, char *opts, integer *n, integer + *ilo, integer *ihi, integer *lwork, ftnlen name_len, ftnlen opts_len) +{ + /* System generated locals */ + integer ret_val, i__1, i__2; + real r__1; + + /* Builtin functions */ + double log(doublereal); + integer i_nint(real *); + + /* Local variables */ + static integer nh, ns; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + This program sets problem and machine dependent parameters + useful for xHSEQR and its subroutines. It is called whenever + ILAENV is called with 12 <= ISPEC <= 16 + + Arguments + ========= + + ISPEC (input) integer scalar + ISPEC specifies which tunable parameter IPARMQ should + return. + + ISPEC=12: (INMIN) Matrices of order nmin or less + are sent directly to xLAHQR, the implicit + double shift QR algorithm. NMIN must be + at least 11. + + ISPEC=13: (INWIN) Size of the deflation window. + This is best set greater than or equal to + the number of simultaneous shifts NS. + Larger matrices benefit from larger deflation + windows. 
+ + ISPEC=14: (INIBL) Determines when to stop nibbling and + invest in an (expensive) multi-shift QR sweep. + If the aggressive early deflation subroutine + finds LD converged eigenvalues from an order + NW deflation window and LD.GT.(NW*NIBBLE)/100, + then the next QR sweep is skipped and early + deflation is applied immediately to the + remaining active diagonal block. Setting + IPARMQ(ISPEC=14) = 0 causes TTQRE to skip a + multi-shift QR sweep whenever early deflation + finds a converged eigenvalue. Setting + IPARMQ(ISPEC=14) greater than or equal to 100 + prevents TTQRE from skipping a multi-shift + QR sweep. + + ISPEC=15: (NSHFTS) The number of simultaneous shifts in + a multi-shift QR iteration. + + ISPEC=16: (IACC22) IPARMQ is set to 0, 1 or 2 with the + following meanings. + 0: During the multi-shift QR sweep, + xLAQR5 does not accumulate reflections and + does not use matrix-matrix multiply to + update the far-from-diagonal matrix + entries. + 1: During the multi-shift QR sweep, + xLAQR5 and/or xLAQRaccumulates reflections and uses + matrix-matrix multiply to update the + far-from-diagonal matrix entries. + 2: During the multi-shift QR sweep. + xLAQR5 accumulates reflections and takes + advantage of 2-by-2 block structure during + matrix-matrix multiplies. + (If xTRMM is slower than xGEMM, then + IPARMQ(ISPEC=16)=1 may be more efficient than + IPARMQ(ISPEC=16)=2 despite the greater level of + arithmetic work implied by the latter choice.) + + NAME (input) character string + Name of the calling subroutine + + OPTS (input) character string + This is a concatenation of the string arguments to + TTQRE. + + N (input) integer scalar + N is the order of the Hessenberg matrix H. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular + in rows and columns 1:ILO-1 and IHI+1:N. + + LWORK (input) integer scalar + The amount of workspace available. 
+ + Further Details + =============== + + Little is known about how best to choose these parameters. + It is possible to use different values of the parameters + for each of CHSEQR, DHSEQR, SHSEQR and ZHSEQR. + + It is probably best to choose different parameters for + different matrices and different parameters at different + times during the iteration, but this has not been + implemented --- yet. + + + The best choices of most of the parameters depend + in an ill-understood way on the relative execution + rate of xLAQR3 and xLAQR5 and on the nature of each + particular eigenvalue problem. Experiment may be the + only practical way to determine which choices are most + effective. + + Following is a list of default values supplied by IPARMQ. + These defaults may be adjusted in order to attain better + performance in any particular computational environment. + + IPARMQ(ISPEC=12) The xLAHQR vs xLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + IPARMQ(ISPEC=13) Recommended deflation window size. + This depends on ILO, IHI and NS, the + number of simultaneous shifts returned + by IPARMQ(ISPEC=15). The default for + (IHI-ILO+1).LE.500 is NS. The default + for (IHI-ILO+1).GT.500 is 3*NS/2. + + IPARMQ(ISPEC=14) Nibble crossover point. Default: 14. + + IPARMQ(ISPEC=15) Number of simultaneous shifts, NS. + a multi-shift QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 0 30 NS = 2+ + 30 60 NS = 4+ + 60 150 NS = 10 + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default matrices of this order are + passed to the implicit double shift routine + xLAHQR. See IPARMQ(ISPEC=12) above. These + values of NS are used only in case of a rare + xLAHQR failure. + + (**) The asterisks (**) indicate an ad-hoc + function increasing from 10 to 64. + + IPARMQ(ISPEC=16) Select structured matrix multiply. + (See ISPEC=16 above for details.) + Default: 3. 
+ + ================================================================ +*/ + if (*ispec == 15 || *ispec == 13 || *ispec == 16) { + +/* ==== Set the number simultaneous shifts ==== */ + + nh = *ihi - *ilo + 1; + ns = 2; + if (nh >= 30) { + ns = 4; + } + if (nh >= 60) { + ns = 10; + } + if (nh >= 150) { +/* Computing MAX */ + r__1 = log((real) nh) / log(2.f); + i__1 = 10, i__2 = nh / i_nint(&r__1); + ns = max(i__1,i__2); + } + if (nh >= 590) { + ns = 64; + } + if (nh >= 3000) { + ns = 128; + } + if (nh >= 6000) { + ns = 256; + } +/* Computing MAX */ + i__1 = 2, i__2 = ns - ns % 2; + ns = max(i__1,i__2); + } + + if (*ispec == 12) { + + +/* + ===== Matrices of order smaller than NMIN get sent + . to xLAHQR, the classic double shift algorithm. + . This must be at least 11. ==== +*/ + + ret_val = 75; + + } else if (*ispec == 14) { + +/* + ==== INIBL: skip a multi-shift qr iteration and + . whenever aggressive early deflation finds + . at least (NIBBLE*(window size)/100) deflations. ==== +*/ + + ret_val = 14; + + } else if (*ispec == 15) { + +/* ==== NSHFTS: The number of simultaneous shifts ===== */ + + ret_val = ns; + + } else if (*ispec == 13) { + +/* ==== NW: deflation window size. ==== */ + + if (nh <= 500) { + ret_val = ns; + } else { + ret_val = ns * 3 / 2; + } + + } else if (*ispec == 16) { + +/* + ==== IACC22: Whether to accumulate reflections + . before updating the far-from-diagonal elements + . and whether to use 2-by-2 block structure while + . doing it. A small amount of work could be saved + . by making this choice dependent also upon the + . NH=IHI-ILO+1. 
+*/ + + ret_val = 0; + if (ns >= 14) { + ret_val = 1; + } + if (ns >= 14) { + ret_val = 2; + } + + } else { +/* ===== invalid value of ispec ===== */ + ret_val = -1; + + } + +/* ==== End of IPARMQ ==== */ + + return ret_val; +} /* iparmq_ */ + diff --git a/numpy/linalg/lapack_lite/f2c_s_lapack.c b/numpy/linalg/lapack_lite/f2c_s_lapack.c index 9897c5249b02..9bd8d2455c27 100644 --- a/numpy/linalg/lapack_lite/f2c_s_lapack.c +++ b/numpy/linalg/lapack_lite/f2c_s_lapack.c @@ -39,15 +39,17 @@ static real c_b151 = -1.f; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; -static integer c__8 = 8; -static integer c__4 = 4; static integer c__65 = 65; -static integer c__15 = 15; +static integer c__12 = 12; +static integer c__49 = 49; +static integer c__4 = 4; static logical c_false = FALSE_; -static integer c__10 = 10; -static integer c__11 = 11; -static real c_b2489 = 2.f; +static integer c__13 = 13; +static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; static logical c_true = TRUE_; +static real c_b2863 = 2.f; /* Subroutine */ int sbdsdc_(char *uplo, char *compq, integer *n, real *d__, real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q, @@ -104,10 +106,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -128,7 +130,7 @@ static logical c_true = TRUE_; It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. See SLASD3 for details. - The code currently call SLASDQ if singular values only are desired. + The code currently calls SLASDQ if singular values only are desired. 
However, it can be slightly modified to compute singular values using the divide and conquer method. @@ -154,7 +156,7 @@ static logical c_true = TRUE_; On entry, the n diagonal elements of the bidiagonal matrix B. On exit, if INFO=0, the singular values of B. - E (input/output) REAL array, dimension (N) + E (input/output) REAL array, dimension (N-1) On entry, the elements of E contain the offdiagonal elements of the bidiagonal matrix whose SVD is desired. On exit, E has been destroyed. @@ -203,7 +205,7 @@ static logical c_true = TRUE_; bottom of the computation tree (usually about 25). For other values of COMPQ, IQ is not referenced. - WORK (workspace) REAL array, dimension (LWORK) + WORK (workspace) REAL array, dimension (MAX(1,LWORK)) If COMPQ = 'N' then LWORK >= (4 * N). If COMPQ = 'P' then LWORK >= (6 * N). If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N). @@ -213,7 +215,7 @@ static logical c_true = TRUE_; INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value. + > 0: The algorithm failed to compute a singular value. The update process of divide and conquer failed. Further Details @@ -222,7 +224,9 @@ static logical c_true = TRUE_; Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA - + ===================================================================== + Changed dimension statement in comment describing E from (N) to + (N-1). Sven, 17 Feb 05. 
===================================================================== @@ -465,9 +469,9 @@ static logical c_true = TRUE_; 2) * *n], &q[start + (ic + qstart - 2) * *n], &q[ start + (is + qstart - 2) * *n], &work[wstart], & iwork[1], info); - if (*info != 0) { - return 0; - } + } + if (*info != 0) { + return 0; } start = i__ + 1; } @@ -588,27 +592,39 @@ static logical c_true = TRUE_; ); static real thresh; static logical rotate; - static real sminlo, tolmul; + static real tolmul; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + January 2007 Purpose ======= - SBDSQR computes the singular value decomposition (SVD) of a real - N-by-N (upper or lower) bidiagonal matrix B: B = Q * S * P' (P' - denotes the transpose of P), where S is a diagonal matrix with - non-negative diagonal elements (the singular values of B), and Q - and P are orthogonal matrices. + SBDSQR computes the singular values and, optionally, the right and/or + left singular vectors from the singular value decomposition (SVD) of + a real N-by-N (upper or lower) bidiagonal matrix B using the implicit + zero-shift QR algorithm. The SVD of B has the form + + B = Q * S * P**T + + where S is the diagonal matrix of singular values, Q is an orthogonal + matrix of left singular vectors, and P is an orthogonal matrix of + right singular vectors. If left singular vectors are requested, this + subroutine actually returns U*Q instead of Q, and, if right singular + vectors are requested, this subroutine returns P**T*VT instead of + P**T, for given real input matrices U and VT. 
When U and VT are the + orthogonal matrices that reduce a general matrix A to bidiagonal + form: A = U*B*VT, as computed by SGEBRD, then - The routine computes S, and optionally computes U * Q, P' * VT, - or Q' * C, for given real input matrices U, VT, and C. + A = (U*Q) * S * (P**T*VT) + + is the SVD of A. Optionally, the subroutine may also compute Q**T*C + for a given real input matrix C. See "Computing Small Singular Values of Bidiagonal Matrices With Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan, @@ -643,19 +659,18 @@ static logical c_true = TRUE_; On exit, if INFO=0, the singular values of B in decreasing order. - E (input/output) REAL array, dimension (N) - On entry, the elements of E contain the - offdiagonal elements of the bidiagonal matrix whose SVD - is desired. On normal exit (INFO = 0), E is destroyed. - If the algorithm does not converge (INFO > 0), D and E + E (input/output) REAL array, dimension (N-1) + On entry, the N-1 offdiagonal elements of the bidiagonal + matrix B. + On exit, if INFO = 0, E is destroyed; if INFO > 0, D and E will contain the diagonal and superdiagonal elements of a bidiagonal matrix orthogonally equivalent to the one given - as input. E(N) is used for workspace. + as input. VT (input/output) REAL array, dimension (LDVT, NCVT) On entry, an N-by-NCVT matrix VT. - On exit, VT is overwritten by P' * VT. - VT is not referenced if NCVT = 0. + On exit, VT is overwritten by P**T * VT. + Not referenced if NCVT = 0. LDVT (input) INTEGER The leading dimension of the array VT. @@ -664,15 +679,15 @@ static logical c_true = TRUE_; U (input/output) REAL array, dimension (LDU, N) On entry, an NRU-by-N matrix U. On exit, U is overwritten by U * Q. - U is not referenced if NRU = 0. + Not referenced if NRU = 0. LDU (input) INTEGER The leading dimension of the array U. LDU >= max(1,NRU). C (input/output) REAL array, dimension (LDC, NCC) On entry, an N-by-NCC matrix C. - On exit, C is overwritten by Q' * C. 
- C is not referenced if NCC = 0. + On exit, C is overwritten by Q**T * C. + Not referenced if NCC = 0. LDC (input) INTEGER The leading dimension of the array C. @@ -683,10 +698,18 @@ static logical c_true = TRUE_; INFO (output) INTEGER = 0: successful exit < 0: If INFO = -i, the i-th argument had an illegal value - > 0: the algorithm did not converge; D and E contain the - elements of a bidiagonal matrix which is orthogonally - similar to the input matrix B; if INFO = i, i - elements of E have not converged to zero. + > 0: + if NCVT = NRU = NCC = 0, + = 1, a split was marked by a positive value in E + = 2, current block of Z not diagonalized after 30*N + iterations (in inner while loop) + = 3, termination criterion of outer while loop not met + (program created more than N unreduced blocks) + else NCVT = NRU = NCC = 0, + the algorithm did not converge; D and E contain the + elements of a bidiagonal matrix which is orthogonally + similar to the input matrix B; if INFO = i, i + elements of E have not converged to zero. Internal Parameters =================== @@ -1030,7 +1053,6 @@ static logical c_true = TRUE_; e[lll] = 0.f; goto L60; } - sminlo = sminl; mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 = e[lll], dabs(r__1)))); sminl = dmin(sminl,mu); @@ -1066,7 +1088,6 @@ static logical c_true = TRUE_; e[lll] = 0.f; goto L60; } - sminlo = sminl; mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[ lll], dabs(r__1)))); sminl = dmin(sminl,mu); @@ -1453,10 +1474,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -1663,14 +1684,15 @@ static logical c_true = TRUE_; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); + extern logical sisnan_(real *); static logical noconv; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -1908,7 +1930,7 @@ static logical c_true = TRUE_; sfmin1 = slamch_("S") / slamch_("P"); sfmax1 = 1.f / sfmin1; - sfmin2 = sfmin1 * 8.f; + sfmin2 = sfmin1 * 2.f; sfmax2 = 1.f / sfmin2; L140: noconv = FALSE_; @@ -1939,7 +1961,7 @@ static logical c_true = TRUE_; if (c__ == 0.f || r__ == 0.f) { goto L200; } - g = r__ / 8.f; + g = r__ / 2.f; f = 1.f; s = c__ + r__; L160: @@ -1950,28 +1972,38 @@ static logical c_true = TRUE_; if (c__ >= g || dmax(r__1,ca) >= sfmax2 || dmin(r__2,ra) <= sfmin2) { goto L170; } - f *= 8.f; - c__ *= 8.f; - ca *= 8.f; - r__ /= 8.f; - g /= 8.f; - ra /= 8.f; + f *= 2.f; + c__ *= 2.f; + ca *= 2.f; + r__ /= 2.f; + g /= 2.f; + ra /= 2.f; goto L160; L170: - g = c__ / 8.f; + g = c__ / 2.f; L180: /* Computing MIN */ r__1 = min(f,c__), r__1 = min(r__1,g); if (g < r__ || dmax(r__,ra) >= sfmax2 || dmin(r__1,ca) <= sfmin2) { goto L190; } - f /= 8.f; - c__ /= 8.f; - g /= 8.f; - ca /= 8.f; - r__ *= 8.f; - ra *= 8.f; + r__1 = c__ + f + ca + r__ + g + ra; + if (sisnan_(&r__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("SGEBAL", &i__2); + return 0; + } + f /= 2.f; + c__ /= 2.f; + g /= 2.f; + ca /= 2.f; + r__ *= 2.f; + ra *= 2.f; goto L180; /* Now balance. 
*/ @@ -2020,7 +2052,7 @@ static logical c_true = TRUE_; real *d__, real *e, real *tauq, real *taup, real *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i__; @@ -2031,10 +2063,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2196,10 +2228,13 @@ static logical c_true = TRUE_; /* Apply H(i) to A(i:m,i+1:n) from the left */ - i__2 = *m - i__ + 1; - i__3 = *n - i__; - slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[ - i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + tauq[i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1] + ); + } a[i__ + i__ * a_dim1] = d__[i__]; if (i__ < *n) { @@ -2249,12 +2284,12 @@ static logical c_true = TRUE_; /* Apply G(i) to A(i+1:m,i:n) from the right */ - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } a[i__ + i__ * a_dim1] = d__[i__]; if (i__ < *m) { @@ -2319,10 +2354,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2381,7 +2416,7 @@ static logical c_true = TRUE_; The scalar factors of the elementary reflectors which represent the orthogonal matrix P. See Further Details. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -2610,7 +2645,7 @@ static logical c_true = TRUE_; { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; + i__2, i__3; real r__1, r__2; /* Builtin functions */ @@ -2625,7 +2660,6 @@ static logical c_true = TRUE_; static real dum[1], eps; static integer ibal; static char side[1]; - static integer maxb; static real anrm; static integer ierr, itau, iwrk, nout; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, @@ -2668,10 +2702,10 @@ static logical c_true = TRUE_; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 8, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2749,7 +2783,7 @@ static logical c_true = TRUE_; The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -2821,69 +2855,66 @@ static logical c_true = TRUE_; the worst case.) */ - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, n, & - c__0, (ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n * 3; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "SHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; } else { + maxwrk = (*n << 1) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1, + n, &c__0, (ftnlen)6, (ftnlen)1); + if (wantvl) { + minwrk = *n << 2; /* Computing MAX */ - i__1 = 1, i__2 = *n << 2; - minwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vl[vl_offset], ldvl, &work[1], &c_n1, info); + hswork = work[1]; /* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "SOR" - "GHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = ilaenv_(&c__8, "SHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "SV", n, &c__1, n, & - 
c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else if (wantvr) { + minwrk = *n << 2; +/* Computing MAX */ + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen) + 1); + maxwrk = max(i__1,i__2); + shseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = work[1]; /* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n + - hswork; - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n << 2; + maxwrk = max(i__1,i__2); + } else { + minwrk = *n * 3; + shseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &wr[1], &wi[ + 1], &vr[vr_offset], ldvr, &work[1], &c_n1, info); + hswork = work[1]; /* Computing MAX */ - i__1 = maxwrk, i__2 = *n << 2; - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = * + n + hswork; + maxwrk = max(i__1,i__2); + } + maxwrk = max(maxwrk,minwrk); } work[1] = (real) maxwrk; + + if (*lwork < minwrk && ! lquery) { + *info = -13; + } } - if (*lwork < minwrk && ! lquery) { - *info = -13; - } + if (*info != 0) { i__1 = -(*info); xerbla_("SGEEV ", &i__1); @@ -3178,10 +3209,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -3331,20 +3362,23 @@ static logical c_true = TRUE_; integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ - static integer i__; + static integer i__, j; static real t[4160] /* was [65][64] */; static integer ib; static real ei; static integer nb, nh, nx, iws, nbmin, iinfo; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), sgehd2_(integer *, integer *, - integer *, real *, integer *, real *, real *, integer *), slarfb_( - char *, char *, char *, char *, integer *, integer *, integer *, - real *, integer *, real *, integer *, real *, integer *, real *, - integer *), slahrd_(integer *, + real *, integer *), strmm_(char *, char *, char *, + char *, integer *, integer *, real *, real *, integer *, real *, + integer *), saxpy_(integer *, + real *, real *, integer *, real *, integer *), sgehd2_(integer *, integer *, integer *, real *, integer *, real *, real *, integer * - , real *, integer *), xerbla_(char *, integer *); + ), slahr2_(integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, real *, integer *), slarfb_(char *, + char *, char *, char *, integer *, integer *, integer *, real *, + integer *, real *, integer *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, lwkopt; @@ -3352,10 +3386,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose @@ -3444,6 +3478,10 @@ static logical c_true = TRUE_; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) + ===================================================================== @@ -3507,7 +3545,7 @@ static logical c_true = TRUE_; } /* - Determine the block size. + Determine the block size Computing MIN */ @@ -3520,7 +3558,7 @@ static logical c_true = TRUE_; /* Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). + (last block is always handled by unblocked code) Computing MAX */ @@ -3529,7 +3567,7 @@ static logical c_true = TRUE_; nx = max(i__1,i__2); if (nx < nh) { -/* Determine if workspace is large enough for blocked code. */ +/* Determine if workspace is large enough for blocked code */ iws = *n * nb; if (*lwork < iws) { @@ -3537,7 +3575,7 @@ static logical c_true = TRUE_; /* Not enough workspace to use optimal NB: determine the minimum value of NB, and reduce NB or force use of - unblocked code. + unblocked code Computing MAX */ @@ -3577,13 +3615,13 @@ static logical c_true = TRUE_; which performs the reduction, and also the matrix Y = A*V*T */ - slahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + slahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & c__65, &work[1], &ldwork); /* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. 
+ to 1 */ ei = a[i__ + ib + (i__ + ib - 1) * a_dim1]; @@ -3594,6 +3632,21 @@ static logical c_true = TRUE_; c_b15, &a[(i__ + ib) * a_dim1 + 1], lda); a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei; +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + strmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b15, + &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + saxpy_(&i__, &c_b151, &work[ldwork * j + 1], &c__1, &a[(i__ + + j + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + /* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the left @@ -3604,7 +3657,7 @@ static logical c_true = TRUE_; slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, & i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[ i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork); -/* L30: */ +/* L40: */ } } @@ -3635,10 +3688,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -3773,10 +3826,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -3809,7 +3862,7 @@ static logical c_true = TRUE_; The scalar factors of the elementary reflectors (see Further Details). 
- WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -3998,10 +4051,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -4136,10 +4189,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -4173,7 +4226,7 @@ static logical c_true = TRUE_; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -4409,10 +4462,10 @@ static logical c_true = TRUE_; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK driver routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + March 2009 Purpose @@ -4458,7 +4511,7 @@ static logical c_true = TRUE_; the array VT; otherwise, all columns of U are returned in the array U and the first M rows of V**T are overwritten - in the array VT; + in the array A; = 'N': no columns of U or rows of V**T are computed. M (input) INTEGER @@ -4509,7 +4562,7 @@ static logical c_true = TRUE_; JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; if JOBZ = 'S', LDVT >= min(M,N). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK; LWORK (input) INTEGER @@ -4517,13 +4570,13 @@ static logical c_true = TRUE_; If JOBZ = 'N', LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)). If JOBZ = 'O', - LWORK >= 3*min(M,N)*min(M,N) + + LWORK >= 3*min(M,N) + max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)). If JOBZ = 'S' or 'A' - LWORK >= 3*min(M,N)*min(M,N) + + LWORK >= 3*min(M,N) + max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)). For good performance, LWORK should generally be larger. - If LWORK < 0 but other input arguments are legal, WORK(1) + If LWORK = -1 but other input arguments are legal, WORK(1) returns the optimal LWORK. IWORK (workspace) INTEGER array, dimension (8*min(M,N)) @@ -4563,14 +4616,11 @@ static logical c_true = TRUE_; /* Function Body */ *info = 0; minmn = min(*m,*n); - mnthr = (integer) (minmn * 11.f / 6.f); wntqa = lsame_(jobz, "A"); wntqs = lsame_(jobz, "S"); wntqas = wntqa || wntqs; wntqo = lsame_(jobz, "O"); wntqn = lsame_(jobz, "N"); - minwrk = 1; - maxwrk = 1; lquery = *lwork == -1; if (! (wntqa || wntqs || wntqo || wntqn)) { @@ -4598,11 +4648,14 @@ static logical c_true = TRUE_; following subroutine, as returned by ILAENV.) 
*/ - if (*info == 0 && *m > 0 && *n > 0) { - if (*m >= *n) { + if (*info == 0) { + minwrk = 1; + maxwrk = 1; + if (*m >= *n && minmn > 0) { /* Compute space needed for SBDSDC */ + mnthr = (integer) (minmn * 11.f / 6.f); if (wntqn) { bdspac = *n * 7; } else { @@ -4764,10 +4817,11 @@ static logical c_true = TRUE_; minwrk = *n * 3 + max(*m,bdspac); } } - } else { + } else if (minmn > 0) { /* Compute space needed for SBDSDC */ + mnthr = (integer) (minmn * 11.f / 6.f); if (wntqn) { bdspac = *m * 7; } else { @@ -4930,12 +4984,14 @@ static logical c_true = TRUE_; } } } + maxwrk = max(maxwrk,minwrk); work[1] = (real) maxwrk; - } - if (*lwork < minwrk && ! lquery) { - *info = -12; + if (*lwork < minwrk && ! lquery) { + *info = -12; + } } + if (*info != 0) { i__1 = -(*info); xerbla_("SGESDD", &i__1); @@ -4947,9 +5003,6 @@ static logical c_true = TRUE_; /* Quick return if possible */ if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1] = 1.f; - } return 0; } @@ -5493,10 +5546,12 @@ static logical c_true = TRUE_; /* Set the right corner of U to identity matrix */ - i__1 = *m - *n; - i__2 = *m - *n; - slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); + if (*m > *n) { + i__1 = *m - *n; + i__2 = *m - *n; + slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } /* Overwrite U by left singular vectors of A and VT @@ -6031,10 +6086,12 @@ static logical c_true = TRUE_; /* Set the right corner of VT to identity matrix */ - i__1 = *n - *m; - i__2 = *n - *m; - slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); + if (*n > *m) { + i__1 = *n - *m; + i__2 = *n - *m; + slaset_("F", &i__1, &i__2, &c_b29, &c_b15, &vt[*m + 1 + (* + m + 1) * vt_dim1], ldvt); + } /* Overwrite U by left singular vectors of A and VT @@ -6092,10 +6149,10 @@ static logical c_true = TRUE_; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6205,19 +6262,23 @@ static logical c_true = TRUE_; real r__1; /* Local variables */ - static integer j, jp; + static integer i__, j, jp; extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, integer *, real *, integer *, real *, integer *), sscal_(integer * - , real *, real *, integer *), sswap_(integer *, real *, integer *, - real *, integer *), xerbla_(char *, integer *); + , real *, real *, integer *); + static real sfmin; + extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *, + integer *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int xerbla_(char *, integer *); extern integer isamax_(integer *, real *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6296,6 +6357,10 @@ static logical c_true = TRUE_; return 0; } +/* Compute machine safe minimum */ + + sfmin = slamch_("S"); + i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { @@ -6315,9 +6380,17 @@ static logical c_true = TRUE_; /* Compute elements J+1:M of J-th column. 
*/ if (j < *m) { - i__2 = *m - j; - r__1 = 1.f / a[j + j * a_dim1]; - sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + if ((r__1 = a[j + j * a_dim1], dabs(r__1)) >= sfmin) { + i__2 = *m - j; + r__1 = 1.f / a[j + j * a_dim1]; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + a[j + i__ + j * a_dim1] /= a[j + j * a_dim1]; +/* L20: */ + } + } } } else if (*info == 0) { @@ -6365,10 +6438,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6547,10 +6620,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6693,164 +6766,268 @@ static logical c_true = TRUE_; { /* System generated locals */ address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4, - i__5; - real r__1, r__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2[2], i__3; + real r__1; char ch__1[2]; /* Builtin functions */ /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ - static integer i__, j, k, l; - static real s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static real vv[16]; - static integer itn; - static real tau; - static integer its; - static real ulp, tst1; - static integer maxb; - static real absw; - static integer ierr; - static real unfl, temp, ovfl; + static integer i__; + static real hl[2401] /* was [49][49] */; + static integer kbot, nmin; extern logical lsame_(char *, char *); - extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); - static integer itemp; - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); - static logical initz, wantt; - extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, - integer *); - static logical wantz; - extern doublereal slapy2_(real *, real *); - extern /* Subroutine */ int slabad_(real *, real *); - extern doublereal slamch_(char *); - extern /* Subroutine */ int xerbla_(char *, integer *); + static logical initz; + static real workl[49]; + static logical wantt, wantz; + extern /* Subroutine */ int slaqr0_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, real *, integer *, integer *), + xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slarfg_(integer 
*, real *, real *, integer *, - real *); - extern integer isamax_(integer *, real *, integer *); - extern doublereal slanhs_(char *, integer *, real *, integer *, real *); extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, integer *, integer *, real *, integer *, real *, real *, integer * , integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, - real *, integer *), slarfx_(char *, integer *, integer *, - real *, real *, real *, integer *, real *); - static real smlnum; + real *, integer *); static logical lquery; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H - and, optionally, the matrices T and Z from the Schur decomposition - H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur - form), and Z is the orthogonal matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input orthogonal matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the orthogonal - matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. - - Arguments - ========= + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + June 2010 + + Purpose + ======= + + SHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. 
+ + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. + + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an orthogonal matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to SGEBAL, and then passed to SGEHRD + when the matrix output by SGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', then H contains the + upper quasi-triangular matrix T from the Schur decomposition + (the Schur form); 2-by-2 diagonal blocks (corresponding to + complex conjugate pairs of eigenvalues) are returned in + standard form, with H(i,i) = H(i+1,i+1) and + H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and JOB = 'E', the + contents of H are unspecified on exit. (The output value of + H when INFO.GT.0 is given under the description of INFO + below.) + + Unlike earlier versions of SHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). 
+ + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) + The real and imaginary parts, respectively, of the computed + eigenvalues. If two eigenvalues are computed as a complex + conjugate pair, they are stored in consecutive elements of + WR and WI, say the i-th and (i+1)th, with WI(i) .GT. 0 and + WI(i+1) .LT. 0. If JOB = 'S', the eigenvalues are stored in + the same order as on the diagonal of the Schur form returned + in H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 + diagonal block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + Z (input/output) REAL array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the orthogonal matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the orthogonal matrix generated by SORGHR + after the call to SGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. + + WORK (workspace/output) REAL array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then SHSEQR does a workspace query. 
+ In this case, SHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, SHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the orthogonal matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'SHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The SLAHQR vs SLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) 
+ The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + SLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. 
+ ================================================================ - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an orthogonal matrix Q on entry, and - the product Q*Z is returned. + ==== Matrices of order NTINY or smaller must be processed by + . SLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== - N (input) INTEGER - The order of the matrix H. N >= 0. + ==== NL allocates some local workspace to help small matrices + . through a rare SLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to SGEBAL, and then passed to SGEHRD - when the matrix output by SGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) REAL array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper quasi-triangular - matrix T from the Schur decomposition (the Schur form); - 2-by-2 diagonal blocks (corresponding to complex conjugate - pairs of eigenvalues) are returned in standard form, with - H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E', - the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) - The real and imaginary parts, respectively, of the computed - eigenvalues. 
If two eigenvalues are computed as a complex - conjugate pair, they are stored in consecutive elements of - WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and - WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the - same order as on the diagonal of the Schur form returned in - H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 - diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and - WI(i+1) = -WI(i). - - Z (input/output) REAL array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the orthogonal matrix Z of the Schur vectors of H. - If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the orthogonal matrix generated by SORGHR after - the call to SGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) REAL array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, SHSEQR failed to compute all of the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of WR and WI contain those - eigenvalues which have been successfully computed. - - ===================================================================== - - - Decode and test the input parameters + ==== Decode and check the input parameters. 
==== */ /* Parameter adjustments */ @@ -6868,10 +7045,10 @@ static logical c_true = TRUE_; wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); wantz = initz || lsame_(compz, "V"); - - *info = 0; work[1] = (real) max(1,*n); lquery = *lwork == -1; + + *info = 0; if (! lsame_(job, "E") && ! wantt) { *info = -1; } else if (! lsame_(compz, "N") && ! wantz) { @@ -6889,401 +7066,195 @@ static logical c_true = TRUE_; } else if (*lwork < max(1,*n) && ! lquery) { *info = -13; } + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + i__1 = -(*info); xerbla_("SHSEQR", &i__1); return 0; - } else if (lquery) { - return 0; - } - -/* Initialize Z, if necessary */ - if (initz) { - slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by SGEBAL. */ - - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - wr[i__] = h__[i__ + i__ * h_dim1]; - wi[i__] = 0.f; -/* L20: */ - } + } else if (*n == 0) { -/* Quick return if possible. */ +/* ==== Quick return in case N = 0; nothing to do. ==== */ - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - wr[*ilo] = h__[*ilo + *ilo * h_dim1]; - wi[*ilo] = 0.f; return 0; - } -/* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. -*/ + } else if (lquery) { - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - h__[i__ + j * h_dim1] = 0.f; -/* L30: */ - } -/* L40: */ - } - nh = *ihi - *ilo + 1; +/* ==== Quick return in case of a workspace query ==== */ + slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ + 1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); /* - Determine the order of the multi-shift QR algorithm to be used. - - Writing concatenation + ==== Ensure reported workspace size is backward-compatible with + . 
previous LAPACK versions. ==== + Computing MAX */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = job; - i__3[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 2 || ns > nh || maxb >= nh) { - -/* Use the standard double-shift algorithm */ - - slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[ - 1], ilo, ihi, &z__[z_offset], ldz, info); + r__1 = (real) max(1,*n); + work[1] = dmax(r__1,work[1]); return 0; - } - maxb = max(3,maxb); -/* Computing MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); -/* - Now 2 < NS <= MAXB < NH. - - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ - - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); - -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are set inside the main loop. -*/ - - if (wantt) { - i1 = 1; - i2 = *n; - } - -/* ITN is the total number of multiple-shift QR iterations allowed. */ - - itn = nh * 30; - -/* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO or - H(L,L-1) is negligible so that the matrix splits. 
-*/ - - i__ = *ihi; -L50: - l = *ilo; - if (i__ < *ilo) { - goto L170; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + } else { -/* Look for a single small subdiagonal element. */ +/* ==== copy eigenvalues isolated by SGEBAL ==== */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 - = h__[k + k * h_dim1], dabs(r__2)); - if (tst1 == 0.f) { - i__4 = i__ - l + 1; - tst1 = slanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1] - ); - } -/* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, - smlnum)) { - goto L70; - } -/* L60: */ + i__1 = *ilo - 1; + for (i__ = 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L10: */ + } + i__1 = *n; + for (i__ = *ihi + 1; i__ <= i__1; ++i__) { + wr[i__] = h__[i__ + i__ * h_dim1]; + wi[i__] = 0.f; +/* L20: */ } -L70: - l = k; - if (l > *ilo) { -/* H(L,L-1) is negligible. */ +/* ==== Initialize Z, if requested ==== */ - h__[l + (l - 1) * h_dim1] = 0.f; + if (initz) { + slaset_("A", n, n, &c_b29, &c_b15, &z__[z_offset], ldz) + ; } -/* Exit from loop if a submatrix of order <= MAXB has split off. */ +/* ==== Quick return if possible ==== */ - if (l >= i__ - maxb + 1) { - goto L160; + if (*ilo == *ihi) { + wr[*ilo] = h__[*ilo + *ilo * h_dim1]; + wi[*ilo] = 0.f; + return 0; } /* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! 
wantt) { - i1 = l; - i2 = i__; - } + ==== SLAHQR/SLAQR0 crossover point ==== - if (its == 20 || its == 30) { + Writing concatenation +*/ + i__2[0] = 1, a__1[0] = job; + i__2[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "SHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); -/* Exceptional shifts. */ +/* ==== SLAQR0 for big matrices; SLAHQR for small ones ==== */ - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - wr[ii] = ((r__1 = h__[ii + (ii - 1) * h_dim1], dabs(r__1)) + ( - r__2 = h__[ii + ii * h_dim1], dabs(r__2))) * 1.5f; - wi[ii] = 0.f; -/* L80: */ - } + if (*n > nmin) { + slaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, + info); } else { -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ +/* ==== Small matrix ==== */ + + slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], + &wi[1], ilo, ihi, &z__[z_offset], ldz, info); - slacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - slahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ - - ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset], - ldz, &ierr); - if (ierr > 0) { + if (*info > 0) { /* - If SLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. + ==== A rare SLAHQR failure! SLAQR0 sometimes succeeds + . when SLAHQR fails. ==== */ - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - wr[i__ - ns + ii] = s[ii + ii * 15 - 16]; - wi[i__ - ns + ii] = 0.f; -/* L90: */ - } - } - } + kbot = *info; + + if (*n >= 49) { /* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in WR and WI). The result is - stored in the local array V. + ==== Larger matrices have enough subdiagonal scratch + . space to call SLAQR0 directly. 
==== */ - v[0] = 1.f; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - v[ii - 1] = 0.f; -/* L100: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - if (wi[j] >= 0.f) { - if (wi[j] == 0.f) { - -/* real shift */ - - i__4 = nv + 1; - scopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - r__1 = -wr[j]; - sgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * - h_dim1], ldh, vv, &c__1, &r__1, v, &c__1); - ++nv; - } else if (wi[j] > 0.f) { - -/* complex conjugate pair of shifts */ - - i__4 = nv + 1; - scopy_(&i__4, v, &c__1, vv, &c__1); - i__4 = nv + 1; - r__1 = wr[j] * -2.f; - sgemv_("No transpose", &i__4, &nv, &c_b15, &h__[l + l * - h_dim1], ldh, v, &c__1, &r__1, vv, &c__1); - i__4 = nv + 1; - itemp = isamax_(&i__4, vv, &c__1); -/* Computing MAX */ - r__2 = (r__1 = vv[itemp - 1], dabs(r__1)); - temp = 1.f / dmax(r__2,smlnum); - i__4 = nv + 1; - sscal_(&i__4, &temp, vv, &c__1); - absw = slapy2_(&wr[j], &wi[j]); - temp = temp * absw * absw; - i__4 = nv + 2; - i__5 = nv + 1; - sgemv_("No transpose", &i__4, &i__5, &c_b15, &h__[l + l * - h_dim1], ldh, vv, &c__1, &temp, v, &c__1); - nv += 2; - } + slaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &wr[1], &wi[1], ilo, ihi, &z__[z_offset], + ldz, &work[1], lwork, info); + + } else { /* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from SLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling SLAQR0. 
==== */ - itemp = isamax_(&nv, v, &c__1); - temp = (r__1 = v[itemp - 1], dabs(r__1)); - if (temp == 0.f) { - v[0] = 1.f; - i__4 = nv; - for (ii = 2; ii <= i__4; ++ii) { - v[ii - 1] = 0.f; -/* L110: */ + slacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + hl[*n + 1 + *n * 49 - 50] = 0.f; + i__1 = 49 - *n; + slaset_("A", &c__49, &i__1, &c_b29, &c_b29, &hl[(*n + 1) * + 49 - 49], &c__49); + slaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + wr[1], &wi[1], ilo, ihi, &z__[z_offset], ldz, + workl, &c__49, info); + if (wantt || *info != 0) { + slacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); } - } else { - temp = dmax(temp,smlnum); - r__1 = 1.f / temp; - sscal_(&nv, &r__1, v, &c__1); } } -/* L120: */ } -/* Multiple-shift QR step */ +/* ==== Clear out the trash, if necessary. ==== */ - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__3 = *n - 2; + slaset_("L", &i__1, &i__3, &c_b29, &c_b29, &h__[h_dim1 + 3], ldh); + } /* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__4 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__4,i__5); - if (k > l) { - scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - slarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - h__[k + (k - 1) * h_dim1] = v[0]; - i__4 = i__; - for (ii = k + 1; ii <= i__4; ++ii) { - h__[ii + (k - 1) * h_dim1] = 0.f; -/* L130: */ - } - } - v[0] = 1.f; - -/* - Apply G from the left to transform the rows of the matrix in - columns K to I2. 
+ Computing MAX */ + r__1 = (real) max(1,*n); + work[1] = dmax(r__1,work[1]); + } - i__4 = i2 - k + 1; - slarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, & - work[1]); - -/* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). +/* ==== End of SHSEQR ==== */ - Computing MIN -*/ - i__5 = k + nr; - i__4 = min(i__5,i__) - i1 + 1; - slarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); + return 0; +} /* shseqr_ */ - if (wantz) { +logical sisnan_(real *sin__) +{ + /* System generated locals */ + logical ret_val; -/* Accumulate transformations in the matrix Z */ + /* Local variables */ + extern logical slaisnan_(real *, real *); - slarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L140: */ - } -/* L150: */ - } +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 -/* Failure to converge in remaining number of iterations */ - *info = i__; - return 0; + Purpose + ======= -L160: + SISNAN returns .TRUE. if its argument is NaN, and .FALSE. + otherwise. To be replaced by the Fortran 2003 intrinsic in the + future. -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. -*/ + Arguments + ========= - slahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1], - ilo, ihi, &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; - } + SIN (input) REAL + Input to test for NaN. -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. 
+ ===================================================================== */ - itn -= its; - i__ = l - 1; - goto L50; - -L170: - work[1] = (real) max(1,*n); - return 0; - -/* End of SHSEQR */ - -} /* shseqr_ */ + ret_val = slaisnan_(sin__, sin__); + return ret_val; +} /* sisnan_ */ /* Subroutine */ int slabad_(real *small, real *large) { @@ -7292,10 +7263,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7358,10 +7329,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7437,7 +7408,7 @@ static logical c_true = TRUE_; The n-by-nb matrix Y required to update the unreduced part of A. - LDY (output) INTEGER + LDY (input) INTEGER The leading dimension of the array Y. LDY >= N. Further Details @@ -7754,10 +7725,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7852,10 +7823,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7918,10 +7889,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8072,10 +8043,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8468,10 +8439,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8700,10 +8671,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9209,10 +9180,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9521,10 +9492,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - December 23, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9563,10 +9534,10 @@ static logical c_true = TRUE_; The components of the updating vector. DELTA (output) REAL array, dimension (N) - If N .ne. 1, DELTA contains (D(j) - lambda_I) in its j-th - component. If N = 1, then DELTA(1) = 1. The vector DELTA - contains the information necessary to construct the - eigenvectors. + If N .GT. 2, DELTA contains (D(j) - lambda_I) in its j-th + component. If N = 1, then DELTA(1) = 1. If N = 2, see SLAED5 + for detail. The vector DELTA contains the information necessary + to construct the eigenvectors by SLAED3 and SLAED9. RHO (input) REAL The scalar in the symmetric updating formula. 
@@ -10182,7 +10153,6 @@ static logical c_true = TRUE_; prew = w; -/* L170: */ i__1 = *n; for (j = 1; j <= i__1; ++j) { delta[j] -= eta; @@ -10448,10 +10418,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10564,10 +10534,6 @@ static logical c_true = TRUE_; /* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, real *d__, real *z__, real *finit, real *tau, integer *info) { - /* Initialized data */ - - static logical first = TRUE_; - /* System generated locals */ integer i__1; real r__1, r__2, r__3, r__4; @@ -10578,7 +10544,7 @@ static logical c_true = TRUE_; /* Local variables */ static real a, b, c__, f; static integer i__; - static real fc, df, ddf, eta, eps, base; + static real fc, df, ddf, lbd, eta, ubd, eps, base; static integer iter; static real temp, temp1, temp2, temp3, temp4; static logical scale; @@ -10589,10 +10555,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + February 2007 Purpose @@ -10648,21 +10614,40 @@ static logical c_true = TRUE_; Further Details =============== - Based on contributions by + 30/06/99: Based on contributions by Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA + 10/02/03: This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). SJH. + + 05/10/06: Modified from a new version of Ren-Cang Li, use + Gragg-Thornton-Warner cubic convergent scheme for better stability. + ===================================================================== */ + /* Parameter adjustments */ --z__; --d__; /* Function Body */ - *info = 0; + if (*orgati) { + lbd = d__[2]; + ubd = d__[3]; + } else { + lbd = d__[1]; + ubd = d__[2]; + } + if (*finit < 0.f) { + lbd = 0.f; + } else { + ubd = 0.f; + } + niter = 1; *tau = 0.f; if (*kniter == 2) { @@ -10693,29 +10678,41 @@ static logical c_true = TRUE_; *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs( r__1)))); } - temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) + - z__[3] / (d__[3] - *tau); - if (dabs(*finit) <= dabs(temp)) { + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.f; + } + if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) { *tau = 0.f; + } else { + temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau + * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / ( + d__[3] * (d__[3] - *tau)); + if (temp <= 0.f) { + lbd = *tau; + } else { + ubd = *tau; + } + if (dabs(*finit) <= dabs(temp)) { + *tau = 0.f; + } } } /* - On first call to routine, get machine parameters for - possible scaling to avoid overflow + get machine parameters for possible scaling to avoid overflow + + modified by Sven: parameters SMALL1, SMINV1, SMALL2, + SMINV2, EPS are not SAVEd anymore between one call to the + others but recomputed at each call */ - if (first) { - eps = slamch_("Epsilon"); - base = slamch_("Base"); - 
i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f) - ; - small1 = pow_ri(&base, &i__1); - sminv1 = 1.f / small1; - small2 = small1 * small1; - sminv2 = sminv1 * sminv1; - first = FALSE_; - } + eps = slamch_("Epsilon"); + base = slamch_("Base"); + i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f); + small1 = pow_ri(&base, &i__1); + sminv1 = 1.f / small1; + small2 = small1 * small1; + sminv2 = sminv1 * sminv1; /* Determine if scaling of inputs necessary to avoid overflow @@ -10758,6 +10755,8 @@ static logical c_true = TRUE_; /* L10: */ } *tau *= sclfac; + lbd *= sclfac; + ubd *= sclfac; } else { /* Copy D and Z to DSCALE and ZSCALE */ @@ -10787,9 +10786,15 @@ static logical c_true = TRUE_; if (dabs(f) <= 0.f) { goto L60; } + if (f <= 0.f) { + lbd = *tau; + } else { + ubd = *tau; + } /* - Iteration begins + Iteration begins -- Use Gragg-Thornton-Warner cubic convergent + scheme It is not hard to see that @@ -10802,7 +10807,7 @@ static logical c_true = TRUE_; iter = niter + 1; - for (niter = iter; niter <= 20; ++niter) { + for (niter = iter; niter <= 40; ++niter) { if (*orgati) { temp1 = dscale[1] - *tau; @@ -10834,23 +10839,10 @@ static logical c_true = TRUE_; eta = -f / df; } - temp = eta + *tau; - if (*orgati) { - if (eta > 0.f && temp >= dscale[2]) { - eta = (dscale[2] - *tau) / 2.f; - } - if (eta < 0.f && temp <= dscale[1]) { - eta = (dscale[1] - *tau) / 2.f; - } - } else { - if (eta > 0.f && temp >= dscale[1]) { - eta = (dscale[1] - *tau) / 2.f; - } - if (eta < 0.f && temp <= dscale[0]) { - eta = (dscale[0] - *tau) / 2.f; - } - } *tau += eta; + if (*tau < lbd || *tau > ubd) { + *tau = (lbd + ubd) / 2.f; + } fc = 0.f; erretm = 0.f; @@ -10873,6 +10865,11 @@ static logical c_true = TRUE_; if (dabs(f) <= eps * erretm) { goto L60; } + if (f <= 0.f) { + lbd = *tau; + } else { + ubd = *tau; + } /* L50: */ } *info = 1; @@ -10925,10 +10922,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11249,10 +11246,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -11424,6 +11421,15 @@ static logical c_true = TRUE_; return 0; } +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). +*/ + + *givptr = 0; + /* Quick return if possible */ if (*n == 0) { @@ -11515,7 +11521,6 @@ static logical c_true = TRUE_; */ *k = 0; - *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -11692,10 +11697,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11938,10 +11943,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -12093,7 +12098,7 @@ static logical c_true = TRUE_; } /* - Loop thru remaining levels 1 -> CURLVL applying the Givens + Loop through remaining levels 1 -> CURLVL applying the Givens rotations and permutation and then multiplying the center matrices against the current Z. */ @@ -12191,10 +12196,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -12347,74 +12352,511 @@ static logical c_true = TRUE_; } /* slaev2_ */ +/* Subroutine */ int slaexc_(logical *wantq, integer *n, real *t, integer * + ldt, real *q, integer *ldq, integer *j1, integer *n1, integer *n2, + real *work, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + real r__1, r__2, r__3; + + /* Local variables */ + static real d__[16] /* was [4][4] */; + static integer k; + static real u[3], x[4] /* was [2][2] */; + static integer j2, j3, j4; + static real u1[3], u2[3]; + static integer nd; + static real cs, t11, t22, t33, sn, wi1, wi2, wr1, wr2, eps, tau, tau1, + tau2; + static integer ierr; + static real temp; + extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, + integer *, real *, real *); + static real scale, dnorm, xnorm; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slasy2_(logical *, + logical *, integer *, integer *, integer *, real *, integer *, + real *, integer *, real *, integer *, real *, real *, integer *, + real *, integer *); + extern doublereal slamch_(char *), slange_(char *, integer *, + integer *, real *, integer *, real *); + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *), slacpy_(char *, integer *, integer *, real *, integer *, + real *, integer *), slartg_(real *, real *, real *, real * + , real *); + static real thresh; + extern /* Subroutine */ int slarfx_(char *, integer *, integer *, real *, + real *, real *, integer *, real *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLAEXC swaps adjacent diagonal blocks T11 and T22 of order 1 or 2 in + an upper quasi-triangular matrix T by an orthogonal similarity + transformation. + + T must be in Schur canonical form, that is, block upper triangular + with 1-by-1 and 2-by-2 diagonal blocks; each 2-by-2 diagonal block + has its diagonal elemnts equal and its off-diagonal elements of + opposite sign. + + Arguments + ========= + + WANTQ (input) LOGICAL + = .TRUE. : accumulate the transformation in the matrix Q; + = .FALSE.: do not accumulate the transformation. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) REAL array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + canonical form. + On exit, the updated matrix T, again in Schur canonical form. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, if WANTQ is .TRUE., the orthogonal matrix Q. + On exit, if WANTQ is .TRUE., the updated matrix Q. + If WANTQ is .FALSE., Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. + LDQ >= 1; and if WANTQ is .TRUE., LDQ >= N. + + J1 (input) INTEGER + The index of the first row of the first block T11. + + N1 (input) INTEGER + The order of the first block T11. N1 = 0, 1 or 2. + + N2 (input) INTEGER + The order of the second block T22. N2 = 0, 1 or 2. + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + = 1: the transformed matrix T would be too far from Schur + form; the blocks are not swapped and T and Q are + unchanged. 
+ + ===================================================================== +*/ + + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + +/* Quick return if possible */ + + if (*n == 0 || *n1 == 0 || *n2 == 0) { + return 0; + } + if (*j1 + *n1 > *n) { + return 0; + } + + j2 = *j1 + 1; + j3 = *j1 + 2; + j4 = *j1 + 3; + + if (*n1 == 1 && *n2 == 1) { + +/* Swap two 1-by-1 blocks. */ + + t11 = t[*j1 + *j1 * t_dim1]; + t22 = t[j2 + j2 * t_dim1]; + +/* Determine the transformation to perform the interchange. */ + + r__1 = t22 - t11; + slartg_(&t[*j1 + j2 * t_dim1], &r__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (j3 <= *n) { + i__1 = *n - *j1 - 1; + srot_(&i__1, &t[*j1 + j3 * t_dim1], ldt, &t[j2 + j3 * t_dim1], + ldt, &cs, &sn); + } + i__1 = *j1 - 1; + srot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], &c__1, + &cs, &sn); + + t[*j1 + *j1 * t_dim1] = t22; + t[j2 + j2 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + srot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], &c__1, + &cs, &sn); + } + + } else { + +/* + Swapping involves at least one 2-by-2 block. + + Copy the diagonal block of order N1+N2 to the local array D + and compute its norm. +*/ + + nd = *n1 + *n2; + slacpy_("Full", &nd, &nd, &t[*j1 + *j1 * t_dim1], ldt, d__, &c__4); + dnorm = slange_("Max", &nd, &nd, d__, &c__4, &work[1]); + +/* + Compute machine-dependent threshold for test for accepting + swap. +*/ + + eps = slamch_("P"); + smlnum = slamch_("S") / eps; +/* Computing MAX */ + r__1 = eps * 10.f * dnorm; + thresh = dmax(r__1,smlnum); + +/* Solve T11*X - X*T22 = scale*T12 for X. 
*/ + + slasy2_(&c_false, &c_false, &c_n1, n1, n2, d__, &c__4, &d__[*n1 + 1 + + (*n1 + 1 << 2) - 5], &c__4, &d__[(*n1 + 1 << 2) - 4], &c__4, & + scale, x, &c__2, &xnorm, &ierr); + +/* Swap the adjacent diagonal blocks. */ + + k = *n1 + *n1 + *n2 - 3; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + } + +L10: + +/* + N1 = 1, N2 = 2: generate elementary reflector H so that: + + ( scale, X11, X12 ) H = ( 0, 0, * ) +*/ + + u[0] = scale; + u[1] = x[0]; + u[2] = x[2]; + slarfg_(&c__3, &u[2], u, &c__1, &tau); + u[2] = 1.f; + t11 = t[*j1 + *j1 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + r__2 = dabs(d__[2]), r__3 = dabs(d__[6]), r__2 = max(r__2,r__3), r__3 + = (r__1 = d__[10] - t11, dabs(r__1)); + if (dmax(r__2,r__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + i__1 = *n - *j1 + 1; + slarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + slarfx_("R", &j2, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + + t[j3 + *j1 * t_dim1] = 0.f; + t[j3 + j2 * t_dim1] = 0.f; + t[j3 + j3 * t_dim1] = t11; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + slarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L20: + +/* + N1 = 2, N2 = 1: generate elementary reflector H so that: + + H ( -X11 ) = ( * ) + ( -X21 ) = ( 0 ) + ( scale ) = ( 0 ) +*/ + + u[0] = -x[0]; + u[1] = -x[1]; + u[2] = scale; + slarfg_(&c__3, u, &u[1], &c__1, &tau); + u[0] = 1.f; + t33 = t[j3 + j3 * t_dim1]; + +/* Perform swap provisionally on diagonal block in D. */ + + slarfx_("L", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + slarfx_("R", &c__3, &c__3, u, &tau, d__, &c__4, &work[1]); + +/* + Test whether to reject swap. 
+ + Computing MAX +*/ + r__2 = dabs(d__[1]), r__3 = dabs(d__[2]), r__2 = max(r__2,r__3), r__3 + = (r__1 = d__[0] - t33, dabs(r__1)); + if (dmax(r__2,r__3) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. */ + + slarfx_("R", &j3, &c__3, u, &tau, &t[*j1 * t_dim1 + 1], ldt, &work[1]); + i__1 = *n - *j1; + slarfx_("L", &c__3, &i__1, u, &tau, &t[*j1 + j2 * t_dim1], ldt, &work[ + 1]); + + t[*j1 + *j1 * t_dim1] = t33; + t[j2 + *j1 * t_dim1] = 0.f; + t[j3 + *j1 * t_dim1] = 0.f; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + slarfx_("R", n, &c__3, u, &tau, &q[*j1 * q_dim1 + 1], ldq, &work[ + 1]); + } + goto L40; + +L30: + +/* + N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so + that: + + H(2) H(1) ( -X11 -X12 ) = ( * * ) + ( -X21 -X22 ) ( 0 * ) + ( scale 0 ) ( 0 0 ) + ( 0 scale ) ( 0 0 ) +*/ + + u1[0] = -x[0]; + u1[1] = -x[1]; + u1[2] = scale; + slarfg_(&c__3, u1, &u1[1], &c__1, &tau1); + u1[0] = 1.f; + + temp = -tau1 * (x[2] + u1[1] * x[3]); + u2[0] = -temp * u1[1] - x[3]; + u2[1] = -temp * u1[2]; + u2[2] = scale; + slarfg_(&c__3, u2, &u2[1], &c__1, &tau2); + u2[0] = 1.f; + +/* Perform swap provisionally on diagonal block in D. */ + + slarfx_("L", &c__3, &c__4, u1, &tau1, d__, &c__4, &work[1]) + ; + slarfx_("R", &c__4, &c__3, u1, &tau1, d__, &c__4, &work[1]) + ; + slarfx_("L", &c__3, &c__4, u2, &tau2, &d__[1], &c__4, &work[1]); + slarfx_("R", &c__4, &c__3, u2, &tau2, &d__[4], &c__4, &work[1]); + +/* + Test whether to reject swap. + + Computing MAX +*/ + r__1 = dabs(d__[2]), r__2 = dabs(d__[6]), r__1 = max(r__1,r__2), r__2 + = dabs(d__[3]), r__1 = max(r__1,r__2), r__2 = dabs(d__[7]); + if (dmax(r__1,r__2) > thresh) { + goto L50; + } + +/* Accept swap: apply transformation to the entire matrix T. 
*/ + + i__1 = *n - *j1 + 1; + slarfx_("L", &c__3, &i__1, u1, &tau1, &t[*j1 + *j1 * t_dim1], ldt, & + work[1]); + slarfx_("R", &j4, &c__3, u1, &tau1, &t[*j1 * t_dim1 + 1], ldt, &work[ + 1]); + i__1 = *n - *j1 + 1; + slarfx_("L", &c__3, &i__1, u2, &tau2, &t[j2 + *j1 * t_dim1], ldt, & + work[1]); + slarfx_("R", &j4, &c__3, u2, &tau2, &t[j2 * t_dim1 + 1], ldt, &work[1] + ); + + t[j3 + *j1 * t_dim1] = 0.f; + t[j3 + j2 * t_dim1] = 0.f; + t[j4 + *j1 * t_dim1] = 0.f; + t[j4 + j2 * t_dim1] = 0.f; + + if (*wantq) { + +/* Accumulate transformation in the matrix Q. */ + + slarfx_("R", n, &c__3, u1, &tau1, &q[*j1 * q_dim1 + 1], ldq, & + work[1]); + slarfx_("R", n, &c__3, u2, &tau2, &q[j2 * q_dim1 + 1], ldq, &work[ + 1]); + } + +L40: + + if (*n2 == 2) { + +/* Standardize new 2-by-2 block T11 */ + + slanv2_(&t[*j1 + *j1 * t_dim1], &t[*j1 + j2 * t_dim1], &t[j2 + * + j1 * t_dim1], &t[j2 + j2 * t_dim1], &wr1, &wi1, &wr2, & + wi2, &cs, &sn); + i__1 = *n - *j1 - 1; + srot_(&i__1, &t[*j1 + (*j1 + 2) * t_dim1], ldt, &t[j2 + (*j1 + 2) + * t_dim1], ldt, &cs, &sn); + i__1 = *j1 - 1; + srot_(&i__1, &t[*j1 * t_dim1 + 1], &c__1, &t[j2 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + srot_(n, &q[*j1 * q_dim1 + 1], &c__1, &q[j2 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + if (*n1 == 2) { + +/* Standardize new 2-by-2 block T22 */ + + j3 = *j1 + *n2; + j4 = j3 + 1; + slanv2_(&t[j3 + j3 * t_dim1], &t[j3 + j4 * t_dim1], &t[j4 + j3 * + t_dim1], &t[j4 + j4 * t_dim1], &wr1, &wi1, &wr2, &wi2, & + cs, &sn); + if (j3 + 2 <= *n) { + i__1 = *n - j3 - 1; + srot_(&i__1, &t[j3 + (j3 + 2) * t_dim1], ldt, &t[j4 + (j3 + 2) + * t_dim1], ldt, &cs, &sn); + } + i__1 = j3 - 1; + srot_(&i__1, &t[j3 * t_dim1 + 1], &c__1, &t[j4 * t_dim1 + 1], & + c__1, &cs, &sn); + if (*wantq) { + srot_(n, &q[j3 * q_dim1 + 1], &c__1, &q[j4 * q_dim1 + 1], & + c__1, &cs, &sn); + } + } + + } + return 0; + +/* Exit with INFO = 1 if swap was rejected. 
*/ + +L50: + *info = 1; + return 0; + +/* End of SLAEXC */ + +} /* slaexc_ */ + /* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n, integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer * info) { /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; - real r__1, r__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3; + real r__1, r__2, r__3, r__4; /* Builtin functions */ - double sqrt(doublereal), r_sign(real *, real *); + double sqrt(doublereal); /* Local variables */ static integer i__, j, k, l, m; static real s, v[3]; static integer i1, i2; - static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33, - h44; + static real t1, t2, t3, v2, v3, aa, ab, ba, bb, h11, h12, h21, h22, cs; static integer nh; - static real cs; - static integer nr; static real sn; + static integer nr; + static real tr; static integer nz; - static real ave, h33s, h44s; - static integer itn, its; - static real ulp, sum, tst1, h43h34, disc, unfl, ovfl, work[1]; + static real det, h21s; + static integer its; + static real ulp, sum, tst, rt1i, rt2i, rt1r, rt2r; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *), scopy_(integer *, real *, integer *, real *, integer *), slanv2_(real *, real *, real *, real *, real * , real *, real *, real *, real *, real *), slabad_(real *, real *) ; extern doublereal slamch_(char *); + static real safmin; extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, real *); - extern doublereal slanhs_(char *, integer *, real *, integer *, real *); - static real smlnum; + static real safmax, rtdisc, smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 - Purpose - ======= + Purpose + ======= - SLAHQR is an auxiliary routine called by SHSEQR to update the - eigenvalues and Schur decomposition already computed by SHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + SLAHQR is an auxiliary routine called by SHSEQR to update the + eigenvalues and Schur decomposition already computed by SHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. - Arguments - ========= + Arguments + ========= - WANTT (input) LOGICAL + WANTT (input) LOGICAL = .TRUE. : the full Schur form T is required; = .FALSE.: only eigenvalues are required. - WANTZ (input) LOGICAL + WANTZ (input) LOGICAL = .TRUE. : the matrix of Schur vectors Z is required; = .FALSE.: Schur vectors are not required. - N (input) INTEGER + N (input) INTEGER The order of the matrix H. N >= 0. - ILO (input) INTEGER - IHI (input) INTEGER + ILO (input) INTEGER + IHI (input) INTEGER It is assumed that H is already upper quasi-triangular in rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). SLAHQR works primarily with the Hessenberg @@ -12422,18 +12864,20 @@ static logical c_true = TRUE_; transformations to all of H if WANTT is .TRUE.. 1 <= ILO <= max(1,IHI); IHI <= N. - H (input/output) REAL array, dimension (LDH,N) + H (input/output) REAL array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper quasi-triangular in - rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. 
- - LDH (input) INTEGER + On exit, if INFO is zero and if WANTT is .TRUE., H is upper + quasi-triangular in rows and columns ILO:IHI, with any + 2-by-2 diagonal blocks in standard form. If INFO is zero + and WANTT is .FALSE., the contents of H are unspecified on + exit. The output state of H if INFO is nonzero is given + below under the description of INFO. + + LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). - WR (output) REAL array, dimension (N) - WI (output) REAL array, dimension (N) + WR (output) REAL array, dimension (N) + WI (output) REAL array, dimension (N) The real and imaginary parts, respectively, of the computed eigenvalues ILO to IHI are stored in the corresponding elements of WR and WI. If two eigenvalues are computed as a @@ -12445,36 +12889,61 @@ static logical c_true = TRUE_; H(i:i+1,i:i+1) is a 2-by-2 diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i). - ILOZ (input) INTEGER - IHIZ (input) INTEGER + ILOZ (input) INTEGER + IHIZ (input) INTEGER Specify the rows of Z to which transformations must be applied if WANTZ is .TRUE.. 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - Z (input/output) REAL array, dimension (LDZ,N) + Z (input/output) REAL array, dimension (LDZ,N) If WANTZ is .TRUE., on entry Z must contain the current matrix Z of transformations accumulated by SHSEQR, and on exit Z has been updated; transformations are applied only to the submatrix Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not referenced. - LDZ (input) INTEGER + LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). - INFO (output) INTEGER - = 0: successful exit - > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI - in a total of 30*(IHI-ILO+1) iterations; if INFO = i, - elements i+1:ihi of WR and WI contain those eigenvalues - which have been successfully computed. - - Further Details - =============== - - 2-96 Based on modifications by + INFO (output) INTEGER + = 0: successful exit + .GT. 
0: If INFO = i, SLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of WR and WI + contain those eigenvalues which have been + successfully computed. + + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix rows + and columns ILO thorugh INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by David Day, Sandia National Laboratory, USA - ===================================================================== + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of SLAHQR from LAPACK version 3.0. + It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). + + ========================================================= */ @@ -12502,19 +12971,27 @@ static logical c_true = TRUE_; return 0; } +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + h__[j + 2 + j * h_dim1] = 0.f; + h__[j + 3 + j * h_dim1] = 0.f; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + h__[*ihi + (*ihi - 2) * h_dim1] = 0.f; + } + nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ +/* Set machine-dependent constants for the stopping criterion. 
*/ - unfl = slamch_("Safe minimum"); - ovfl = 1.f / unfl; - slabad_(&unfl, &ovfl); - ulp = slamch_("Precision"); - smlnum = unfl * (nh / ulp); + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) nh / ulp); /* I1 and I2 are the indices of the first row and last column of H @@ -12527,10 +13004,6 @@ static logical c_true = TRUE_; i2 = *n; } -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of 1 or 2. Each iteration of the loop works @@ -12540,10 +13013,10 @@ static logical c_true = TRUE_; */ i__ = *ihi; -L10: +L20: l = *ilo; if (i__ < *ilo) { - goto L150; + goto L160; } /* @@ -12552,28 +13025,60 @@ static logical c_true = TRUE_; subdiagonal element has become negligible. */ - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + for (its = 0; its <= 30; ++its) { /* Look for a single small subdiagonal element. */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 - = h__[k + k * h_dim1], dabs(r__2)); - if (tst1 == 0.f) { - i__3 = i__ - l + 1; - tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work); + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= smlnum) { + goto L40; + } + tst = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2 = + h__[k + k * h_dim1], dabs(r__2)); + if (tst == 0.f) { + if (k - 2 >= *ilo) { + tst += (r__1 = h__[k - 1 + (k - 2) * h_dim1], dabs(r__1)); + } + if (k + 1 <= *ihi) { + tst += (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)); + } } +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some cases. 
==== +*/ + if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= ulp * tst) { /* Computing MAX */ - r__2 = ulp * tst1; - if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2, - smlnum)) { - goto L30; + r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = + (r__2 = h__[k - 1 + k * h_dim1], dabs(r__2)); + ab = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)), r__4 = + (r__2 = h__[k - 1 + k * h_dim1], dabs(r__2)); + ba = dmin(r__3,r__4); +/* Computing MAX */ + r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 + = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + dabs(r__2)); + aa = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + k * h_dim1], dabs(r__1)), r__4 = (r__2 + = h__[k - 1 + (k - 1) * h_dim1] - h__[k + k * h_dim1], + dabs(r__2)); + bb = dmin(r__3,r__4); + s = aa + ab; +/* Computing MAX */ + r__1 = smlnum, r__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= dmax(r__1,r__2)) { + goto L40; + } } -/* L20: */ +/* L30: */ } -L30: +L40: l = k; if (l > *ilo) { @@ -12585,7 +13090,7 @@ static logical c_true = TRUE_; /* Exit from loop if a submatrix of order 1 or 2 has split off. */ if (l >= i__ - 1) { - goto L140; + goto L150; } /* @@ -12599,15 +13104,26 @@ static logical c_true = TRUE_; i2 = i__; } - if (its == 10 || its == 20) { + if (its == 10) { + +/* Exceptional shift. */ + + s = (r__1 = h__[l + 1 + l * h_dim1], dabs(r__1)) + (r__2 = h__[l + + 2 + (l + 1) * h_dim1], dabs(r__2)); + h11 = s * .75f + h__[l + l * h_dim1]; + h12 = s * -.4375f; + h21 = s; + h22 = h11; + } else if (its == 20) { /* Exceptional shift. */ s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2)); - h44 = s * .75f + h__[i__ + i__ * h_dim1]; - h33 = h44; - h43h34 = s * -.4375f * s; + h11 = s * .75f + h__[i__ + i__ * h_dim1]; + h12 = s * -.4375f; + h21 = s; + h22 = h11; } else { /* @@ -12615,74 +13131,95 @@ static logical c_true = TRUE_; (i.e. 
2nd degree generalized Rayleigh quotient) */ - h44 = h__[i__ + i__ * h_dim1]; - h33 = h__[i__ - 1 + (i__ - 1) * h_dim1]; - h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ * - h_dim1]; - s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) * - h_dim1]; - disc = (h33 - h44) * .5f; - disc = disc * disc + h43h34; - if (disc > 0.f) { + h11 = h__[i__ - 1 + (i__ - 1) * h_dim1]; + h21 = h__[i__ + (i__ - 1) * h_dim1]; + h12 = h__[i__ - 1 + i__ * h_dim1]; + h22 = h__[i__ + i__ * h_dim1]; + } + s = dabs(h11) + dabs(h12) + dabs(h21) + dabs(h22); + if (s == 0.f) { + rt1r = 0.f; + rt1i = 0.f; + rt2r = 0.f; + rt2i = 0.f; + } else { + h11 /= s; + h21 /= s; + h12 /= s; + h22 /= s; + tr = (h11 + h22) / 2.f; + det = (h11 - tr) * (h22 - tr) - h12 * h21; + rtdisc = sqrt((dabs(det))); + if (det >= 0.f) { + +/* ==== complex conjugate shifts ==== */ + + rt1r = tr * s; + rt2r = rt1r; + rt1i = rtdisc * s; + rt2i = -rt1i; + } else { -/* Real roots: use Wilkinson's shift twice */ +/* ==== real shifts (use only one of them) ==== */ - disc = sqrt(disc); - ave = (h33 + h44) * .5f; - if (dabs(h33) - dabs(h44) > 0.f) { - h33 = h33 * h44 - h43h34; - h44 = h33 / (r_sign(&disc, &ave) + ave); + rt1r = tr + rtdisc; + rt2r = tr - rtdisc; + if ((r__1 = rt1r - h22, dabs(r__1)) <= (r__2 = rt2r - h22, + dabs(r__2))) { + rt1r *= s; + rt2r = rt1r; } else { - h44 = r_sign(&disc, &ave) + ave; + rt2r *= s; + rt1r = rt2r; } - h33 = h44; - h43h34 = 0.f; + rt1i = 0.f; + rt2i = 0.f; } } /* Look for two consecutive small subdiagonal elements. */ - i__2 = l; - for (m = i__ - 2; m >= i__2; --m) { + i__1 = l; + for (m = i__ - 2; m >= i__1; --m) { /* Determine the effect of starting the double-shift QR iteration at row M, and see if this would make H(M,M-1) - negligible. 
-*/ - - h11 = h__[m + m * h_dim1]; - h22 = h__[m + 1 + (m + 1) * h_dim1]; - h21 = h__[m + 1 + m * h_dim1]; - h12 = h__[m + (m + 1) * h_dim1]; - h44s = h44 - h11; - h33s = h33 - h11; - v1 = (h33s * h44s - h43h34) / h21 + h12; - v2 = h22 - h11 - h33s - h44s; - v3 = h__[m + 2 + (m + 1) * h_dim1]; - s = dabs(v1) + dabs(v2) + dabs(v3); - v1 /= s; - v2 /= s; - v3 /= s; - v[0] = v1; - v[1] = v2; - v[2] = v3; + negligible. (The following uses scaling to avoid + overflows and most underflows.) +*/ + + h21s = h__[m + 1 + m * h_dim1]; + s = (r__1 = h__[m + m * h_dim1] - rt2r, dabs(r__1)) + dabs(rt2i) + + dabs(h21s); + h21s = h__[m + 1 + m * h_dim1] / s; + v[0] = h21s * h__[m + (m + 1) * h_dim1] + (h__[m + m * h_dim1] - + rt1r) * ((h__[m + m * h_dim1] - rt2r) / s) - rt1i * (rt2i + / s); + v[1] = h21s * (h__[m + m * h_dim1] + h__[m + 1 + (m + 1) * h_dim1] + - rt1r - rt2r); + v[2] = h21s * h__[m + 2 + (m + 1) * h_dim1]; + s = dabs(v[0]) + dabs(v[1]) + dabs(v[2]); + v[0] /= s; + v[1] /= s; + v[2] /= s; if (m == l) { - goto L50; + goto L60; } - h00 = h__[m - 1 + (m - 1) * h_dim1]; - h10 = h__[m + (m - 1) * h_dim1]; - tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22)); - if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) { - goto L50; + if ((r__1 = h__[m + (m - 1) * h_dim1], dabs(r__1)) * (dabs(v[1]) + + dabs(v[2])) <= ulp * dabs(v[0]) * ((r__2 = h__[m - 1 + ( + m - 1) * h_dim1], dabs(r__2)) + (r__3 = h__[m + m * + h_dim1], dabs(r__3)) + (r__4 = h__[m + 1 + (m + 1) * + h_dim1], dabs(r__4)))) { + goto L60; } -/* L40: */ +/* L50: */ } -L50: +L60: /* Double-shift QR step */ - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { /* The first iteration of this loop determines a reflection G @@ -12696,8 +13233,8 @@ static logical c_true = TRUE_; Computing MIN */ - i__3 = 3, i__4 = i__ - k + 1; - nr = min(i__3,i__4); + i__2 = 3, i__3 = i__ - k + 1; + nr = min(i__2,i__3); if (k > m) { scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, 
&c__1); } @@ -12709,7 +13246,13 @@ static logical c_true = TRUE_; h__[k + 2 + (k - 1) * h_dim1] = 0.f; } } else if (m > l) { - h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1]; +/* + ==== Use the following instead of + . H( K, K-1 ) = -H( K, K-1 ) to + . avoid a bug when v(2) and v(3) + . underflow. ==== +*/ + h__[k + (k - 1) * h_dim1] *= 1.f - t1; } v2 = v[1]; t2 = t1 * v2; @@ -12722,14 +13265,14 @@ static logical c_true = TRUE_; in columns K to I2. */ - i__3 = i2; - for (j = k; j <= i__3; ++j) { + i__2 = i2; + for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1] + v3 * h__[k + 2 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; h__[k + 2 + j * h_dim1] -= sum * t3; -/* L60: */ +/* L70: */ } /* @@ -12738,29 +13281,29 @@ static logical c_true = TRUE_; Computing MIN */ - i__4 = k + 3; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { + i__3 = k + 3; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] + v3 * h__[j + (k + 2) * h_dim1]; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; h__[j + (k + 2) * h_dim1] -= sum * t3; -/* L70: */ +/* L80: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1] + v3 * z__[j + (k + 2) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; z__[j + (k + 2) * z_dim1] -= sum * t3; -/* L80: */ +/* L90: */ } } } else if (nr == 2) { @@ -12770,12 +13313,12 @@ static logical c_true = TRUE_; in columns K to I2. 
*/ - i__3 = i2; - for (j = k; j <= i__3; ++j) { + i__2 = i2; + for (j = k; j <= i__2; ++j) { sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]; h__[k + j * h_dim1] -= sum * t1; h__[k + 1 + j * h_dim1] -= sum * t2; -/* L90: */ +/* L100: */ } /* @@ -12783,33 +13326,33 @@ static logical c_true = TRUE_; matrix in rows I1 to min(K+3,I). */ - i__3 = i__; - for (j = i1; j <= i__3; ++j) { + i__2 = i__; + for (j = i1; j <= i__2; ++j) { sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1] ; h__[j + k * h_dim1] -= sum * t1; h__[j + (k + 1) * h_dim1] -= sum * t2; -/* L100: */ +/* L110: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) * z_dim1]; z__[j + k * z_dim1] -= sum * t1; z__[j + (k + 1) * z_dim1] -= sum * t2; -/* L110: */ +/* L120: */ } } } -/* L120: */ +/* L130: */ } -/* L130: */ +/* L140: */ } /* Failure to converge in remaining number of iterations */ @@ -12817,7 +13360,7 @@ static logical c_true = TRUE_; *info = i__; return 0; -L140: +L150: if (l == i__) { @@ -12861,23 +13404,19 @@ static logical c_true = TRUE_; } } -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ +/* return to start of the main loop with new value of I. 
*/ - itn -= its; i__ = l - 1; - goto L10; + goto L20; -L150: +L160: return 0; /* End of SLAHQR */ } /* slahqr_ */ -/* Subroutine */ int slahrd_(integer *n, integer *k, integer *nb, real *a, +/* Subroutine */ int slahr2_(integer *n, integer *k, integer *nb, real *a, integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy) { /* System generated locals */ @@ -12889,25 +13428,28 @@ static logical c_true = TRUE_; static integer i__; static real ei; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), - sgemv_(char *, integer *, integer *, real *, real *, integer *, - real *, integer *, real *, real *, integer *), scopy_( - integer *, real *, integer *, real *, integer *), saxpy_(integer * - , real *, real *, integer *, real *, integer *), strmv_(char *, - char *, char *, integer *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *, - integer *, real *); + sgemm_(char *, char *, integer *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, + real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), + strmm_(char *, char *, char *, char *, integer *, integer *, real + *, real *, integer *, real *, integer *), saxpy_(integer *, real *, real *, integer *, real *, + integer *), strmv_(char *, char *, char *, integer *, real *, + integer *, real *, integer *), slarfg_( + integer *, real *, real *, integer *, real *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose ======= - SLAHRD reduces the first NB columns of a real general n-by-(n-k+1) + SLAHR2 reduces the first NB columns of A real general n-BY-(n-k+1) matrix A so that elements below the k-th subdiagonal are zero. The reduction is performed by an orthogonal similarity transformation Q' * A * Q. The routine returns the matrices V and T which determine @@ -12924,6 +13466,7 @@ static logical c_true = TRUE_; K (input) INTEGER The offset for the reduction. Elements below the k-th subdiagonal in the first NB columns are reduced to zero. + K < N. NB (input) INTEGER The number of columns to be reduced. @@ -12979,9 +13522,9 @@ static logical c_true = TRUE_; The contents of A on exit are illustrated by the following example with n = 7, k = 3 and nb = 2: - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) ( h h a a a ) ( v1 h a a a ) ( v1 v2 a a a ) @@ -12991,6 +13534,19 @@ static logical c_true = TRUE_; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. 
+ ===================================================================== @@ -13019,15 +13575,16 @@ static logical c_true = TRUE_; if (i__ > 1) { /* - Update A(1:n,i) + Update A(K+1:N,I) - Compute i-th column of A - Y * V' + Update I-th column of A - Y * V' */ - i__2 = i__ - 1; - sgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &a[* - k + i__ - 1 + a_dim1], lda, &c_b15, &a[i__ * a_dim1 + 1], - &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b15, &a[*k + 1 + + i__ * a_dim1], &c__1); /* Apply I - V * T' * V' to this column (call it b) from the @@ -13045,7 +13602,7 @@ static logical c_true = TRUE_; scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; - strmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1], + strmv_("Lower", "Transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); /* w := w + V2'*b2 */ @@ -13059,21 +13616,21 @@ static logical c_true = TRUE_; /* w := T'*w */ i__2 = i__ - 1; - strmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt, + strmv_("Upper", "Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); /* b2 := b2 - V2*w */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; - sgemv_("No transpose", &i__2, &i__3, &c_b151, &a[*k + i__ + + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &a[*k + i__ + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b15, &a[*k + i__ + i__ * a_dim1], &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; - strmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + strmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] , lda, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; saxpy_(&i__2, &c_b151, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + @@ -13083,8 +13640,8 @@ static logical c_true = TRUE_; } /* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) + Generate the elementary reflector 
H(I) to annihilate + A(K+I+1:N,I) */ i__2 = *n - *k - i__ + 1; @@ -13095,29 +13652,33 @@ static logical c_true = TRUE_; ei = a[*k + i__ + i__ * a_dim1]; a[*k + i__ + i__ * a_dim1] = 1.f; -/* Compute Y(1:n,i) */ +/* Compute Y(K+1:N,I) */ - i__2 = *n - *k - i__ + 1; - sgemv_("No transpose", n, &i__2, &c_b15, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[i__ * - y_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b15, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &y[* + k + 1 + i__ * y_dim1], &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; sgemv_("Transpose", &i__2, &i__3, &c_b15, &a[*k + i__ + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b29, &t[i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; - sgemv_("No transpose", n, &i__2, &c_b151, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b15, &y[i__ * y_dim1 + 1], &c__1); - sscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = i__ - 1; + sgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b151, &y[*k + 1 + y_dim1], + ldy, &t[i__ * t_dim1 + 1], &c__1, &c_b15, &y[*k + 1 + i__ * + y_dim1], &c__1); + i__2 = *n - *k; + sscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); -/* Compute T(1:i,i) */ +/* Compute T(1:I,I) */ i__2 = i__ - 1; r__1 = -tau[i__]; sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; - strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + strmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1) ; t[i__ + i__ * t_dim1] = tau[i__]; @@ -13126,11 +13687,70 @@ static logical c_true = TRUE_; } a[*k + *nb + *nb * a_dim1] = ei; +/* Compute Y(1:K,1:NB) */ + + slacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + strmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b15, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - 
*nb; + sgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b15, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b15, &y[y_offset], ldy); + } + strmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b15, &t[ + t_offset], ldt, &y[y_offset], ldy); + return 0; -/* End of SLAHRD */ +/* End of SLAHR2 */ + +} /* slahr2_ */ -} /* slahrd_ */ +logical slaisnan_(real *sin1, real *sin2) +{ + /* System generated locals */ + logical ret_val; + + +/* + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + + + Purpose + ======= + + This routine is not for general use. It exists solely to avoid + over-optimization in SISNAN. + + SLAISNAN checks for NaNs by comparing its two arguments for + inequality. NaN is the only floating-point value where NaN != NaN + returns .TRUE. To check for NaNs, pass the same variable as both + arguments. + + A compiler must assume that the two arguments are + not the same variable, and the test will not be optimized away. + Interprocedural or whole-program optimization may delete this + test. The ISNAN functions will be replaced by the correct + Fortran 03 intrinsic once the intrinsic is widely available. + + Arguments + ========= + + SIN1 (input) REAL + + SIN2 (input) REAL + Two numbers to compare for inequality. + + ===================================================================== +*/ + + ret_val = *sin1 != *sin2; + return ret_val; +} /* slaisnan_ */ /* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real * smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b, @@ -13169,10 +13789,10 @@ static logical c_true = TRUE_; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -13699,10 +14319,10 @@ static logical c_true = TRUE_; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -13821,10 +14441,10 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -13850,7 +14470,7 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -13873,7 +14493,7 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, LDA (input) INTEGER The leading dimension of the array A. LDA >= max(M,1). 
- WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), where LWORK >= M when NORM = 'I'; otherwise, WORK is not referenced. @@ -13970,180 +14590,6 @@ doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda, } /* slange_ */ -doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work) -{ - /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - real ret_val, r__1, r__2, r__3; - - /* Builtin functions */ - double sqrt(doublereal); - - /* Local variables */ - static integer i__, j; - static real sum, scale; - extern logical lsame_(char *, char *); - static real value; - extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *, - real *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 - - - Purpose - ======= - - SLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. - - Description - =========== - - SLANHS returns the value - - SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' - - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. - - Arguments - ========= - - NORM (input) CHARACTER*1 - Specifies the value to be returned in SLANHS as described - above. - - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, SLANHS is - set to zero. 
- - A (input) REAL array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. - - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). - - WORK (workspace) REAL array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; - - /* Function Body */ - if (*n == 0) { - value = 0.f; - } else if (lsame_(norm, "M")) { - -/* Find max(abs(A(i,j))). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1)); - value = dmax(r__2,r__3); -/* L10: */ - } -/* L20: */ - } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { - -/* Find norm1(A). */ - - value = 0.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.f; -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -/* L30: */ - } - value = dmax(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { - -/* Find normI(A). */ - - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.f; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1)); -/* L60: */ - } -/* L70: */ - } - value = 0.f; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { -/* Computing MAX */ - r__1 = value, r__2 = work[i__]; - value = dmax(r__1,r__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { - -/* Find normF(A). 
*/ - - scale = 0.f; - sum = 1.f; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } - - ret_val = value; - return ret_val; - -/* End of SLANHS */ - -} /* slanhs_ */ - doublereal slanst_(char *norm, integer *n, real *d__, real *e) { /* System generated locals */ @@ -14163,10 +14609,10 @@ doublereal slanst_(char *norm, integer *n, real *d__, real *e) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14192,7 +14638,7 @@ doublereal slanst_(char *norm, integer *n, real *d__, real *e) where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -14300,10 +14746,10 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14329,7 +14775,7 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -14360,7 +14806,7 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, LDA (input) INTEGER The leading dimension of the array A. LDA >= max(N,1). - WORK (workspace) REAL array, dimension (LWORK), + WORK (workspace) REAL array, dimension (MAX(1,LWORK)), where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, WORK is not referenced. @@ -14505,10 +14951,10 @@ doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda, /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -14718,10 +15164,10 @@ doublereal slapy2_(real *x, real *y) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14771,10 +15217,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14802,7 +15248,12 @@ doublereal slapy3_(real *x, real *y, real *z__) r__1 = max(xabs,yabs); w = dmax(r__1,zabs); if (w == 0.f) { - ret_val = 0.f; +/* + W can be zero for max(0,nan,0) + adding all three entries together will make sure + NaN will not disappear. +*/ + ret_val = xabs + yabs + zabs; } else { /* Computing 2nd power */ r__1 = xabs / w; @@ -14818,26 +15269,4066 @@ doublereal slapy3_(real *x, real *y, real *z__) } /* slapy3_ */ -/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, - integer *incv, real *tau, real *c__, integer *ldc, real *work) +/* Subroutine */ int slaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info) { /* System generated locals */ - integer c_dim1, c_offset; - real r__1; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; /* Local variables */ - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); - extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); + static integer i__, k; + static real aa, bb, cc, dd; + static integer ld; + static real cs; + static integer nh, it, ks, kt; + static real sn; + static integer ku, kv, ls, ns; + static real ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static real 
swap; + static integer ktop; + static real zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slaqr3_(logical *, + logical *, integer *, integer *, integer *, integer *, real *, + integer *, integer *, integer *, real *, integer *, integer *, + integer *, real *, real *, real *, integer *, integer *, real *, + integer *, integer *, real *, integer *, real *, integer *), + slaqr4_(logical *, logical *, integer *, integer *, integer *, + real *, integer *, real *, real *, integer *, integer *, real *, + integer *, real *, integer *, integer *), slaqr5_(logical *, + logical *, integer *, integer *, integer *, integer *, integer *, + real *, real *, real *, integer *, integer *, integer *, real *, + integer *, real *, integer *, real *, integer *, integer *, real * + , integer *, integer *, real *, integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *); + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + Purpose + ======= + + SLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. 
+ + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to SGEBAL, and then passed to SGEHRD when the + matrix output by SGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). 
+ + WR (output) REAL array, dimension (IHI) + WI (output) REAL array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) REAL array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then SLAQR0 does a workspace query. + In this case, SLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). 
No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, SLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . SLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) 
==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use SLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + slahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . 
enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to SLAQR3 ==== +*/ + + i__1 = nwr + 1; + slaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(SLAQR5, SLAQR3) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* ==== SLAHQR/SLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "SLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. 
==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((r__1 = h__[kwtop + (kwtop - 1) * h_dim1], dabs(r__1)) + > (r__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + dabs(r__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + slaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if SLAQR3 + . did not provide that many shifts. 
==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . SLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1) + ) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + dabs(r__2)); + aa = ss * .75f + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375f; + dd = aa; + slanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.f; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use SLAQR4 or + . SLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + slacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + slaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &work[1], lwork, + &inf); + } else { + slahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &wr[ks], &wi[ks], & + c__1, &c__1, zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. 
==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((r__1 = wr[i__], dabs(r__1)) + (r__2 = wi[ + i__], dabs(r__2)) < (r__3 = wr[i__ + + 1], dabs(r__3)) + (r__4 = wi[i__ + 1], + dabs(r__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.f) { + if ((r__1 = wr[kbot] - h__[kbot + kbot * h_dim1], + dabs(r__1)) < (r__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], dabs(r__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . 
then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + slaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR0 ==== */ + + return 0; +} /* slaqr0_ */ + +/* Subroutine */ int slaqr1_(integer *n, real *h__, integer *ldh, real *sr1, + real *si1, real *sr2, real *si2, real *v) +{ + /* System generated locals */ + integer h_dim1, h_offset; + real r__1, r__2, r__3; + + /* Local variables */ + static real s, h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, SLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - (sr1 + i*si1)*I)*(H - (sr2 + i*si2)*I) + + scaling to avoid overflows and most underflows. It + is assumed that either + + 1) sr1 = sr2 and si1 = -si2 + or + 2) si1 = si2 = 0. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) REAL array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + SR1 (input) REAL + SI1 The shifts in (*). + SR2 + SI2 + + V (output) REAL array of dimension N + A scalar multiple of the first column of the + matrix K in (*). + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + s = (r__1 = h__[h_dim1 + 1] - *sr2, dabs(r__1)) + dabs(*si2) + (r__2 = + h__[h_dim1 + 2], dabs(r__2)); + if (s == 0.f) { + v[1] = 0.f; + v[2] = 0.f; + } else { + h21s = h__[h_dim1 + 2] / s; + v[1] = h21s * h__[(h_dim1 << 1) + 1] + (h__[h_dim1 + 1] - *sr1) * + ((h__[h_dim1 + 1] - *sr2) / s) - *si1 * (*si2 / s); + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2); + } + } else { + s = (r__1 = h__[h_dim1 + 1] - *sr2, dabs(r__1)) + dabs(*si2) + (r__2 = + h__[h_dim1 + 2], dabs(r__2)) + (r__3 = h__[h_dim1 + 3], dabs( + r__3)); + if (s == 0.f) { + v[1] = 0.f; + v[2] = 0.f; + v[3] = 0.f; + } else { + h21s = h__[h_dim1 + 2] / s; + h31s = h__[h_dim1 + 3] / s; + v[1] = (h__[h_dim1 + 1] - *sr1) * ((h__[h_dim1 + 1] - *sr2) / s) + - *si1 * (*si2 
/ s) + h__[(h_dim1 << 1) + 1] * h21s + h__[ + h_dim1 * 3 + 1] * h31s; + v[2] = h21s * (h__[h_dim1 + 1] + h__[(h_dim1 << 1) + 2] - *sr1 - * + sr2) + h__[h_dim1 * 3 + 2] * h31s; + v[3] = h31s * (h__[h_dim1 + 1] + h__[h_dim1 * 3 + 3] - *sr1 - * + sr2) + h21s * h__[(h_dim1 << 1) + 3]; + } + } + return 0; +} /* slaqr1_ */ + +/* Subroutine */ int slaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static real s, aa, bb, cc, dd, cs, sn; + static integer jw; + static real evi, evk, foo; + static integer kln; + static real tau, ulp; + static integer lwk1, lwk2; + static real beta; + static integer kend, kcol, info, ifst, ilst, ltop, krow; + static logical bulge; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), sgemm_( + char *, char *, integer *, integer *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *); + static integer infqr; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer kwtop; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slabad_(real *, real *) + ; + extern doublereal slamch_(char *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + 
static real safmin; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + static real safmax; + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *, + real *, integer *); + static logical sorted; + extern /* Subroutine */ int strexc_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, integer *), sormhr_(char *, char *, integer *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + This subroutine is identical to SLAQR3 except that it avoids + recursion by calling SLAHQR instead of SLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. 
+ + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) REAL array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. 
+ + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) REAL array, dimension KBOT + SI (output) REAL array, dimension KBOT + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) REAL array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) REAL array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) REAL array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) REAL array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; SLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to SGEHRD ==== */ + + i__1 = jw - 1; + sgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to SORMHR ==== */ + + i__1 = jw - 1; + sormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.f; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.f; + *ns = 1; + *nd = 0; +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * (r__1 = h__[kwtop + kwtop * h_dim1], dabs( + r__1)); + if (dabs(s) <= dmax(r__2,r__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.f; + } + } + work[1] = 1.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + slacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + scopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + slaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + slahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[kwtop], + &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== STREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.f; + t[j + 3 + j * t_dim1] = 0.f; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.f; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.f; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (r__1 = t[*ns + *ns * t_dim1], dabs(r__1)); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * foo; + if ((r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)) <= dmax(r__2, + r__3)) { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (STREXC can not fail in this case.) 
==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (r__3 = t[*ns + *ns * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + *ns + (*ns - 1) * t_dim1], dabs(r__1))) * sqrt((r__2 = t[* + ns - 1 + *ns * t_dim1], dabs(r__2))); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__3 = (r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)), r__4 = (r__2 + = s * v[(*ns - 1) * v_dim1 + 1], dabs(r__2)); +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (dmax(r__3,r__4) <= dmax(r__5,r__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, STREXC does the right thing with + . ILST in case of a rare exchange failure. ==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. 
==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (r__1 = t[i__ + i__ * t_dim1], dabs(r__1)); + } else { + evi = (r__3 = t[i__ + i__ * t_dim1], dabs(r__3)) + sqrt((r__1 + = t[i__ + 1 + i__ * t_dim1], dabs(r__1))) * sqrt(( + r__2 = t[i__ + (i__ + 1) * t_dim1], dabs(r__2))); + } + + if (k == kend) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else if (t[k + 1 + k * t_dim1] == 0.f) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else { + evk = (r__3 = t[k + k * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + k + 1 + k * t_dim1], dabs(r__1))) * sqrt((r__2 = t[k + + (k + 1) * t_dim1], dabs(r__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.f) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 
0.f) { + if (*ns > 1 && s != 0.f) { + +/* ==== Reflect spike back into lower triangle ==== */ + + scopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + slarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.f; + + i__1 = jw - 2; + i__2 = jw - 2; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + slarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + sgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + slacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + scopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.f) { + i__1 = *lwork - jw; + sormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + sgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + slacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR2 ==== */ + + return 0; +} /* slaqr2_ */ + +/* Subroutine */ int slaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, real *h__, integer *ldh, + integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *ns, + integer *nd, real *sr, real *si, real *v, integer *ldv, integer *nh, + real *t, integer *ldt, integer *nv, real *wv, integer *ldwv, real * + work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + real r__1, r__2, r__3, r__4, r__5, r__6; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer i__, j, k; + static real s, aa, bb, cc, dd, cs, sn; + static integer jw; + static real evi, evk, foo; + static integer kln; + static real tau, ulp; + static integer lwk1, lwk2, lwk3; + static real beta; + static integer kend, kcol, info, nmin, ifst, ilst, ltop, krow; + static logical bulge; + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *), sgemm_( + char *, char *, integer *, integer *, integer *, real *, real *, + integer *, real *, integer *, real *, real *, integer *); + static integer infqr; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *); + static integer kwtop; + extern /* Subroutine */ int slanv2_(real *, real *, real *, real *, real * + , real *, real *, real *, real *, real *), slaqr4_(logical *, + logical *, integer *, integer *, integer *, real *, integer *, + real *, real *, integer *, integer *, real *, integer *, real *, + integer *, integer *), slabad_(real *, real *); + extern doublereal slamch_(char *); + extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real + *, integer *, real *, real *, integer *, integer *); + static real safmin; + extern 
integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static real safmax; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *), slahqr_(logical *, logical *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, integer * + , real *, integer *, integer *), slacpy_(char *, integer *, + integer *, real *, integer *, real *, integer *), slaset_( + char *, integer *, integer *, real *, real *, real *, integer *); + static logical sorted; + extern /* Subroutine */ int strexc_(char *, integer *, real *, integer *, + real *, integer *, integer *, integer *, real *, integer *), sormhr_(char *, char *, integer *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + static real smlnum; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an orthogonal similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an orthogonal similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the quasi-triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. 
+ + WANTZ (input) LOGICAL + If .TRUE., then the orthogonal matrix Z is updated so + so that the orthogonal Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the orthogonal matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) REAL array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by an orthogonal + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the orthogonal + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. 
+ + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SR (output) REAL array, dimension KBOT + SI (output) REAL array, dimension KBOT + On output, the real and imaginary parts of approximate + eigenvalues that may be used for shifts are stored in + SR(KBOT-ND-NS+1) through SR(KBOT-ND) and + SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively. + The real and imaginary parts of converged eigenvalues + are stored in SR(KBOT-ND+1) through SR(KBOT) and + SI(KBOT-ND+1) through SI(KBOT), respectively. + + V (workspace) REAL array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) REAL array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) REAL array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) REAL array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; SLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sr; + --si; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to SGEHRD ==== */ + + i__1 = jw - 1; + sgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1]; + +/* ==== Workspace query call to SORMHR ==== */ + + i__1 = jw - 1; + sormhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1]; + +/* ==== Workspace query call to SLAQR4 ==== */ + + slaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[1], + &si[1], &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, & + infqr); + lwk3 = (integer) work[1]; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1] = 1.f; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. 
==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s = 0.f; + } else { + s = h__[kwtop + (kwtop - 1) * h_dim1]; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + sr[kwtop] = h__[kwtop + kwtop * h_dim1]; + si[kwtop] = 0.f; + *ns = 1; + *nd = 0; +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * (r__1 = h__[kwtop + kwtop * h_dim1], dabs( + r__1)); + if (dabs(s) <= dmax(r__2,r__3)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + h__[kwtop + (kwtop - 1) * h_dim1] = 0.f; + } + } + work[1] = 1.f; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + slacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + scopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + slaset_("A", &jw, &jw, &c_b29, &c_b15, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "SLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + slaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], + lwork, &infqr); + } else { + slahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sr[ + kwtop], &si[kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== STREXC needs a clean margin near the diagonal ==== */ + + i__1 = jw - 3; + for (j = 1; j <= i__1; ++j) { + t[j + 2 + j * t_dim1] = 0.f; + t[j + 3 + j * t_dim1] = 0.f; +/* L10: */ + } + if (jw > 2) { + t[jw + (jw - 2) * t_dim1] = 0.f; + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; +L20: + if (ilst <= *ns) { + if (*ns == 1) { + bulge = FALSE_; + } else { + bulge = t[*ns + (*ns - 1) * t_dim1] != 0.f; + } + +/* ==== Small spike tip test for deflation ==== */ + + if (! bulge) { + +/* ==== Real eigenvalue ==== */ + + foo = (r__1 = t[*ns + *ns * t_dim1], dabs(r__1)); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * foo; + if ((r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)) <= dmax(r__2, + r__3)) { + +/* ==== Deflatable ==== */ + + --(*ns); + } else { + +/* + ==== Undeflatable. Move it up out of the way. + . (STREXC can not fail in this case.) 
==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ++ilst; + } + } else { + +/* ==== Complex conjugate pair ==== */ + + foo = (r__3 = t[*ns + *ns * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + *ns + (*ns - 1) * t_dim1], dabs(r__1))) * sqrt((r__2 = t[* + ns - 1 + *ns * t_dim1], dabs(r__2))); + if (foo == 0.f) { + foo = dabs(s); + } +/* Computing MAX */ + r__3 = (r__1 = s * v[*ns * v_dim1 + 1], dabs(r__1)), r__4 = (r__2 + = s * v[(*ns - 1) * v_dim1 + 1], dabs(r__2)); +/* Computing MAX */ + r__5 = smlnum, r__6 = ulp * foo; + if (dmax(r__3,r__4) <= dmax(r__5,r__6)) { + +/* ==== Deflatable ==== */ + + *ns += -2; + } else { + +/* + ==== Undeflatable. Move them up out of the way. + . Fortunately, STREXC does the right thing with + . ILST in case of a rare exchange failure. ==== +*/ + + ifst = *ns; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + ilst += 2; + } + } + +/* ==== End deflation detection loop ==== */ + + goto L20; + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s = 0.f; + } + + if (*ns < jw) { + +/* + ==== sorting diagonal blocks of T improves accuracy for + . graded matrices. Bubble sort deals well with + . exchange failures. 
==== +*/ + + sorted = FALSE_; + i__ = *ns + 1; +L30: + if (sorted) { + goto L50; + } + sorted = TRUE_; + + kend = i__ - 1; + i__ = infqr + 1; + if (i__ == *ns) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } +L40: + if (k <= kend) { + if (k == i__ + 1) { + evi = (r__1 = t[i__ + i__ * t_dim1], dabs(r__1)); + } else { + evi = (r__3 = t[i__ + i__ * t_dim1], dabs(r__3)) + sqrt((r__1 + = t[i__ + 1 + i__ * t_dim1], dabs(r__1))) * sqrt(( + r__2 = t[i__ + (i__ + 1) * t_dim1], dabs(r__2))); + } + + if (k == kend) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else if (t[k + 1 + k * t_dim1] == 0.f) { + evk = (r__1 = t[k + k * t_dim1], dabs(r__1)); + } else { + evk = (r__3 = t[k + k * t_dim1], dabs(r__3)) + sqrt((r__1 = t[ + k + 1 + k * t_dim1], dabs(r__1))) * sqrt((r__2 = t[k + + (k + 1) * t_dim1], dabs(r__2))); + } + + if (evi >= evk) { + i__ = k; + } else { + sorted = FALSE_; + ifst = i__; + ilst = k; + strexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &work[1], &info); + if (info == 0) { + i__ = ilst; + } else { + i__ = k; + } + } + if (i__ == kend) { + k = i__ + 1; + } else if (t[i__ + 1 + i__ * t_dim1] == 0.f) { + k = i__ + 1; + } else { + k = i__ + 2; + } + goto L40; + } + goto L30; +L50: + ; + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__ = jw; +L60: + if (i__ >= infqr + 1) { + if (i__ == infqr + 1) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else if (t[i__ + (i__ - 1) * t_dim1] == 0.f) { + sr[kwtop + i__ - 1] = t[i__ + i__ * t_dim1]; + si[kwtop + i__ - 1] = 0.f; + --i__; + } else { + aa = t[i__ - 1 + (i__ - 1) * t_dim1]; + cc = t[i__ + (i__ - 1) * t_dim1]; + bb = t[i__ - 1 + i__ * t_dim1]; + dd = t[i__ + i__ * t_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &sr[kwtop + i__ - 2], &si[kwtop + i__ + - 2], &sr[kwtop + i__ - 1], &si[kwtop + i__ - 1], &cs, & + sn); + i__ += -2; + } + goto L60; + } + + if (*ns < jw || s == 
0.f) { + if (*ns > 1 && s != 0.f) { + +/* ==== Reflect spike back into lower triangle ==== */ + + scopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + beta = work[1]; + slarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1] = 1.f; + + i__1 = jw - 2; + i__2 = jw - 2; + slaset_("L", &i__1, &i__2, &c_b29, &c_b29, &t[t_dim1 + 3], ldt); + + slarf_("L", ns, &jw, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + slarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + sgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + h__[kwtop + (kwtop - 1) * h_dim1] = s * v[v_dim1 + 1]; + } + slacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + scopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && s != 0.f) { + i__1 = *lwork - jw; + sormhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b29, &wv[wv_offset], + ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L70: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? 
kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + sgemm_("C", "N", &jw, &kln, &jw, &c_b15, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b29, &t[t_offset], + ldt); + slacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L80: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + sgemm_("N", "N", &kln, &jw, &jw, &c_b15, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b29, &wv[ + wv_offset], ldwv); + slacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L90: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. 
==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR3 ==== */ + + return 0; +} /* slaqr3_ */ + +/* Subroutine */ int slaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real * + wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, real *work, + integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + real r__1, r__2, r__3, r__4; + + /* Local variables */ + static integer i__, k; + static real aa, bb, cc, dd; + static integer ld; + static real cs; + static integer nh, it, ks, kt; + static real sn; + static integer ku, kv, ls, ns; + static real ss; + static integer nw, inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, + kbot, nmin; + static real swap; + static integer ktop; + static real zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int slaqr2_(logical *, logical *, integer *, + integer *, integer *, integer *, real *, integer *, integer *, + integer *, real *, integer *, integer *, integer *, real *, real * + , real *, integer *, integer *, real *, integer *, integer *, + real *, integer *, real *, integer *), slanv2_(real *, real *, + real *, real *, real *, real *, real *, real *, real *, real *), + slaqr5_(logical *, logical *, integer *, integer *, integer *, + integer *, integer *, real *, real *, real *, integer *, integer * + , integer *, real *, integer *, real *, integer *, real *, + integer *, integer *, real *, integer *, integer *, real *, + integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + extern /* Subroutine */ int slahqr_(logical *, logical *, integer *, + integer *, integer *, real *, integer *, real *, real *, integer * + , integer *, real *, integer *, integer *), slacpy_(char *, + integer *, 
integer *, real *, integer *, real *, integer *); + static integer nwupbd; + static logical sorted; + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This subroutine implements one level of recursion for SLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by SLAQR0 and, for large enough + deflation window size, it may be called by SLAQR3. This + subroutine is identical to SLAQR0 except that it calls SLAQR2 + instead of SLAQR3. + + Purpose + ======= + + SLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**T, where T is an upper quasi-triangular matrix (the + Schur form), and Z is the orthogonal matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input orthogonal + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the orthogonal matrix Q: A = Q*H*Q**T = (QZ)*T*(QZ)**T. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to SGEBAL, and then passed to SGEHRD when the + matrix output by SGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. 
+ + H (input/output) REAL array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H contains + the upper quasi-triangular matrix T from the Schur + decomposition (the Schur form); 2-by-2 diagonal blocks + (corresponding to complex conjugate pairs of eigenvalues) + are returned in standard form, with H(i,i) = H(i+1,i+1) + and H(i+1,i)*H(i,i+1).LT.0. If INFO = 0 and WANTT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + WR (output) REAL array, dimension (IHI) + WI (output) REAL array, dimension (IHI) + The real and imaginary parts, respectively, of the computed + eigenvalues of H(ILO:IHI,ILO:IHI) are stored in WR(ILO:IHI) + and WI(ILO:IHI). If two eigenvalues are computed as a + complex conjugate pair, they are stored in consecutive + elements of WR and WI, say the i-th and (i+1)th, with + WI(i) .GT. 0 and WI(i+1) .LT. 0. If WANTT is .TRUE., then + the eigenvalues are stored in the same order as on the + diagonal of the Schur form returned in H, with + WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2 diagonal + block, WI(i) = sqrt(-H(i+1,i)*H(i,i+1)) and + WI(i+1) = -WI(i). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. + 1 .LE. ILOZ .LE. ILO; IHI .LE. IHIZ .LE. N. + + Z (input/output) REAL array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) 
+ + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) REAL array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then SLAQR4 does a workspace query. + In this case, SLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, SLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is an orthogonal matrix. The final + value of H is upper Hessenberg and quasi-triangular + in rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the orthogonal matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . SLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constants WILK1 and WILK2 are used to form the + . exceptional shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --wr; + --wi; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1] = 1.f; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use SLAHQR. ==== */ + + lwkopt = 1; + if (*lwork != -1) { + slahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], & + wi[1], iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. 
==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. ==== +*/ + + nsr = ilaenv_(&c__15, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to SLAQR2 ==== +*/ + + i__1 = nwr + 1; + slaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], &h__[ + h_offset], ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], + ldh, &work[1], &c_n1); + +/* + ==== Optimal workspace = MAX(SLAQR5, SLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1]; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. 
==== */ + + if (*lwork == -1) { + work[1] = (real) lwkopt; + return 0; + } + +/* ==== SLAHQR/SLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "SLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L90; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + if (h__[k + (k - 1) * h_dim1] == 0.f) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . 
the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. ==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + if ((r__1 = h__[kwtop + (kwtop - 1) * h_dim1], dabs(r__1)) + > (r__2 = h__[kwtop - 1 + (kwtop - 2) * h_dim1], + dabs(r__2))) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + slaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &wr[1], &wi[1], + &h__[kv + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . 
heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. +*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if SLAQR2 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . SLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; +/* Computing MAX */ + i__3 = ks + 1, i__4 = ktop + 2; + i__2 = max(i__3,i__4); + for (i__ = kbot; i__ >= i__2; i__ += -2) { + ss = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1) + ) + (r__2 = h__[i__ - 1 + (i__ - 2) * h_dim1], + dabs(r__2)); + aa = ss * .75f + h__[i__ + i__ * h_dim1]; + bb = ss; + cc = ss * -.4375f; + dd = aa; + slanv2_(&aa, &bb, &cc, &dd, &wr[i__ - 1], &wi[i__ - 1] + , &wr[i__], &wi[i__], &cs, &sn); +/* L30: */ + } + if (ks == ktop) { + wr[ks + 1] = h__[ks + 1 + (ks + 1) * h_dim1]; + wi[ks + 1] = 0.f; + wr[ks] = wr[ks + 1]; + wi[ks] = wi[ks + 1]; + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use SLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) 
==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + slacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + slahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &wr[ks], &wi[ks], &c__1, & + c__1, zdum, &c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. ==== +*/ + + if (ks >= kbot) { + aa = h__[kbot - 1 + (kbot - 1) * h_dim1]; + cc = h__[kbot + (kbot - 1) * h_dim1]; + bb = h__[kbot - 1 + kbot * h_dim1]; + dd = h__[kbot + kbot * h_dim1]; + slanv2_(&aa, &bb, &cc, &dd, &wr[kbot - 1], &wi[ + kbot - 1], &wr[kbot], &wi[kbot], &cs, &sn) + ; + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* + ==== Sort the shifts (Helps a little) + . Bubble sort keeps complex conjugate + . pairs together. ==== +*/ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + if ((r__1 = wr[i__], dabs(r__1)) + (r__2 = wi[ + i__], dabs(r__2)) < (r__3 = wr[i__ + + 1], dabs(r__3)) + (r__4 = wi[i__ + 1], + dabs(r__4))) { + sorted = FALSE_; + + swap = wr[i__]; + wr[i__] = wr[i__ + 1]; + wr[i__ + 1] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ + 1]; + wi[i__ + 1] = swap; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + +/* + ==== Shuffle shifts into pairs of real shifts + . and pairs of complex conjugate shifts + . assuming complex conjugate shifts are + . already adjacent to one another. (Yes, + . they are.) ==== +*/ + + i__2 = ks + 2; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + if (wi[i__] != -wi[i__ - 1]) { + + swap = wr[i__]; + wr[i__] = wr[i__ - 1]; + wr[i__ - 1] = wr[i__ - 2]; + wr[i__ - 2] = swap; + + swap = wi[i__]; + wi[i__] = wi[i__ - 1]; + wi[i__ - 1] = wi[i__ - 2]; + wi[i__ - 2] = swap; + } +/* L70: */ + } + } + +/* + ==== If there are only two shifts and both are + . 
real, then use only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + if (wi[kbot] == 0.f) { + if ((r__1 = wr[kbot] - h__[kbot + kbot * h_dim1], + dabs(r__1)) < (r__2 = wr[kbot - 1] - h__[kbot + + kbot * h_dim1], dabs(r__2))) { + wr[kbot - 1] = wr[kbot]; + } else { + wr[kbot] = wr[kbot - 1]; + } + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + slaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &wr[ks], + &wi[ks], &h__[h_offset], ldh, iloz, ihiz, &z__[ + z_offset], ldz, &work[1], &c__3, &h__[ku + h_dim1], + ldh, &nve, &h__[kwv + h_dim1], ldh, &nho, &h__[ku + + kwh * h_dim1], ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L80: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L90: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + work[1] = (real) lwkopt; + +/* ==== End of SLAQR4 ==== */ + + return 0; +} /* slaqr4_ */ + +/* Subroutine */ int slaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, real *sr, + real *si, real *h__, integer *ldh, integer *iloz, integer *ihiz, real + *z__, integer *ldz, real *v, integer *ldv, real *u, integer *ldu, + integer *nv, real *wv, integer *ldwv, integer *nh, real *wh, integer * + ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7; + real r__1, r__2, r__3, r__4, r__5; + + /* Local variables */ + static integer i__, j, k, m, i2, j2, i4, j4, k1; + static real h11, h12, h21, h22; + static integer m22, ns, nu; + static real vt[3], scl; + static integer kdu, kms; + static real ulp; + static integer knz, kzs; + static real tst1, tst2, beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot; + static real swap; + static integer jtop, jrow, mtop; + static real alpha; + static logical accum; + static integer ndcol, incol; + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static integer krcol, nbmps; + extern /* Subroutine */ int strmm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), slaqr1_(integer *, real *, + integer *, real *, real *, real *, real *, real *), slabad_(real * + , real *); + extern doublereal slamch_(char *); + static real safmin; + extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, + real *); + static real safmax; + extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, + integer *, real *, integer *), slaset_(char *, integer *, + integer *, real *, real *, real *, integer *); + 
static real refsum; + static integer mstart; + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + This auxiliary subroutine called by SLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the quasi-triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the orthogonal Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: SLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: SLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. + = 2: SLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. + + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. 
+ + SR (input/output) REAL array of size (NSHFTS) + SI (input/output) REAL array of size (NSHFTS) + SR contains the real parts and SI contains the imaginary + parts of the NSHFTS shifts of origin that define the + multi-shift QR sweep. On output SR and SI may be + reordered. + + H (input/output) REAL array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) REAL array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep orthogonal + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDA is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) REAL array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) REAL array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + in the calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) REAL array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. + + NV (input) integer scalar + NV is the number of rows in WV agailable for workspace. + NV.GE.1. 
+ + WV (workspace) REAL array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + in the calling subroutine. LDWV.GE.NV. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. ==== +*/ + + /* Parameter adjustments */ + --sr; + --si; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== Shuffle shifts into pairs of real shifts and pairs + . of complex conjugate shifts assuming complex + . conjugate shifts are already adjacent to one + . another. ==== +*/ + + i__1 = *nshfts - 2; + for (i__ = 1; i__ <= i__1; i__ += 2) { + if (si[i__] != -si[i__ + 1]) { + + swap = sr[i__]; + sr[i__] = sr[i__ + 1]; + sr[i__ + 1] = sr[i__ + 2]; + sr[i__ + 2] = swap; + + swap = si[i__]; + si[i__] = si[i__ + 1]; + si[i__ + 1] = si[i__ + 2]; + si[i__ + 2] = swap; + } +/* L10: */ + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . 
then simply reduce it by one. The shuffle above + . ensures that the dropped shift is real and that + . the remaining shifts are paired. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = slamch_("SAFE MINIMUM"); + safmax = 1.f / safmin; + slabad_(&safmin, &safmax); + ulp = slamch_("PRECISION"); + smlnum = safmin * ((real) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? ==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + h__[*ktop + 2 + *ktop * h_dim1] = 0.f; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + slaset_("ALL", &kdu, &kdu, &c_b29, &c_b15, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) 
==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. ==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) ==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + slaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &sr[(m + << 1) - 1], &si[(m << 1) - 1], &sr[m * 2], &si[m * + 2], &v[m * v_dim1 + 1]); + alpha = v[m * v_dim1 + 1]; + slarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + v[m * v_dim1 + 3] = h__[k + 3 + k * h_dim1]; + slarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + if (h__[k + 3 + k * h_dim1] != 0.f || h__[k + 3 + (k + 1) + * h_dim1] != 0.f || h__[k + 3 + (k + 2) * h_dim1] + == 0.f) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.f; + h__[k + 3 + k * h_dim1] = 0.f; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . 
reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + slaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + sr[(m << 1) - 1], &si[(m << 1) - 1], &sr[m * + 2], &si[m * 2], vt); + alpha = vt[0]; + slarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + refsum = vt[0] * (h__[k + 1 + k * h_dim1] + vt[1] * + h__[k + 2 + k * h_dim1]); + + if ((r__1 = h__[k + 2 + k * h_dim1] - refsum * vt[1], + dabs(r__1)) + (r__2 = refsum * vt[2], dabs( + r__2)) > ulp * ((r__3 = h__[k + k * h_dim1], + dabs(r__3)) + (r__4 = h__[k + 1 + (k + 1) * + h_dim1], dabs(r__4)) + (r__5 = h__[k + 2 + (k + + 2) * h_dim1], dabs(r__5)))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. ==== +*/ + + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.f; + h__[k + 3 + k * h_dim1] = 0.f; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + h__[k + 1 + k * h_dim1] -= refsum; + h__[k + 2 + k * h_dim1] = 0.f; + h__[k + 3 + k * h_dim1] = 0.f; + v[m * v_dim1 + 1] = vt[0]; + v[m * v_dim1 + 2] = vt[1]; + v[m * v_dim1 + 3] = vt[2]; + } + } + } +/* L20: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. 
==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + slaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &sr[( + m22 << 1) - 1], &si[(m22 << 1) - 1], &sr[m22 * 2], + &si[m22 * 2], &v[m22 * v_dim1 + 1]); + beta = v[m22 * v_dim1 + 1]; + slarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + beta = h__[k + 1 + k * h_dim1]; + v[m22 * v_dim1 + 2] = h__[k + 2 + k * h_dim1]; + slarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + h__[k + 1 + k * h_dim1] = beta; + h__[k + 2 + k * h_dim1] = 0.f; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + v[ + m * v_dim1 + 2] * h__[k + 2 + j * h_dim1] + v[m * + v_dim1 + 3] * h__[k + 3 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m * v_dim1 + 2]; + h__[k + 3 + j * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L30: */ + } +/* L40: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[k + 1 + j * h_dim1] + + v[m22 * v_dim1 + 2] * h__[k + 2 + j * h_dim1]); + h__[k + 1 + j * h_dim1] -= refsum; + h__[k + 2 + j * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L50: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. 
==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + if (v[m * v_dim1 + 1] != 0.f) { + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + refsum = v[m * v_dim1 + 1] * (h__[j + (k + 1) * + h_dim1] + v[m * v_dim1 + 2] * h__[j + (k + 2) + * h_dim1] + v[m * v_dim1 + 3] * h__[j + (k + + 3) * h_dim1]); + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m * v_dim1 + + 2]; + h__[j + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + + 3]; +/* L60: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) ==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1] + v[m * v_dim1 + 3] * u[j + + (kms + 3) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m * + v_dim1 + 2]; + u[j + (kms + 3) * u_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L70: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. 
==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + refsum = v[m * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1] + v[m * v_dim1 + 3] * z__[ + j + (k + 3) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m * + v_dim1 + 2]; + z__[j + (k + 3) * z_dim1] -= refsum * v[m * + v_dim1 + 3]; +/* L80: */ + } + } + } +/* L90: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22 && v[m22 * v_dim1 + 1] != 0.f) { +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + refsum = v[m22 * v_dim1 + 1] * (h__[j + (k + 1) * h_dim1] + + v[m22 * v_dim1 + 2] * h__[j + (k + 2) * h_dim1]) + ; + h__[j + (k + 1) * h_dim1] -= refsum; + h__[j + (k + 2) * h_dim1] -= refsum * v[m22 * v_dim1 + 2]; +/* L100: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (u[j + (kms + 1) * + u_dim1] + v[m22 * v_dim1 + 2] * u[j + (kms + + 2) * u_dim1]); + u[j + (kms + 1) * u_dim1] -= refsum; + u[j + (kms + 2) * u_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L110: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + refsum = v[m22 * v_dim1 + 1] * (z__[j + (k + 1) * + z_dim1] + v[m22 * v_dim1 + 2] * z__[j + (k + + 2) * z_dim1]); + z__[j + (k + 1) * z_dim1] -= refsum; + z__[j + (k + 2) * z_dim1] -= refsum * v[m22 * v_dim1 + + 2]; +/* L120: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following 
convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. ==== +*/ + + if (h__[k + 1 + k * h_dim1] != 0.f) { + tst1 = (r__1 = h__[k + k * h_dim1], dabs(r__1)) + (r__2 = + h__[k + 1 + (k + 1) * h_dim1], dabs(r__2)); + if (tst1 == 0.f) { + if (k >= *ktop + 1) { + tst1 += (r__1 = h__[k + (k - 1) * h_dim1], dabs( + r__1)); + } + if (k >= *ktop + 2) { + tst1 += (r__1 = h__[k + (k - 2) * h_dim1], dabs( + r__1)); + } + if (k >= *ktop + 3) { + tst1 += (r__1 = h__[k + (k - 3) * h_dim1], dabs( + r__1)); + } + if (k <= *kbot - 2) { + tst1 += (r__1 = h__[k + 2 + (k + 1) * h_dim1], + dabs(r__1)); + } + if (k <= *kbot - 3) { + tst1 += (r__1 = h__[k + 3 + (k + 1) * h_dim1], + dabs(r__1)); + } + if (k <= *kbot - 4) { + tst1 += (r__1 = h__[k + 4 + (k + 1) * h_dim1], + dabs(r__1)); + } + } +/* Computing MAX */ + r__2 = smlnum, r__3 = ulp * tst1; + if ((r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)) <= dmax( + r__2,r__3)) { +/* Computing MAX */ + r__3 = (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)), + r__4 = (r__2 = h__[k + (k + 1) * h_dim1], + dabs(r__2)); + h12 = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + 1 + k * h_dim1], dabs(r__1)), + r__4 = (r__2 = h__[k + (k + 1) * h_dim1], + dabs(r__2)); + h21 = dmin(r__3,r__4); +/* Computing MAX */ + r__3 = (r__1 = h__[k + 1 + (k + 1) * h_dim1], dabs( + r__1)), r__4 = (r__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], dabs(r__2)); + h11 = dmax(r__3,r__4); +/* Computing MIN */ + r__3 = (r__1 = h__[k + 1 + (k + 1) * h_dim1], dabs( + r__1)), r__4 = (r__2 = h__[k + k * h_dim1] - + h__[k + 1 + (k + 1) * h_dim1], dabs(r__2)); + h22 = dmin(r__3,r__4); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX 
*/ + r__1 = smlnum, r__2 = ulp * tst2; + if (tst2 == 0.f || h21 * (h12 / scl) <= dmax(r__1, + r__2)) { + h__[k + 1 + k * h_dim1] = 0.f; + } + } + } +/* L130: */ + } + +/* + ==== Fill in the last row of each bulge. ==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + refsum = v[m * v_dim1 + 1] * v[m * v_dim1 + 3] * h__[k + 4 + ( + k + 3) * h_dim1]; + h__[k + 4 + (k + 1) * h_dim1] = -refsum; + h__[k + 4 + (k + 2) * h_dim1] = -refsum * v[m * v_dim1 + 2]; + h__[k + 4 + (k + 3) * h_dim1] -= refsum * v[m * v_dim1 + 3]; +/* L140: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L150: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. ==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? 
jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + sgemm_("C", "N", &nu, &jlen, &nu, &c_b15, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b29, &wh[wh_offset], ldwh); + slacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L160: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + sgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b29, &wv[wv_offset], ldwv); + slacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L170: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + sgemm_("N", "N", &jlen, &nu, &nu, &c_b15, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b29, &wv[wv_offset], ldwv); + slacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L180: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? 
jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + slacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + slaset_("ALL", &kzs, &jlen, &c_b29, &c_b29, &wh[wh_offset] + , ldwh); + strmm_("L", "U", "C", "N", &knz, &jlen, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + sgemm_("C", "N", &i2, &jlen, &j2, &c_b15, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b15, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + slacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + strmm_("L", "L", "C", "N", &j2, &jlen, &c_b15, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + sgemm_("C", "N", &i__5, &jlen, &i__7, &c_b15, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b15, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + slacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L190: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . 
columns get multiplied by zero) ==== +*/ + + slacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + slaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[wv_offset] + , ldwv); + strmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + sgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b15, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + slacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + strmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + sgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b15, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + slacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L200: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . 
KZS columns get multiplied by zero) ==== +*/ + + slacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + slaset_("ALL", &jlen, &kzs, &c_b29, &c_b29, &wv[ + wv_offset], ldwv); + strmm_("R", "U", "N", "N", &jlen, &knz, &c_b15, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + sgemm_("N", "N", &jlen, &i2, &j2, &c_b15, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b15, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + slacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + strmm_("R", "L", "N", "N", &jlen, &i__5, &c_b15, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + sgemm_("N", "N", &jlen, &i__5, &i__7, &c_b15, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b15, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + slacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L210: */ + } + } + } + } +/* L220: */ + } + +/* ==== End of SLAQR5 ==== */ + + return 0; +} /* slaqr5_ */ + +/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v, + integer *incv, real *tau, real *c__, integer *ldc, real *work) +{ + /* System generated locals */ + integer c_dim1, c_offset; + real r__1; + + /* Local variables */ + static integer i__; + static logical applyleft; + extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, + integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + static integer lastc; + extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, + real *, 
integer *, real *, integer *, real *, real *, integer *); + static integer lastv; + extern integer ilaslc_(integer *, integer *, real *, integer *), ilaslr_( + integer *, integer *, real *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14901,39 +19392,74 @@ doublereal slapy3_(real *x, real *y, real *z__) --work; /* Function Body */ - if (lsame_(side, "L")) { + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (*tau != 0.f) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + while(lastv > 0 && v[i__] == 0.f) { + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { /* Form H * C */ - if (*tau != 0.f) { + if (lastv > 0) { -/* w := C' * v */ +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ - sgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], - incv, &c_b29, &work[1], &c__1); + sgemv_("Transpose", &lastv, &lastc, &c_b15, &c__[c_offset], ldc, & + v[1], incv, &c_b29, &work[1], &c__1); -/* C := C - v * w' */ +/* C(1:lastv,1:lastc) := C(...) 
- v(1:lastv,1) * w(1:lastc,1)' */ r__1 = -(*tau); - sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); + sger_(&lastv, &lastc, &r__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); } } else { /* Form C * H */ - if (*tau != 0.f) { + if (lastv > 0) { -/* w := C * v */ +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ - sgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], - incv, &c_b29, &work[1], &c__1); + sgemv_("No transpose", &lastc, &lastv, &c_b15, &c__[c_offset], + ldc, &v[1], incv, &c_b29, &work[1], &c__1); -/* C := C - w * v' */ +/* C(1:lastc,1:lastv) := C(...) - w(1:lastc,1) * v(1:lastv,1)' */ r__1 = -(*tau); - sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); + sger_(&lastc, &lastv, &r__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); } } return 0; @@ -14954,20 +19480,24 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); + static integer lastc; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, - real *, integer *), scopy_(integer *, real *, - integer *, real *, integer *), strmm_(char *, char *, char *, - char *, integer *, integer *, real *, real *, integer *, real *, - integer *); + real *, integer *); + static integer lastv; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), strmm_(char *, char *, char *, char *, integer *, + integer *, real *, real *, integer *, real *, integer *); + extern integer ilaslc_(integer *, integer *, real *, integer *), ilaslr_( + integer *, integer *, real *, integer *); static char transt[1]; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15089,6 +19619,13 @@ doublereal slapy3_(real *x, real *y, real *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C1' @@ -15096,52 +19633,53 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); + scopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); /* L10: */ } /* W := W * V1 */ - strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2 */ - i__1 = *m - *k; - sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & - c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1], - ldv, &c_b15, &work[work_offset], ldwork); + i__1 = lastv - *k; + sgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ - if (*m > *k) { + if (lastv > *k) { /* C2 := C2 - V2 * W' */ - i__1 = *m - *k; - sgemm_("No transpose", 
"Transpose", &i__1, n, k, &c_b151, - &v[*k + 1 + v_dim1], ldv, &work[work_offset], - ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); } /* W := W * V1' */ - strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L20: */ @@ -15154,6 +19692,13 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C1 @@ -15161,21 +19706,21 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + scopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ - strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2 */ - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, k, &i__1, & + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + 1 + v_dim1], ldv, &c_b15, &work[work_offset], ldwork); @@ -15183,31 +19728,32 @@ doublereal slapy3_(real *x, real *y, real *z__) /* W := 
W * T or W * T' */ - strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2' */ - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, - &work[work_offset], ldwork, &v[*k + 1 + v_dim1], - ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[*k + 1 + + v_dim1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], + ldc); } /* W := W * V1' */ - strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L50: */ @@ -15230,6 +19776,13 @@ doublereal slapy3_(real *x, real *y, real *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C2' @@ -15237,57 +19790,56 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); + scopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ - strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + 
c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1 */ - i__1 = *m - *k; - sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); + i__1 = lastv - *k; + sgemm_("Transpose", "No transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1 * W' */ - i__1 = *m - *k; - sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b151, - &v[v_offset], ldv, &work[work_offset], ldwork, & - c_b15, &c__[c_offset], ldc) - ; + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); } /* W := W * V2' */ - strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & - v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L80: */ } @@ -15299,6 +19851,13 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C2 @@ -15306,58 
+19865,57 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + scopy_(&lastc, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, & + work[j * work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ - strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1 */ - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, k, &i__1, & + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, k, &i__1, & c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1' */ - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b151, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b15, &c__[c_offset], ldc) - ; + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, &i__1, k, & + c_b151, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b15, &c__[c_offset], ldc); } /* W := W * V2' */ - strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & - v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); /* C2 := C2 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * 
c_dim1] -= work[i__ + j * - work_dim1]; + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; /* L110: */ } /* L120: */ @@ -15380,6 +19938,13 @@ doublereal slapy3_(real *x, real *y, real *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C1' @@ -15387,52 +19952,53 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); + scopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); /* L130: */ } /* W := W * V1' */ - strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2' */ - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & - c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + - 1], ldv, &c_b15, &work[work_offset], ldwork); + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 + + 1], ldv, &c_b15, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C2 := C2 - V2' * W' */ - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ - (*k + 1) * v_dim1 + 1], ldv, &work[work_offset], - ldwork, &c_b15, &c__[*k + 1 + c_dim1], ldc); + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", 
&i__1, &lastc, k, & + c_b151, &v[(*k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork, &c_b15, &c__[*k + 1 + + c_dim1], ldc); } /* W := W * V1 */ - strmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L140: */ @@ -15445,6 +20011,13 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C1 @@ -15452,39 +20025,39 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + scopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ - strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b15, & - v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + strmm_("Right", "Upper", "Transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2' */ - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & - c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) * - v_dim1 + 1], ldv, &c_b15, &work[work_offset], - ldwork); + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + + 1) * v_dim1 + 1], ldv, &c_b15, &work[work_offset], + ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], 
ldwork); + strmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2 */ - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, &i__1, k, & + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b151, &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b15, &c__[(*k + 1) * c_dim1 + 1], ldc); @@ -15492,14 +20065,14 @@ doublereal slapy3_(real *x, real *y, real *z__) /* W := W * V1 */ - strmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b15, - &v[v_offset], ldv, &work[work_offset], ldwork); + strmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b15, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1]; /* L170: */ @@ -15522,6 +20095,13 @@ doublereal slapy3_(real *x, real *y, real *z__) Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C2' @@ -15529,56 +20109,56 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); + scopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ - strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b15, & - v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*m > *k) { + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1' */ - i__1 = *m - *k; 
- sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &lastc, k, &i__1, &c_b15, + &c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & work[work_offset], ldwork); } /* W := W * T' or W * T */ - strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b15, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1' * W' */ - i__1 = *m - *k; - sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b151, &v[ - v_offset], ldv, &work[work_offset], ldwork, & - c_b15, &c__[c_offset], ldc); + i__1 = lastv - *k; + sgemm_("Transpose", "Transpose", &i__1, &lastc, k, & + c_b151, &v[v_offset], ldv, &work[work_offset], + ldwork, &c_b15, &c__[c_offset], ldc); } /* W := W * V2 */ - strmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b15, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j * + c__[lastv - *k + j + i__ * c_dim1] -= work[i__ + j * work_dim1]; /* L200: */ } @@ -15590,6 +20170,13 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilaslc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilaslr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C2 @@ -15597,57 +20184,57 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *k; for (j = 1; j <= i__1; ++j) { - scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], 
&c__1); + scopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ - strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b15, & - v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[work_offset] - , ldwork); - if (*n > *k) { + strmm_("Right", "Lower", "Transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1' */ - i__1 = *n - *k; - sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b15, & - c__[c_offset], ldc, &v[v_offset], ldv, &c_b15, & - work[work_offset], ldwork); + i__1 = lastv - *k; + sgemm_("No transpose", "Transpose", &lastc, k, &i__1, & + c_b15, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b15, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b15, &t[ - t_offset], ldt, &work[work_offset], ldwork); + strmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b15, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1 */ - i__1 = *n - *k; - sgemm_("No transpose", "No transpose", m, &i__1, k, & + i__1 = lastv - *k; + sgemm_("No transpose", "No transpose", &lastc, &i__1, k, & c_b151, &work[work_offset], ldwork, &v[v_offset], ldv, &c_b15, &c__[c_offset], ldc); } /* W := W * V2 */ - strmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b15, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + strmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b15, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j * - work_dim1]; + c__[i__ + (lastv - *k + j) * c_dim1] -= work[i__ + j * + work_dim1]; /* L230: */ } /* L240: */ @@ -15685,10 +20272,10 @@ doublereal slapy3_(real *x, 
real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15763,12 +20350,12 @@ doublereal slapy3_(real *x, real *y, real *z__) r__1 = slapy2_(alpha, &xnorm); beta = -r_sign(&r__1, alpha); safmin = slamch_("S") / slamch_("E"); + knt = 0; if (dabs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ rsafmn = 1.f / safmin; - knt = 0; L10: ++knt; i__1 = *n - 1; @@ -15785,26 +20372,20 @@ doublereal slapy3_(real *x, real *y, real *z__) xnorm = snrm2_(&i__1, &x[1], incx); r__1 = slapy2_(alpha, &xnorm); beta = -r_sign(&r__1, alpha); - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - r__1 = 1.f / (*alpha - beta); - sscal_(&i__1, &r__1, &x[1], incx); + } + *tau = (beta - *alpha) / beta; + i__1 = *n - 1; + r__1 = 1.f / (*alpha - beta); + sscal_(&i__1, &r__1, &x[1], incx); -/* If ALPHA is subnormal, it may lose relative accuracy */ +/* If ALPHA is subnormal, it may lose relative accuracy */ - *alpha = beta; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - *alpha *= safmin; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; /* L20: */ - } - } else { - *tau = (beta - *alpha) / beta; - i__1 = *n - 1; - r__1 = 1.f / (*alpha - beta); - sscal_(&i__1, &r__1, &x[1], incx); - *alpha = beta; } + *alpha = beta; } return 0; @@ -15821,19 +20402,21 @@ doublereal slapy3_(real *x, real *y, real *z__) real r__1; /* Local variables */ - static integer i__, j; + static integer i__, j, prevlastv; static real vii; extern logical lsame_(char *, char *); extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, 
integer *), strmv_(char *, char *, char *, integer *, real *, - integer *, real *, integer *); + real *, integer *, real *, integer *, real *, real *, integer *); + static integer lastv; + extern /* Subroutine */ int strmv_(char *, char *, char *, integer *, + real *, integer *, real *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15945,8 +20528,10 @@ doublereal slapy3_(real *x, real *y, real *z__) } if (lsame_(direct, "F")) { + prevlastv = *n; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(i__,prevlastv); if (tau[i__] == 0.f) { /* H(i) = I */ @@ -15963,21 +20548,39 @@ doublereal slapy3_(real *x, real *y, real *z__) vii = v[i__ + i__ * v_dim1]; v[i__ + i__ * v_dim1] = 1.f; if (lsame_(storev, "C")) { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[lastv + i__ * v_dim1] != 0.f) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ - i__2 = *n - i__ + 1; + i__2 = j - i__ + 1; i__3 = i__ - 1; r__1 = -tau[i__]; sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b29, &t[ i__ * t_dim1 + 1], &c__1); } else { +/* Skip any trailing zeros. 
*/ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + if (v[i__ + lastv * v_dim1] != 0.f) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ i__2 = i__ - 1; - i__3 = *n - i__ + 1; + i__3 = j - i__ + 1; r__1 = -tau[i__]; sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & @@ -15991,10 +20594,16 @@ doublereal slapy3_(real *x, real *y, real *z__) strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[ t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1); t[i__ + i__ * t_dim1] = tau[i__]; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } } /* L20: */ } } else { + prevlastv = 1; for (i__ = *k; i__ >= 1; --i__) { if (tau[i__] == 0.f) { @@ -16013,35 +20622,54 @@ doublereal slapy3_(real *x, real *y, real *z__) if (lsame_(storev, "C")) { vii = v[*n - *k + i__ + i__ * v_dim1]; v[*n - *k + i__ + i__ * v_dim1] = 1.f; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[lastv + i__ * v_dim1] != 0.f) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ - i__1 = *n - *k + i__; + i__1 = *n - *k + i__ - j + 1; i__2 = *k - i__; r__1 = -tau[i__]; - sgemv_("Transpose", &i__1, &i__2, &r__1, &v[(i__ + 1) - * v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], & + sgemv_("Transpose", &i__1, &i__2, &r__1, &v[j + (i__ + + 1) * v_dim1], ldv, &v[j + i__ * v_dim1], & c__1, &c_b29, &t[i__ + 1 + i__ * t_dim1], & c__1); v[*n - *k + i__ + i__ * v_dim1] = vii; } else { vii = v[i__ + (*n - *k + i__) * v_dim1]; v[i__ + (*n - *k + i__) * v_dim1] = 1.f; +/* Skip any leading zeros. 
*/ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + if (v[i__ + lastv * v_dim1] != 0.f) { + goto L36; + } + } +L36: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ i__1 = *k - i__; - i__2 = *n - *k + i__; + i__2 = *n - *k + i__ - j + 1; r__1 = -tau[i__]; sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b29, &t[i__ + 1 + i__ * t_dim1], &c__1); + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b29, &t[i__ + 1 + i__ * t_dim1], & + c__1); v[i__ + (*n - *k + i__) * v_dim1] = vii; } @@ -16052,6 +20680,11 @@ doublereal slapy3_(real *x, real *y, real *z__) + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } } t[i__ + i__ * t_dim1] = tau[i__]; } @@ -16069,24 +20702,21 @@ doublereal slapy3_(real *x, real *y, real *z__) { /* System generated locals */ integer c_dim1, c_offset, i__1; - real r__1; /* Local variables */ static integer j; static real t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6, v7, v8, v9, t10, v10, sum; - extern /* Subroutine */ int sger_(integer *, integer *, real *, real *, - integer *, real *, integer *, real *, integer *); extern logical lsame_(char *, char *); - extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, - real *, integer *, real *, integer *, real *, real *, integer *); + extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, + integer *, real *, real *, integer *, real *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16169,20 +20799,9 @@ doublereal slapy3_(real *x, real *y, real *z__) case 10: goto L190; } -/* - Code for general M - - w := C'*v -*/ - - sgemv_("Transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], &c__1, & - c_b29, &work[1], &c__1); +/* Code for general M */ -/* C := C - tau * v * w' */ - - r__1 = -(*tau); - sger_(m, n, &r__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc) - ; + slarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); goto L410; L10: @@ -16484,20 +21103,9 @@ doublereal slapy3_(real *x, real *y, real *z__) case 10: goto L390; } -/* - Code for general N - - w := C * v -*/ - - sgemv_("No transpose", m, n, &c_b15, &c__[c_offset], ldc, &v[1], & - c__1, &c_b29, &work[1], &c__1); +/* Code for general N */ -/* C := C - tau * w * v' */ - - r__1 = -(*tau); - sger_(m, n, &r__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc) - ; + slarf_(side, m, n, &v[1], &c__1, tau, &c__[c_offset], ldc, &work[1]); goto L410; L210: @@ -16792,10 +21400,6 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__) { - /* Initialized data */ - - static logical first = TRUE_; - /* System generated locals */ integer i__1; real r__1, r__2; @@ -16813,10 +21417,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -16855,20 +21459,27 @@ doublereal slapy3_(real *x, real *y, real *z__) R (output) REAL The nonzero component of the rotated vector. 
+ This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. + ===================================================================== -*/ + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / - if (first) { - first = FALSE_; - safmin = slamch_("S"); - eps = slamch_("E"); - r__1 = slamch_("B"); - i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / - 2.f); - safmn2 = pow_ri(&r__1, &i__1); - safmx2 = 1.f / safmn2; - } + IF( FIRST ) THEN +*/ + safmin = slamch_("S"); + eps = slamch_("E"); + r__1 = slamch_("B"); + i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) / 2.f); + safmn2 = pow_ri(&r__1, &i__1); + safmx2 = 1.f / safmn2; +/* + FIRST = .FALSE. + END IF +*/ if (*g == 0.f) { *cs = 1.f; *sn = 0.f; @@ -16966,10 +21577,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17097,14 +21708,16 @@ doublereal slapy3_(real *x, real *y, real *z__) extern doublereal slamch_(char *); static real cfromc; extern /* Subroutine */ int xerbla_(char *, integer *); - static real bignum, smlnum; + static real bignum; + extern logical sisnan_(real *); + static real smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17155,7 +21768,7 @@ doublereal slapy3_(real *x, real *y, real *z__) N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) REAL array, dimension (LDA,M) + A (input/output) REAL array, dimension (LDA,N) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. @@ -17200,8 +21813,10 @@ doublereal slapy3_(real *x, real *y, real *z__) if (itype == -1) { *info = -1; - } else if (*cfrom == 0.f) { + } else if (*cfrom == 0.f || sisnan_(cfrom)) { *info = -4; + } else if (sisnan_(cto)) { + *info = -5; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { @@ -17248,18 +21863,36 @@ doublereal slapy3_(real *x, real *y, real *z__) L10: cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (dabs(cto1) > dabs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. +*/ mul = ctoc / cfromc; done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. +*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.f; + } else if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (dabs(cto1) > dabs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } } if (itype == 0) { @@ -17417,10 +22050,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -17472,16 +22105,14 @@ doublereal slapy3_(real *x, real *y, real *z__) On entry, maximum size of the subproblems at the bottom of the computation tree. - IWORK INTEGER work array. - Dimension must be at least (8 * N) + IWORK (workspace) INTEGER array, dimension (8*N) - WORK REAL work array. - Dimension must be at least (3 * M**2 + 2 * M) + WORK (workspace) REAL array, dimension (3*M**2+2*M) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -17689,10 +22320,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -17753,21 +22384,21 @@ doublereal slapy3_(real *x, real *y, real *z__) The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. - D (input/output) REAL array, - dimension (N = NL+NR+1). + D (input/output) REAL array, dimension (NL+NR+1). + N = NL+NR+1 On entry D(1:NL,1:NL) contains the singular values of the upper block; and D(NL+2:N) contains the singular values of the lower block. On exit D(1:N) contains the singular values of the modified matrix. 
- ALPHA (input) REAL + ALPHA (input/output) REAL Contains the diagonal element associated with the added row. - BETA (input) REAL + BETA (input/output) REAL Contains the off-diagonal element associated with the added row. - U (input/output) REAL array, dimension(LDU,N) + U (input/output) REAL array, dimension (LDU,N) On entry U(1:NL, 1:NL) contains the left singular vectors of the upper block; U(NL+2:N, NL+2:N) contains the left singular vectors of the lower block. On exit U contains the left @@ -17776,7 +22407,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDU (input) INTEGER The leading dimension of the array U. LDU >= max( 1, N ). - VT (input/output) REAL array, dimension(LDVT,M) + VT (input/output) REAL array, dimension (LDVT,M) where M = N + SQRE. On entry VT(1:NL+1, 1:NL+1)' contains the right singular vectors of the upper block; VT(NL+2:M, NL+2:M)' contains @@ -17787,19 +22418,19 @@ doublereal slapy3_(real *x, real *y, real *z__) LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= max( 1, M ). - IDXQ (output) INTEGER array, dimension(N) + IDXQ (output) INTEGER array, dimension (N) This contains the permutation which will reintegrate the subproblem just solved back into sorted order, i.e. D( IDXQ( I = 1, N ) ) will be in ascending order. - IWORK (workspace) INTEGER array, dimension( 4 * N ) + IWORK (workspace) INTEGER array, dimension (4*N) - WORK (workspace) REAL array, dimension( 3*M**2 + 2*M ) + WORK (workspace) REAL array, dimension (3*M**2+2*M) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -17952,10 +22583,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17990,12 +22621,16 @@ doublereal slapy3_(real *x, real *y, real *z__) Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. - D (input/output) REAL array, dimension(N) + D (input/output) REAL array, dimension (N) On entry D contains the singular values of the two submatrices to be combined. On exit D contains the trailing (N-K) updated singular values (those which were deflated) sorted into increasing order. + Z (output) REAL array, dimension (N) + On exit Z contains the updating row vector in the secular + equation. + ALPHA (input) REAL Contains the diagonal element associated with the added row. @@ -18003,7 +22638,7 @@ doublereal slapy3_(real *x, real *y, real *z__) Contains the off-diagonal element associated with the added row. - U (input/output) REAL array, dimension(LDU,N) + U (input/output) REAL array, dimension (LDU,N) On entry U contains the left singular vectors of two submatrices in the two square blocks with corners at (1,1), (NL, NL), and (NL+2, NL+2), (N,N). @@ -18013,15 +22648,23 @@ doublereal slapy3_(real *x, real *y, real *z__) LDU (input) INTEGER The leading dimension of the array U. LDU >= N. - Z (output) REAL array, dimension(N) - On exit Z contains the updating row vector in the secular - equation. + VT (input/output) REAL array, dimension (LDVT,M) + On entry VT' contains the right singular vectors of two + submatrices in the two square blocks with corners at (1,1), + (NL+1, NL+1), and (NL+2, NL+2), (M,M). + On exit VT' contains the trailing (N-K) updated right singular + vectors (those which were deflated) in its last N-K columns. 
+ In case SQRE =1, the last row of VT spans the right null + space. + + LDVT (input) INTEGER + The leading dimension of the array VT. LDVT >= M. DSIGMA (output) REAL array, dimension (N) Contains a copy of the diagonal elements (K-1 singular values and one zero) in the secular equation. - U2 (output) REAL array, dimension(LDU2,N) + U2 (output) REAL array, dimension (LDU2,N) Contains a copy of the first K-1 left singular vectors which will be used by SLASD3 in a matrix multiply (SGEMM) to solve for the new left singular vectors. U2 is arranged into four @@ -18033,19 +22676,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input/output) REAL array, dimension(LDVT,M) - On entry VT' contains the right singular vectors of two - submatrices in the two square blocks with corners at (1,1), - (NL+1, NL+1), and (NL+2, NL+2), (M,M). - On exit VT' contains the trailing (N-K) updated right singular - vectors (those which were deflated) in its last N-K columns. - In case SQRE =1, the last row of VT spans the right null - space. - - LDVT (input) INTEGER - The leading dimension of the array VT. LDVT >= M. - - VT2 (output) REAL array, dimension(LDVT2,N) + VT2 (output) REAL array, dimension (LDVT2,N) VT2' contains a copy of the first K right singular vectors which will be used by SLASD3 in a matrix multiply (SGEMM) to solve for the new right singular vectors. VT2 is arranged into @@ -18057,24 +22688,31 @@ doublereal slapy3_(real *x, real *y, real *z__) LDVT2 (input) INTEGER The leading dimension of the array VT2. LDVT2 >= M. - IDXP (workspace) INTEGER array, dimension(N) + IDXP (workspace) INTEGER array, dimension (N) This will contain the permutation used to place deflated values of D at the end of the array. On output IDXP(2:K) points to the nondeflated D-values and IDXP(K+1:N) points to the deflated singular values. 
- IDX (workspace) INTEGER array, dimension(N) + IDX (workspace) INTEGER array, dimension (N) This will contain the permutation used to sort the contents of D into ascending order. - IDXC (output) INTEGER array, dimension(N) + IDXC (output) INTEGER array, dimension (N) This will contain the permutation used to arrange the columns of the deflated U matrix into three groups: the first group contains non-zero entries only at and above NL, the second contains non-zero entries only below NL+2, and the third is dense. - COLTYP (workspace/output) INTEGER array, dimension(N) + IDXQ (input/output) INTEGER array, dimension (N) + This contains the permutation which separately sorts the two + sub-problems in D into ascending order. Note that entries in + the first hlaf of this permutation must first be moved one + position backward; and entries in the second half + must first have NL+1 added to their values. + + COLTYP (workspace/output) INTEGER array, dimension (N) As workspace, this will contain a label which will indicate which of the following types a column in the U2 matrix or a row in the VT2 matrix is: @@ -18086,13 +22724,6 @@ doublereal slapy3_(real *x, real *y, real *z__) On exit, it is an array of dimension 4, with COLTYP(I) being the dimension of the I-th type columns. - IDXQ (input) INTEGER array, dimension(N) - This contains the permutation which separately sorts the two - sub-problems in D into ascending order. Note that entries in - the first hlaf of this permutation must first be moved one - position backward; and entries in the second half - must first have NL+1 added to their values. - INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. @@ -18547,10 +23178,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. 
of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -18599,12 +23230,12 @@ doublereal slapy3_(real *x, real *y, real *z__) LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= K. - DSIGMA (input) REAL array, dimension(K) + DSIGMA (input/output) REAL array, dimension(K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. - U (input) REAL array, dimension (LDU, N) + U (output) REAL array, dimension (LDU, N) The last N - K columns of this matrix contain the deflated left singular vectors. @@ -18618,21 +23249,21 @@ doublereal slapy3_(real *x, real *y, real *z__) LDU2 (input) INTEGER The leading dimension of the array U2. LDU2 >= N. - VT (input) REAL array, dimension (LDVT, M) + VT (output) REAL array, dimension (LDVT, M) The last M - K columns of VT' contain the deflated right singular vectors. LDVT (input) INTEGER The leading dimension of the array VT. LDVT >= N. - VT2 (input) REAL array, dimension (LDVT2, N) + VT2 (input/output) REAL array, dimension (LDVT2, N) The first K columns of VT2' contain the non-deflated right singular vectors for the split problem. LDVT2 (input) INTEGER The leading dimension of the array VT2. LDVT2 >= N. - IDXC (input) INTEGER array, dimension ( N ) + IDXC (input) INTEGER array, dimension (N) The permutation used to arrange the columns of U (and rows of VT) into three groups: the first group contains non-zero entries only at and above (or before) NL +1; the second @@ -18644,19 +23275,19 @@ doublereal slapy3_(real *x, real *y, real *z__) must be likewise permuted before the matrix multiplies can take place. 
- CTOT (input) INTEGER array, dimension ( 4 ) + CTOT (input) INTEGER array, dimension (4) A count of the total number of the various types of columns in U (or rows in VT), as described in IDXC. The fourth column type is any column which has been deflated. - Z (input) REAL array, dimension (K) + Z (input/output) REAL array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating row vector. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -18760,7 +23391,7 @@ doublereal slapy3_(real *x, real *y, real *z__) changes the bottommost bits of DSIGMA(I). It does not account for hexadecimal or decimal machines without guard digits (we know of none). We use a subroutine call to compute - 2*DLAMBDA(I) to prevent optimizing compilers from eliminating + 2*DSIGMA(I) to prevent optimizing compilers from eliminating this code. */ @@ -18971,10 +23602,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19010,10 +23641,10 @@ doublereal slapy3_(real *x, real *y, real *z__) The original eigenvalues. It is assumed that they are in order, 0 <= D(I) < D(J) for I < J. - Z (input) REAL array, dimension ( N ) + Z (input) REAL array, dimension (N) The components of the updating vector. - DELTA (output) REAL array, dimension ( N ) + DELTA (output) REAL array, dimension (N) If N .ne. 1, DELTA contains (D(j) - sigma_I) in its j-th component. 
If N = 1, then DELTA(1) = 1. The vector DELTA contains the information necessary to construct the @@ -19023,9 +23654,9 @@ doublereal slapy3_(real *x, real *y, real *z__) The scalar in the symmetric updating formula. SIGMA (output) REAL - The computed lambda_I, the I-th updated eigenvalue. + The computed sigma_I, the I-th updated eigenvalue. - WORK (workspace) REAL array, dimension ( N ) + WORK (workspace) REAL array, dimension (N) If N .ne. 1, WORK contains (D(j) + sigma_I) in its j-th component. If N = 1, then WORK( 1 ) = 1. @@ -19956,10 +24587,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -19984,14 +24615,14 @@ doublereal slapy3_(real *x, real *y, real *z__) I (input) INTEGER The index of the eigenvalue to be computed. I = 1 or I = 2. - D (input) REAL array, dimension ( 2 ) + D (input) REAL array, dimension (2) The original eigenvalues. We assume 0 <= D(1) < D(2). - Z (input) REAL array, dimension ( 2 ) + Z (input) REAL array, dimension (2) The components of the updating vector. - DELTA (output) REAL array, dimension ( 2 ) - Contains (D(j) - lambda_I) in its j-th component. + DELTA (output) REAL array, dimension (2) + Contains (D(j) - sigma_I) in its j-th component. The vector DELTA contains the information necessary to construct the eigenvectors. @@ -19999,9 +24630,9 @@ doublereal slapy3_(real *x, real *y, real *z__) The scalar in the symmetric updating formula. DSIGMA (output) REAL - The computed lambda_I, the I-th updated eigenvalue. + The computed sigma_I, the I-th updated eigenvalue. 
- WORK (workspace) REAL array, dimension ( 2 ) + WORK (workspace) REAL array, dimension (2) WORK contains (D(j) + sigma_I) in its j-th component. Further Details @@ -20150,10 +24781,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -20226,34 +24857,34 @@ doublereal slapy3_(real *x, real *y, real *z__) The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. - D (input/output) REAL array, dimension ( NL+NR+1 ). + D (input/output) REAL array, dimension (NL+NR+1). On entry D(1:NL,1:NL) contains the singular values of the upper block, and D(NL+2:N) contains the singular values of the lower block. On exit D(1:N) contains the singular values of the modified matrix. - VF (input/output) REAL array, dimension ( M ) + VF (input/output) REAL array, dimension (M) On entry, VF(1:NL+1) contains the first components of all right singular vectors of the upper block; and VF(NL+2:M) contains the first components of all right singular vectors of the lower block. On exit, VF contains the first components of all right singular vectors of the bidiagonal matrix. - VL (input/output) REAL array, dimension ( M ) + VL (input/output) REAL array, dimension (M) On entry, VL(1:NL+1) contains the last components of all right singular vectors of the upper block; and VL(NL+2:M) contains the last components of all right singular vectors of the lower block. On exit, VL contains the last components of all right singular vectors of the bidiagonal matrix. - ALPHA (input) REAL + ALPHA (input/output) REAL Contains the diagonal element associated with the added row. 
- BETA (input) REAL + BETA (input/output) REAL Contains the off-diagonal element associated with the added row. - IDXQ (output) INTEGER array, dimension ( N ) + IDXQ (output) INTEGER array, dimension (N) This contains the permutation which will reintegrate the subproblem just solved back into sorted order, i.e. D( IDXQ( I = 1, N ) ) will be in ascending order. @@ -20326,7 +24957,7 @@ doublereal slapy3_(real *x, real *y, real *z__) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -20485,10 +25116,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20986,10 +25617,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -21020,9 +25651,10 @@ doublereal slapy3_(real *x, real *y, real *z__) D (output) REAL array, dimension ( K ) On output, D contains the updated singular values. - Z (input) REAL array, dimension ( K ) - The first K elements of this array contain the components - of the deflation-adjusted updating row vector. 
+ Z (input/output) REAL array, dimension ( K ) + On entry, the first K elements of this array contain the + components of the deflation-adjusted updating row vector. + On exit, Z is updated. VF (input/output) REAL array, dimension ( K ) On entry, VF contains information passed through DBEDE8. @@ -21051,17 +25683,19 @@ doublereal slapy3_(real *x, real *y, real *z__) LDDIFR (input) INTEGER The leading dimension of DIFR, must be at least K. - DSIGMA (input) REAL array, dimension ( K ) - The first K elements of this array contain the old roots - of the deflated updating problem. These are the poles + DSIGMA (input/output) REAL array, dimension ( K ) + On entry, the first K elements of this array contain the old + roots of the deflated updating problem. These are the poles of the secular equation. + On exit, the elements of DSIGMA may be very slightly altered + in value. WORK (workspace) REAL array, dimension at least 3 * K INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -21285,10 +25919,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -21351,8 +25985,8 @@ doublereal slapy3_(real *x, real *y, real *z__) singular vector matrices of all subproblems at the bottom level. - K (output) INTEGER array, - dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. + K (output) INTEGER array, dimension ( N ) + if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0. 
If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th secular equation on the computation tree. @@ -21397,8 +26031,8 @@ doublereal slapy3_(real *x, real *y, real *z__) LDGCOL (input) INTEGER, LDGCOL = > N. The leading dimension of arrays GIVCOL and PERM. - PERM (output) INTEGER array, - dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced + PERM (output) INTEGER array, dimension ( LDGCOL, NLVL ) + if ICOMPQ = 1, and not referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records permutations done on the I-th level of the computation tree. @@ -21424,13 +26058,12 @@ doublereal slapy3_(real *x, real *y, real *z__) WORK (workspace) REAL array, dimension (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)). - IWORK (workspace) INTEGER array. - Dimension must be at least (7 * N). + IWORK (workspace) INTEGER array, dimension (7*N). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = 1, an singular value did not converge + > 0: if INFO = 1, a singular value did not converge Further Details =============== @@ -21734,10 +26367,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22080,10 +26713,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK auxiliary routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -22114,7 +26747,7 @@ doublereal slapy3_(real *x, real *y, real *z__) NDIMR (output) INTEGER array, dimension ( N ) On exit, row dimensions of right children. - MSUB (input) INTEGER. + MSUB (input) INTEGER On entry, the maximum row dimension each subproblem at the bottom of the tree can be of. @@ -22192,10 +26825,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22343,10 +26976,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- Purpose @@ -22504,38 +27142,47 @@ doublereal slapy3_(real *x, real *y, real *z__) double sqrt(doublereal); /* Local variables */ - static real d__, e; + static real d__, e, g; static integer k; static real s, t; - static integer i0, i4, n0, pp; - static real eps, tol; + static integer i0, i4, n0; + static real dn; + static integer pp; + static real dn1, dn2, dee, eps, tau, tol; static integer ipn4; static real tol2; static logical ieee; static integer nbig; static real dmin__, emin, emax; - static integer ndiv, iter; + static integer kmin, ndiv, iter; static real qmin, temp, qmax, zmax; - static integer splt, nfail; + static integer splt; + static real dmin1, dmin2; + static integer nfail; static real desig, trace, sigma; - static integer iinfo; + static integer iinfo, ttype; extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer *, real *, real *, real *, real *, integer *, integer *, integer * - , logical *); + , logical *, integer *, real *, real *, real *, real *, real *, + real *, real *); + static real deemin; extern doublereal slamch_(char *); static integer iwhila, iwhilb; static real oldemn, safmin; - extern /* Subroutine */ int xerbla_(char *, integer *); - extern integer ilaenv_(integer *, char *, char *, integer *, integer *, - integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *); + extern /* Subroutine */ int xerbla_(char *, integer *), slasrt_( + char *, integer *, real *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -22563,7 +27210,7 @@ doublereal slapy3_(real *x, real *y, real *z__) N (input) INTEGER The number of rows and columns in the matrix. N >= 0. - Z (workspace) REAL array, dimension ( 4*N ) + Z (input/output) REAL array, dimension ( 4*N ) On entry Z holds the qd array. On exit, entries 1 to N hold the eigenvalues in decreasing order, Z( 2*N+1 ) holds the trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If @@ -22721,11 +27368,17 @@ doublereal slapy3_(real *x, real *y, real *z__) return 0; } -/* Check whether the machine is IEEE conformable. */ +/* + Check whether the machine is IEEE conformable. + + IEEE = ILAENV( 10, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 .AND. + $ ILAENV( 11, 'SLASQ2', 'N', 1, 2, 3, 4 ).EQ.1 + + [11/15/2008] The case IEEE=.TRUE. has a problem in single precision with + some the test matrices of type 16. The double precision code is fine. +*/ - ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen) - 6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2, - &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1; + ieee = FALSE_; /* Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */ @@ -22820,6 +27473,17 @@ doublereal slapy3_(real *x, real *y, real *z__) /* L80: */ } +/* Initialise variables to pass to SLASQ3. */ + + ttype = 0; + dmin1 = 0.f; + dmin2 = 0.f; + dn = 0.f; + dn1 = 0.f; + dn2 = 0.f; + g = 0.f; + tau = 0.f; + iter = 2; nfail = 0; ndiv = n0 - i0 << 1; @@ -22827,7 +27491,7 @@ doublereal slapy3_(real *x, real *y, real *z__) i__1 = *n + 1; for (iwhila = 1; iwhila <= i__1; ++iwhila) { if (n0 < 1) { - goto L150; + goto L170; } /* @@ -22885,10 +27549,43 @@ doublereal slapy3_(real *x, real *y, real *z__) L100: i0 = i4 / 4; + pp = 0; -/* Store EMIN for passing to SLASQ3. 
*/ - - z__[(n0 << 2) - 1] = emin; + if (n0 - i0 > 1) { + dee = z__[(i0 << 2) - 3]; + deemin = dee; + kmin = i0; + i__2 = (n0 << 2) - 3; + for (i4 = (i0 << 2) + 1; i4 <= i__2; i4 += 4) { + dee = z__[i4] * (dee / (dee + z__[i4 - 2])); + if (dee <= deemin) { + deemin = dee; + kmin = (i4 + 3) / 4; + } +/* L110: */ + } + if (kmin - i0 << 1 < n0 - kmin && deemin <= z__[(n0 << 2) - 3] * + .5f) { + ipn4 = i0 + n0 << 2; + pp = 2; + i__2 = i0 + n0 - 1 << 1; + for (i4 = i0 << 2; i4 <= i__2; i4 += 4) { + temp = z__[i4 - 3]; + z__[i4 - 3] = z__[ipn4 - i4 - 3]; + z__[ipn4 - i4 - 3] = temp; + temp = z__[i4 - 2]; + z__[i4 - 2] = z__[ipn4 - i4 - 2]; + z__[ipn4 - i4 - 2] = temp; + temp = z__[i4 - 1]; + z__[i4 - 1] = z__[ipn4 - i4 - 5]; + z__[ipn4 - i4 - 5] = temp; + temp = z__[i4]; + z__[i4] = z__[ipn4 - i4 - 4]; + z__[ipn4 - i4 - 4] = temp; +/* L120: */ + } + } + } /* Put -(initial shift) into DMIN. @@ -22898,21 +27595,26 @@ doublereal slapy3_(real *x, real *y, real *z__) r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax); dmin__ = -dmax(r__1,r__2); -/* Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */ - - pp = 0; +/* + Now I0:N0 is unreduced. + PP = 0 for ping, PP = 1 for pong. + PP = 2 indicates that flipping was applied to the Z array and + and that the tests for deflation upon entry in SLASQ3 + should not be performed. +*/ nbig = (n0 - i0 + 1) * 30; i__2 = nbig; for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) { if (i0 > n0) { - goto L130; + goto L150; } /* While submatrix unfinished take a good dqds step. 
*/ slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, & - nfail, &iter, &ndiv, &ieee); + nfail, &iter, &ndiv, &ieee, &ttype, &dmin1, &dmin2, &dn, & + dn1, &dn2, &g, &tau); pp = 1 - pp; @@ -22945,7 +27647,7 @@ doublereal slapy3_(real *x, real *y, real *z__) r__1 = oldemn, r__2 = z__[i4]; oldemn = dmin(r__1,r__2); } -/* L110: */ +/* L130: */ } z__[(n0 << 2) - 1] = emin; z__[n0 * 4] = oldemn; @@ -22953,7 +27655,7 @@ doublereal slapy3_(real *x, real *y, real *z__) } } -/* L120: */ +/* L140: */ } *info = 2; @@ -22961,9 +27663,9 @@ doublereal slapy3_(real *x, real *y, real *z__) /* end IWHILB */ -L130: +L150: -/* L140: */ +/* L160: */ ; } @@ -22972,14 +27674,14 @@ doublereal slapy3_(real *x, real *y, real *z__) /* end IWHILA */ -L150: +L170: /* Move q's to the front. */ i__1 = *n; for (k = 2; k <= i__1; ++k) { z__[k] = z__[(k << 2) - 3]; -/* L160: */ +/* L180: */ } /* Sort and compute sum of eigenvalues. */ @@ -22989,7 +27691,7 @@ doublereal slapy3_(real *x, real *y, real *z__) e = 0.f; for (k = *n; k >= 1; --k) { e += z__[k]; -/* L170: */ +/* L190: */ } /* Store trace, sum(eigenvalues) and information on performance. 
*/ @@ -23009,18 +27711,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp, real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail, - integer *iter, integer *ndiv, logical *ieee) + integer *iter, integer *ndiv, logical *ieee, integer *ttype, real * + dmin1, real *dmin2, real *dn, real *dn1, real *dn2, real *g, real * + tau) { - /* Initialized data */ - - static integer ttype = 0; - static real dmin1 = 0.f; - static real dmin2 = 0.f; - static real dn = 0.f; - static real dn1 = 0.f; - static real dn2 = 0.f; - static real tau = 0.f; - /* System generated locals */ integer i__1; real r__1, r__2; @@ -23036,19 +27730,24 @@ doublereal slapy3_(real *x, real *y, real *z__) static real tol2, temp; extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer *, integer *, real *, real *, real *, real *, real *, real *, - real *, integer *), slasq5_(integer *, integer *, real *, integer - *, real *, real *, real *, real *, real *, real *, real *, - logical *), slasq6_(integer *, integer *, real *, integer *, real - *, real *, real *, real *, real *, real *); + real *, integer *, real *), slasq5_(integer *, integer *, real *, + integer *, real *, real *, real *, real *, real *, real *, real *, + logical *), slasq6_(integer *, integer *, real *, integer *, + real *, real *, real *, real *, real *, real *); extern doublereal slamch_(char *); - static real safmin; + extern logical sisnan_(real *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 + -- LAPACK routine (version 3.2.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- June 2010 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -23064,14 +27763,17 @@ doublereal slapy3_(real *x, real *y, real *z__) I0 (input) INTEGER First index. - N0 (input) INTEGER + N0 (input/output) INTEGER Last index. Z (input) REAL array, dimension ( 4*N ) Z holds the qd array. - PP (input) INTEGER + PP (input/output) INTEGER PP=0 for ping, PP=1 for pong. + PP=2 indicates that flipping was applied to the Z array + and that the initial tests for deflation should not be + performed. DMIN (output) REAL Minimum value of d. @@ -23094,23 +27796,39 @@ doublereal slapy3_(real *x, real *y, real *z__) NDIV (output) INTEGER Number of divisions. - TTYPE (output) INTEGER - Shift type. - IEEE (input) LOGICAL Flag for IEEE or non IEEE arithmetic (passed to SLASQ5). + TTYPE (input/output) INTEGER + Shift type. + + DMIN1 (input/output) REAL + + DMIN2 (input/output) REAL + + DN (input/output) REAL + + DN1 (input/output) REAL + + DN2 (input/output) REAL + + G (input/output) REAL + + TAU (input/output) REAL + + These are passed as arguments in order to save their values + between calls to SLASQ3. + ===================================================================== */ + /* Parameter adjustments */ --z__; /* Function Body */ - n0in = *n0; eps = slamch_("Precision"); - safmin = slamch_("Safe minimum"); tol = eps * 100.f; /* Computing 2nd power */ r__1 = tol; @@ -23178,6 +27896,9 @@ doublereal slapy3_(real *x, real *y, real *z__) goto L10; L50: + if (*pp == 2) { + *pp = 0; + } /* Reverse the qd-array, if warranted. 
*/ @@ -23205,8 +27926,8 @@ doublereal slapy3_(real *x, real *y, real *z__) z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp]; } /* Computing MIN */ - r__1 = dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; - dmin2 = dmin(r__1,r__2); + r__1 = *dmin2, r__2 = z__[(*n0 << 2) + *pp - 1]; + *dmin2 = dmin(r__1,r__2); /* Computing MIN */ r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*i0 << 2) + *pp - 1] , r__1 = min(r__1,r__2), r__2 = z__[(*i0 << 2) + *pp + 3]; @@ -23223,100 +27944,94 @@ doublereal slapy3_(real *x, real *y, real *z__) } } -/* - L70: +/* Choose a shift. */ - Computing MIN -*/ - r__1 = z__[(*n0 << 2) + *pp - 1], r__2 = z__[(*n0 << 2) + *pp - 9], r__1 = - min(r__1,r__2), r__2 = dmin2 + z__[(*n0 << 2) - *pp]; - if (*dmin__ < 0.f || safmin * *qmax < dmin(r__1,r__2)) { - -/* Choose a shift. */ - - slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, &tau, &ttype); + slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2, + tau, ttype, g); -/* Call dqds until DMIN > 0. */ +/* Call dqds until DMIN > 0. */ -L80: +L70: - slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1, - &dn2, ieee); + slasq5_(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2, + ieee); - *ndiv += *n0 - *i0 + 2; - ++(*iter); + *ndiv += *n0 - *i0 + 2; + ++(*iter); -/* Check status. */ +/* Check status. */ - if (*dmin__ >= 0.f && dmin1 > 0.f) { + if (*dmin__ >= 0.f && *dmin1 > 0.f) { -/* Success. */ +/* Success. */ - goto L100; + goto L90; - } else if (*dmin__ < 0.f && dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < - tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) { + } else if (*dmin__ < 0.f && *dmin1 > 0.f && z__[(*n0 - 1 << 2) - *pp] < + tol * (*sigma + *dn1) && dabs(*dn) < tol * *sigma) { -/* Convergence hidden by negative DN. */ +/* Convergence hidden by negative DN. 
*/ - z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; - *dmin__ = 0.f; - goto L100; - } else if (*dmin__ < 0.f) { + z__[(*n0 - 1 << 2) - *pp + 2] = 0.f; + *dmin__ = 0.f; + goto L90; + } else if (*dmin__ < 0.f) { -/* TAU too big. Select new TAU and try again. */ +/* TAU too big. Select new TAU and try again. */ - ++(*nfail); - if (ttype < -22) { + ++(*nfail); + if (*ttype < -22) { -/* Failed twice. Play it safe. */ +/* Failed twice. Play it safe. */ - tau = 0.f; - } else if (dmin1 > 0.f) { + *tau = 0.f; + } else if (*dmin1 > 0.f) { -/* Late failure. Gives excellent shift. */ +/* Late failure. Gives excellent shift. */ - tau = (tau + *dmin__) * (1.f - eps * 2.f); - ttype += -11; - } else { + *tau = (*tau + *dmin__) * (1.f - eps * 2.f); + *ttype += -11; + } else { -/* Early failure. Divide by 4. */ +/* Early failure. Divide by 4. */ - tau *= .25f; - ttype += -12; - } - goto L80; - } else if (*dmin__ != *dmin__) { + *tau *= .25f; + *ttype += -12; + } + goto L70; + } else if (sisnan_(dmin__)) { -/* NaN. */ +/* NaN. */ - tau = 0.f; + if (*tau == 0.f) { goto L80; } else { + *tau = 0.f; + goto L70; + } + } else { -/* Possible underflow. Play it safe. */ +/* Possible underflow. Play it safe. */ - goto L90; - } + goto L80; } /* Risk of underflow. 
*/ -L90: - slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2); +L80: + slasq6_(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2); *ndiv += *n0 - *i0 + 2; ++(*iter); - tau = 0.f; + *tau = 0.f; -L100: - if (tau < *sigma) { - *desig += tau; +L90: + if (*tau < *sigma) { + *desig += *tau; t = *sigma + *desig; *desig -= t - *sigma; } else { - t = *sigma + tau; - *desig = *sigma - (t - tau) + *desig; + t = *sigma + *tau; + *desig = *sigma - (t - *tau) + *desig; } *sigma = t; @@ -23328,12 +28043,8 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp, integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn, - real *dn1, real *dn2, real *tau, integer *ttype) + real *dn1, real *dn2, real *tau, integer *ttype, real *g) { - /* Initialized data */ - - static real g = 0.f; - /* System generated locals */ integer i__1; real r__1, r__2; @@ -23348,10 +28059,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -23399,23 +28115,25 @@ doublereal slapy3_(real *x, real *y, real *z__) TTYPE (output) INTEGER Shift type. + G (input/output) REAL + G is passed as an argument in order to save its value between + calls to SLASQ4. 
+ Further Details =============== CNST1 = 9/16 ===================================================================== -*/ - - /* Parameter adjustments */ - --z__; - /* Function Body */ -/* A negative DMIN forces the shift to take that absolute value TTYPE records the type of shift. */ + /* Parameter adjustments */ + --z__; + + /* Function Body */ if (*dmin__ <= 0.f) { *tau = -(*dmin__); *ttype = -1; @@ -23570,13 +28288,13 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Case 6, no information to guide us. */ if (*ttype == -6) { - g += (1.f - g) * .333f; + *g += (1.f - *g) * .333f; } else if (*ttype == -18) { - g = .083250000000000005f; + *g = .083250000000000005f; } else { - g = .25f; + *g = .25f; } - s = g * *dmin__; + s = *g * *dmin__; *ttype = -6; } @@ -23719,10 +28437,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - May 17, 2000 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -23932,10 +28655,15 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + + -- Contributed by Osni Marques of the Lawrence Berkeley National -- + -- Laboratory and Beresford Parlett of the Univ. of California at -- + -- Berkeley -- + -- November 2008 -- + + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- Purpose @@ -24112,53 +28840,86 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose ======= - SLASR performs the transformation + SLASR applies a sequence of plane rotations to a real matrix A, + from either the left or the right. + + When SIDE = 'L', the transformation takes the form + + A := P*A + + and when SIDE = 'R', the transformation takes the form - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + A := A*P**T - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. - where A is an m by n real matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): + When DIRECT = 'F' (Forward sequence), then - When DIRECT = 'F' or 'f' ( Forward sequence ) then + P = P(z-1) * ... * P(2) * P(1) - P = P( z - 1 )*...*P( 2 )*P( 1 ), + and when DIRECT = 'B' (Backward sequence), then - and when DIRECT = 'B' or 'b' ( Backward sequence ) then + P = P(1) * P(2) * ... * P(z-1) - P = P( 1 )*P( 2 )*...*P( z - 1 ), + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation - where P( k ) is a plane rotation matrix for the following planes: + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). 
- when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) - This version vectorises across rows of the array A when SIDE = 'L'. + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. Arguments ========= @@ -24167,13 +28928,7 @@ doublereal slapy3_(real *x, real *y, real *z__) Specifies whether the plane rotation matrix P is applied to A on the left or the right. = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. 
- = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + = 'R': Right, compute A:= A*P**T PIVOT (input) CHARACTER*1 Specifies the plane for which P(k) is a plane rotation @@ -24182,6 +28937,12 @@ doublereal slapy3_(real *x, real *y, real *z__) = 'T': Top pivot, the plane (1,k+1) = 'B': Bottom pivot, the plane (k,z) + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + M (input) INTEGER The number of rows of the matrix A. If m <= 1, an immediate return is effected. @@ -24190,18 +28951,22 @@ doublereal slapy3_(real *x, real *y, real *z__) The number of columns of the matrix A. If n <= 1, an immediate return is effected. - C, S (input) REAL arrays, dimension + C (input) REAL array, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + The cosines c(k) of the plane rotations. + + S (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). A (input/output) REAL array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). @@ -24506,10 +29271,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24760,10 +29525,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24862,10 +29627,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25043,7 +29808,7 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Note that M is very tiny */ if (l == 0.f) { - t = r_sign(&c_b2489, &ft) * r_sign(&c_b15, >); + t = r_sign(&c_b2863, &ft) * r_sign(&c_b15, >); } else { t = gt / r_sign(&d__, &ft) + m / t; } @@ -25102,10 +29867,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25136,7 +29901,7 @@ doublereal slapy3_(real *x, real *y, real *z__) The last element of IPIV for which a row interchange will be done. - IPIV (input) INTEGER array, dimension (M*abs(INCX)) + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) The vector of pivot indices. Only the elements in positions K1 through K2 of IPIV are accessed. IPIV(K) = L implies rows K and L are to be interchanged. @@ -25230,6 +29995,460 @@ doublereal slapy3_(real *x, real *y, real *z__) } /* slaswp_ */ +/* Subroutine */ int slasy2_(logical *ltranl, logical *ltranr, integer *isgn, + integer *n1, integer *n2, real *tl, integer *ldtl, real *tr, integer * + ldtr, real *b, integer *ldb, real *scale, real *x, integer *ldx, real + *xnorm, integer *info) +{ + /* Initialized data */ + + static integer locu12[4] = { 3,4,1,2 }; + static integer locl21[4] = { 2,1,4,3 }; + static integer locu22[4] = { 4,3,2,1 }; + static logical xswpiv[4] = { FALSE_,FALSE_,TRUE_,TRUE_ }; + static logical bswpiv[4] = { FALSE_,TRUE_,FALSE_,TRUE_ }; + + /* System generated locals */ + integer b_dim1, b_offset, tl_dim1, tl_offset, tr_dim1, tr_offset, x_dim1, + x_offset; + real r__1, r__2, r__3, r__4, r__5, r__6, r__7, r__8; + + /* Local variables */ + static integer i__, j, k; + static real x2[2], l21, u11, u12; + static integer ip, jp; + static real u22, t16[16] /* was [4][4] */, gam, bet, eps, sgn, tmp[4], + tau1, btmp[4], smin; + static integer ipiv; + static real temp; + static integer jpiv[4]; + static real xmax; + static integer ipsv, jpsv; + static logical bswap; + extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, + integer *), sswap_(integer *, real *, integer *, real *, integer * + ); + static logical xswap; + extern doublereal slamch_(char *); + extern integer isamax_(integer *, real *, integer *); + static real smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. 
of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SLASY2 solves for the N1 by N2 matrix X, 1 <= N1,N2 <= 2, in + + op(TL)*X + ISGN*X*op(TR) = SCALE*B, + + where TL is N1 by N1, TR is N2 by N2, B is N1 by N2, and ISGN = 1 or + -1. op(T) = T or T', where T' denotes the transpose of T. + + Arguments + ========= + + LTRANL (input) LOGICAL + On entry, LTRANL specifies the op(TL): + = .FALSE., op(TL) = TL, + = .TRUE., op(TL) = TL'. + + LTRANR (input) LOGICAL + On entry, LTRANR specifies the op(TR): + = .FALSE., op(TR) = TR, + = .TRUE., op(TR) = TR'. + + ISGN (input) INTEGER + On entry, ISGN specifies the sign of the equation + as described before. ISGN may only be 1 or -1. + + N1 (input) INTEGER + On entry, N1 specifies the order of matrix TL. + N1 may only be 0, 1 or 2. + + N2 (input) INTEGER + On entry, N2 specifies the order of matrix TR. + N2 may only be 0, 1 or 2. + + TL (input) REAL array, dimension (LDTL,2) + On entry, TL contains an N1 by N1 matrix. + + LDTL (input) INTEGER + The leading dimension of the matrix TL. LDTL >= max(1,N1). + + TR (input) REAL array, dimension (LDTR,2) + On entry, TR contains an N2 by N2 matrix. + + LDTR (input) INTEGER + The leading dimension of the matrix TR. LDTR >= max(1,N2). + + B (input) REAL array, dimension (LDB,2) + On entry, the N1 by N2 matrix B contains the right-hand + side of the equation. + + LDB (input) INTEGER + The leading dimension of the matrix B. LDB >= max(1,N1). + + SCALE (output) REAL + On exit, SCALE contains the scale factor. SCALE is chosen + less than or equal to 1 to prevent the solution overflowing. + + X (output) REAL array, dimension (LDX,2) + On exit, X contains the N1 by N2 solution. + + LDX (input) INTEGER + The leading dimension of the matrix X. LDX >= max(1,N1). + + XNORM (output) REAL + On exit, XNORM is the infinity-norm of the solution. + + INFO (output) INTEGER + On exit, INFO is set to + 0: successful exit. 
+ 1: TL and TR have too close eigenvalues, so TL or + TR is perturbed to get a nonsingular equation. + NOTE: In the interests of speed, this routine does not + check the inputs for errors. + + ===================================================================== +*/ + + /* Parameter adjustments */ + tl_dim1 = *ldtl; + tl_offset = 1 + tl_dim1; + tl -= tl_offset; + tr_dim1 = *ldtr; + tr_offset = 1 + tr_dim1; + tr -= tr_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + x_dim1 = *ldx; + x_offset = 1 + x_dim1; + x -= x_offset; + + /* Function Body */ + +/* Do not check the input parameters for errors */ + + *info = 0; + +/* Quick return if possible */ + + if (*n1 == 0 || *n2 == 0) { + return 0; + } + +/* Set constants to control overflow */ + + eps = slamch_("P"); + smlnum = slamch_("S") / eps; + sgn = (real) (*isgn); + + k = *n1 + *n1 + *n2 - 2; + switch (k) { + case 1: goto L10; + case 2: goto L20; + case 3: goto L30; + case 4: goto L50; + } + +/* 1 by 1: TL11*X + SGN*X*TR11 = B11 */ + +L10: + tau1 = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + bet = dabs(tau1); + if (bet <= smlnum) { + tau1 = smlnum; + bet = smlnum; + *info = 1; + } + + *scale = 1.f; + gam = (r__1 = b[b_dim1 + 1], dabs(r__1)); + if (smlnum * gam > bet) { + *scale = 1.f / gam; + } + + x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / tau1; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)); + return 0; + +/* + 1 by 2: + TL11*[X11 X12] + ISGN*[X11 X12]*op[TR11 TR12] = [B11 B12] + [TR21 TR22] +*/ + +L20: + +/* + Computing MAX + Computing MAX +*/ + r__7 = (r__1 = tl[tl_dim1 + 1], dabs(r__1)), r__8 = (r__2 = tr[tr_dim1 + + 1], dabs(r__2)), r__7 = max(r__7,r__8), r__8 = (r__3 = tr[( + tr_dim1 << 1) + 1], dabs(r__3)), r__7 = max(r__7,r__8), r__8 = ( + r__4 = tr[tr_dim1 + 2], dabs(r__4)), r__7 = max(r__7,r__8), r__8 = + (r__5 = tr[(tr_dim1 << 1) + 2], dabs(r__5)); + r__6 = eps * dmax(r__7,r__8); + smin = dmax(r__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[tl_dim1 + 1] + 
sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranr) { + tmp[1] = sgn * tr[tr_dim1 + 2]; + tmp[2] = sgn * tr[(tr_dim1 << 1) + 1]; + } else { + tmp[1] = sgn * tr[(tr_dim1 << 1) + 1]; + tmp[2] = sgn * tr[tr_dim1 + 2]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[(b_dim1 << 1) + 1]; + goto L40; + +/* + 2 by 1: + op[TL11 TL12]*[X11] + ISGN* [X11]*TR11 = [B11] + [TL21 TL22] [X21] [X21] [B21] +*/ + +L30: +/* + Computing MAX + Computing MAX +*/ + r__7 = (r__1 = tr[tr_dim1 + 1], dabs(r__1)), r__8 = (r__2 = tl[tl_dim1 + + 1], dabs(r__2)), r__7 = max(r__7,r__8), r__8 = (r__3 = tl[( + tl_dim1 << 1) + 1], dabs(r__3)), r__7 = max(r__7,r__8), r__8 = ( + r__4 = tl[tl_dim1 + 2], dabs(r__4)), r__7 = max(r__7,r__8), r__8 = + (r__5 = tl[(tl_dim1 << 1) + 2], dabs(r__5)); + r__6 = eps * dmax(r__7,r__8); + smin = dmax(r__6,smlnum); + tmp[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + tmp[3] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + if (*ltranl) { + tmp[1] = tl[(tl_dim1 << 1) + 1]; + tmp[2] = tl[tl_dim1 + 2]; + } else { + tmp[1] = tl[tl_dim1 + 2]; + tmp[2] = tl[(tl_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; +L40: + +/* + Solve 2 by 2 system using complete pivoting. + Set pivots less than SMIN to SMIN. 
+*/ + + ipiv = isamax_(&c__4, tmp, &c__1); + u11 = tmp[ipiv - 1]; + if (dabs(u11) <= smin) { + *info = 1; + u11 = smin; + } + u12 = tmp[locu12[ipiv - 1] - 1]; + l21 = tmp[locl21[ipiv - 1] - 1] / u11; + u22 = tmp[locu22[ipiv - 1] - 1] - u12 * l21; + xswap = xswpiv[ipiv - 1]; + bswap = bswpiv[ipiv - 1]; + if (dabs(u22) <= smin) { + *info = 1; + u22 = smin; + } + if (bswap) { + temp = btmp[1]; + btmp[1] = btmp[0] - l21 * temp; + btmp[0] = temp; + } else { + btmp[1] -= l21 * btmp[0]; + } + *scale = 1.f; + if (smlnum * 2.f * dabs(btmp[1]) > dabs(u22) || smlnum * 2.f * dabs(btmp[ + 0]) > dabs(u11)) { +/* Computing MAX */ + r__1 = dabs(btmp[0]), r__2 = dabs(btmp[1]); + *scale = .5f / dmax(r__1,r__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + } + x2[1] = btmp[1] / u22; + x2[0] = btmp[0] / u11 - u12 / u11 * x2[1]; + if (xswap) { + temp = x2[1]; + x2[1] = x2[0]; + x2[0] = temp; + } + x[x_dim1 + 1] = x2[0]; + if (*n1 == 1) { + x[(x_dim1 << 1) + 1] = x2[1]; + *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[(x_dim1 << 1) + + 1], dabs(r__2)); + } else { + x[x_dim1 + 2] = x2[1]; +/* Computing MAX */ + r__3 = (r__1 = x[x_dim1 + 1], dabs(r__1)), r__4 = (r__2 = x[x_dim1 + + 2], dabs(r__2)); + *xnorm = dmax(r__3,r__4); + } + return 0; + +/* + 2 by 2: + op[TL11 TL12]*[X11 X12] +ISGN* [X11 X12]*op[TR11 TR12] = [B11 B12] + [TL21 TL22] [X21 X22] [X21 X22] [TR21 TR22] [B21 B22] + + Solve equivalent 4 by 4 system using complete pivoting. + Set pivots less than SMIN to SMIN. 
+*/ + +L50: +/* Computing MAX */ + r__5 = (r__1 = tr[tr_dim1 + 1], dabs(r__1)), r__6 = (r__2 = tr[(tr_dim1 << + 1) + 1], dabs(r__2)), r__5 = max(r__5,r__6), r__6 = (r__3 = tr[ + tr_dim1 + 2], dabs(r__3)), r__5 = max(r__5,r__6), r__6 = (r__4 = + tr[(tr_dim1 << 1) + 2], dabs(r__4)); + smin = dmax(r__5,r__6); +/* Computing MAX */ + r__5 = smin, r__6 = (r__1 = tl[tl_dim1 + 1], dabs(r__1)), r__5 = max(r__5, + r__6), r__6 = (r__2 = tl[(tl_dim1 << 1) + 1], dabs(r__2)), r__5 = + max(r__5,r__6), r__6 = (r__3 = tl[tl_dim1 + 2], dabs(r__3)), r__5 + = max(r__5,r__6), r__6 = (r__4 = tl[(tl_dim1 << 1) + 2], dabs( + r__4)); + smin = dmax(r__5,r__6); +/* Computing MAX */ + r__1 = eps * smin; + smin = dmax(r__1,smlnum); + btmp[0] = 0.f; + scopy_(&c__16, btmp, &c__0, t16, &c__1); + t16[0] = tl[tl_dim1 + 1] + sgn * tr[tr_dim1 + 1]; + t16[5] = tl[(tl_dim1 << 1) + 2] + sgn * tr[tr_dim1 + 1]; + t16[10] = tl[tl_dim1 + 1] + sgn * tr[(tr_dim1 << 1) + 2]; + t16[15] = tl[(tl_dim1 << 1) + 2] + sgn * tr[(tr_dim1 << 1) + 2]; + if (*ltranl) { + t16[4] = tl[tl_dim1 + 2]; + t16[1] = tl[(tl_dim1 << 1) + 1]; + t16[14] = tl[tl_dim1 + 2]; + t16[11] = tl[(tl_dim1 << 1) + 1]; + } else { + t16[4] = tl[(tl_dim1 << 1) + 1]; + t16[1] = tl[tl_dim1 + 2]; + t16[14] = tl[(tl_dim1 << 1) + 1]; + t16[11] = tl[tl_dim1 + 2]; + } + if (*ltranr) { + t16[8] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[13] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[2] = sgn * tr[tr_dim1 + 2]; + t16[7] = sgn * tr[tr_dim1 + 2]; + } else { + t16[8] = sgn * tr[tr_dim1 + 2]; + t16[13] = sgn * tr[tr_dim1 + 2]; + t16[2] = sgn * tr[(tr_dim1 << 1) + 1]; + t16[7] = sgn * tr[(tr_dim1 << 1) + 1]; + } + btmp[0] = b[b_dim1 + 1]; + btmp[1] = b[b_dim1 + 2]; + btmp[2] = b[(b_dim1 << 1) + 1]; + btmp[3] = b[(b_dim1 << 1) + 2]; + +/* Perform elimination */ + + for (i__ = 1; i__ <= 3; ++i__) { + xmax = 0.f; + for (ip = i__; ip <= 4; ++ip) { + for (jp = i__; jp <= 4; ++jp) { + if ((r__1 = t16[ip + (jp << 2) - 5], dabs(r__1)) >= xmax) { + xmax = (r__1 = t16[ip + (jp << 
2) - 5], dabs(r__1)); + ipsv = ip; + jpsv = jp; + } +/* L60: */ + } +/* L70: */ + } + if (ipsv != i__) { + sswap_(&c__4, &t16[ipsv - 1], &c__4, &t16[i__ - 1], &c__4); + temp = btmp[i__ - 1]; + btmp[i__ - 1] = btmp[ipsv - 1]; + btmp[ipsv - 1] = temp; + } + if (jpsv != i__) { + sswap_(&c__4, &t16[(jpsv << 2) - 4], &c__1, &t16[(i__ << 2) - 4], + &c__1); + } + jpiv[i__ - 1] = jpsv; + if ((r__1 = t16[i__ + (i__ << 2) - 5], dabs(r__1)) < smin) { + *info = 1; + t16[i__ + (i__ << 2) - 5] = smin; + } + for (j = i__ + 1; j <= 4; ++j) { + t16[j + (i__ << 2) - 5] /= t16[i__ + (i__ << 2) - 5]; + btmp[j - 1] -= t16[j + (i__ << 2) - 5] * btmp[i__ - 1]; + for (k = i__ + 1; k <= 4; ++k) { + t16[j + (k << 2) - 5] -= t16[j + (i__ << 2) - 5] * t16[i__ + ( + k << 2) - 5]; +/* L80: */ + } +/* L90: */ + } +/* L100: */ + } + if (dabs(t16[15]) < smin) { + t16[15] = smin; + } + *scale = 1.f; + if (smlnum * 8.f * dabs(btmp[0]) > dabs(t16[0]) || smlnum * 8.f * dabs( + btmp[1]) > dabs(t16[5]) || smlnum * 8.f * dabs(btmp[2]) > dabs( + t16[10]) || smlnum * 8.f * dabs(btmp[3]) > dabs(t16[15])) { +/* Computing MAX */ + r__1 = dabs(btmp[0]), r__2 = dabs(btmp[1]), r__1 = max(r__1,r__2), + r__2 = dabs(btmp[2]), r__1 = max(r__1,r__2), r__2 = dabs(btmp[ + 3]); + *scale = .125f / dmax(r__1,r__2); + btmp[0] *= *scale; + btmp[1] *= *scale; + btmp[2] *= *scale; + btmp[3] *= *scale; + } + for (i__ = 1; i__ <= 4; ++i__) { + k = 5 - i__; + temp = 1.f / t16[k + (k << 2) - 5]; + tmp[k - 1] = btmp[k - 1] * temp; + for (j = k + 1; j <= 4; ++j) { + tmp[k - 1] -= temp * t16[k + (j << 2) - 5] * tmp[j - 1]; +/* L110: */ + } +/* L120: */ + } + for (i__ = 1; i__ <= 3; ++i__) { + if (jpiv[4 - i__ - 1] != 4 - i__) { + temp = tmp[4 - i__ - 1]; + tmp[4 - i__ - 1] = tmp[jpiv[4 - i__ - 1] - 1]; + tmp[jpiv[4 - i__ - 1] - 1] = temp; + } +/* L130: */ + } + x[x_dim1 + 1] = tmp[0]; + x[x_dim1 + 2] = tmp[1]; + x[(x_dim1 << 1) + 1] = tmp[2]; + x[(x_dim1 << 1) + 2] = tmp[3]; +/* Computing MAX */ + r__1 = dabs(tmp[0]) + dabs(tmp[2]), 
r__2 = dabs(tmp[1]) + dabs(tmp[3]); + *xnorm = dmax(r__1,r__2); + return 0; + +/* End of SLASY2 */ + +} /* slasy2_ */ + /* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a, integer *lda, real *e, real *tau, real *w, integer *ldw) { @@ -25251,10 +30470,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25275,7 +30494,7 @@ doublereal slapy3_(real *x, real *y, real *z__) Arguments ========= - UPLO (input) CHARACTER + UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = 'U': Upper triangular @@ -25571,10 +30790,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25725,10 +30944,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25900,10 +31119,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26056,10 +31275,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26125,7 +31344,7 @@ doublereal slapy3_(real *x, real *y, real *z__) reflector H(i) or G(i), which determines Q or P**T, as returned by SGEBRD in its array argument TAUQ or TAUP. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -26328,10 +31547,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26368,7 +31587,7 @@ doublereal slapy3_(real *x, real *y, real *z__) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEHRD. 
- WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -26511,10 +31730,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26668,10 +31887,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26711,7 +31930,7 @@ doublereal slapy3_(real *x, real *y, real *z__) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGELQF. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -26924,10 +32143,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26968,7 +32187,7 @@ doublereal slapy3_(real *x, real *y, real *z__) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGEQRF. - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -27179,10 +32398,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27379,10 +32598,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27597,10 +32816,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -27687,7 +32906,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -27896,6 +33115,234 @@ doublereal slapy3_(real *x, real *y, real *z__) } /* sormbr_ */ +/* Subroutine */ int sormhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, real *a, integer *lda, real *tau, real * + c__, integer *ldc, real *work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *, + integer *, real *, integer *, real *, real *, integer *, real *, + integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + SORMHR overwrites the general real M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'T': Q**T * C C * Q**T + + where Q is a real orthogonal matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by SGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). 
+ + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**T from the Left; + = 'R': apply Q or Q**T from the Right. + + TRANS (input) CHARACTER*1 + = 'N': No transpose, apply Q; + = 'T': Transpose, apply Q**T. + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of SGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) REAL array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by SGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. + LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) REAL array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by SGEHRD. + + C (input/output) REAL array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. 
+ + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! 
lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "SORMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1] = (real) lwkopt; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("SORMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1] = 1.f; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + sormqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1] = (real) lwkopt; + return 0; + +/* End of SORMHR */ + +} /* sormhr_ */ + /* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc, real *work, integer *info) @@ -27915,10 +33362,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28137,10 +33584,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28205,7 +33652,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -28442,10 +33889,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28510,7 +33957,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -28556,10 +34003,10 @@ doublereal slapy3_(real *x, real *y, real *z__) if (left) { nq = *m; - nw = *n; + nw = max(1,*n); } else { nq = *n; - nw = *m; + nw = max(1,*m); } if (! left && ! lsame_(side, "R")) { *info = -1; @@ -28575,27 +34022,34 @@ doublereal slapy3_(real *x, real *y, real *z__) *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; } if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { /* - Determine the block size. 
NB may be at most NBMAX, where NBMAX - is used to define the local array T. + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. + Computing MIN Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } work[1] = (real) lwkopt; + + if (*lwork < nw && ! lquery) { + *info = -12; + } } if (*info != 0) { @@ -28608,8 +34062,7 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Quick return if possible */ - if (*m == 0 || *n == 0 || *k == 0) { - work[1] = 1.f; + if (*m == 0 || *n == 0) { return 0; } @@ -28737,10 +34190,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -28805,7 +34258,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -29034,10 +34487,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29103,7 +34556,7 @@ doublereal slapy3_(real *x, real *y, real *z__) LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -29291,13 +34744,14 @@ doublereal slapy3_(real *x, real *y, real *z__) real *, integer *, real *, real *, integer *); static logical upper; extern /* Subroutine */ int xerbla_(char *, integer *); + extern logical sisnan_(real *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29392,7 +34846,7 @@ doublereal slapy3_(real *x, real *y, real *z__) i__2 = j - 1; ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1], &c__1); - if (ajj <= 0.f) { + if (ajj <= 0.f || sisnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L30; } @@ -29425,7 +34879,7 @@ doublereal slapy3_(real *x, real *y, real *z__) i__2 = j - 1; ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + a_dim1], lda); - if (ajj <= 0.f) { + if (ajj <= 0.f || sisnan_(&ajj)) { a[j + j * a_dim1] = ajj; goto L30; } @@ -29483,10 +34937,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29688,10 +35142,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29791,10 +35245,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -29930,7 +35384,7 @@ doublereal slapy3_(real *x, real *y, real *z__) /* Local variables */ static integer i__, j, k, m; static real p; - static integer ii, end, lgn; + static integer ii, lgn; static real eps, tiny; extern logical lsame_(char *, char *); extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, @@ -29945,6 +35399,7 @@ doublereal slapy3_(real *x, real *y, real *z__) extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + static integer finish; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, @@ -29962,10 +35417,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -30019,8 +35474,7 @@ doublereal slapy3_(real *x, real *y, real *z__) The leading dimension of the array Z. LDZ >= 1. If eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace/output) REAL array, - dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -30032,13 +35486,16 @@ doublereal slapy3_(real *x, real *y, real *z__) that 2**k >= N. If COMPZ = 'I' and N > 1 then LWORK must be at least ( 1 + 4*N + N**2 ). 
+ Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be max(1,2*(N-1)). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -30048,6 +35505,9 @@ doublereal slapy3_(real *x, real *y, real *z__) ( 6 + 6*N + 5*N*lg N ). If COMPZ = 'I' and N > 1 then LIWORK must be at least ( 3 + 5*N ). + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, @@ -30097,44 +35557,54 @@ doublereal slapy3_(real *x, real *y, real *z__) } else { icompz = -1; } - if (*n <= 1 || icompz <= 0) { - liwmin = 1; - lwmin = 1; - } else { - lgn = (integer) (log((real) (*n)) / log(2.f)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; - liwmin = *n * 6 + 6 + *n * 5 * lgn; - } else if (icompz == 2) { -/* Computing 2nd power */ - i__1 = *n; - lwmin = (*n << 2) + 1 + i__1 * i__1; - liwmin = *n * 5 + 3; - } - } if (icompz < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! 
lquery) { - *info = -10; } if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + liwmin = 1; + lwmin = 1; + } else if (*n <= smlsiz) { + liwmin = 1; + lwmin = *n - 1 << 1; + } else { + lgn = (integer) (log((real) (*n)) / log(2.f)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (icompz == 1) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 3 + 1 + (*n << 1) * lgn + i__1 * i__1 * 3; + liwmin = *n * 6 + 6 + *n * 5 * lgn; + } else if (icompz == 2) { +/* Computing 2nd power */ + i__1 = *n; + lwmin = (*n << 2) + 1 + i__1 * i__1; + liwmin = *n * 5 + 3; + } + } work[1] = (real) lwmin; iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } } if (*info != 0) { @@ -30157,9 +35627,6 @@ doublereal slapy3_(real *x, real *y, real *z__) return 0; } - smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - /* If the following conditional clause is removed, then the routine will use the Divide and Conquer routine to compute only the @@ -30167,14 +35634,15 @@ doublereal slapy3_(real *x, real *y, real *z__) (2 + 5N + 2N lg(N)) integer workspace. Since on many architectures SSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here - as the default. + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. If COMPZ = 'N', use SSTERF to compute the eigenvalues. 
*/ if (icompz == 0) { ssterf_(n, &d__[1], &e[1], info); - return 0; + goto L50; } /* @@ -30183,182 +35651,176 @@ doublereal slapy3_(real *x, real *y, real *z__) */ if (*n <= smlsiz) { - if (icompz == 0) { - ssterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } else { - ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], - info); - return 0; - } - } + + ssteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1], info); + + } else { /* - If COMPZ = 'V', the Z matrix must be stored elsewhere for later - use. + If COMPZ = 'V', the Z matrix must be stored elsewhere for later + use. */ - if (icompz == 1) { - storez = *n * *n + 1; - } else { - storez = 1; - } + if (icompz == 1) { + storez = *n * *n + 1; + } else { + storez = 1; + } - if (icompz == 2) { - slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); - } + if (icompz == 2) { + slaset_("Full", n, n, &c_b29, &c_b15, &z__[z_offset], ldz); + } -/* Scale. */ +/* Scale. */ - orgnrm = slanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.f) { - return 0; - } + orgnrm = slanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.f) { + goto L50; + } - eps = slamch_("Epsilon"); + eps = slamch_("Epsilon"); - start = 1; + start = 1; -/* while ( START <= N ) */ +/* while ( START <= N ) */ L10: - if (start <= *n) { + if (start <= *n) { /* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. 
*/ - end = start; + finish = start; L20: - if (end < *n) { - tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 = - d__[end + 1], dabs(r__2))); - if ((r__1 = e[end], dabs(r__1)) > tiny) { - ++end; - goto L20; + if (finish < *n) { + tiny = eps * sqrt((r__1 = d__[finish], dabs(r__1))) * sqrt(( + r__2 = d__[finish + 1], dabs(r__2))); + if ((r__1 = e[finish], dabs(r__1)) > tiny) { + ++finish; + goto L20; + } } - } -/* (Sub) Problem determined. Compute its size and solve it. */ +/* (Sub) Problem determined. Compute its size and solve it. */ - m = end - start + 1; - if (m == 1) { - start = end + 1; - goto L10; - } - if (m > smlsiz) { - *info = smlsiz; + m = finish - start + 1; + if (m == 1) { + start = finish + 1; + goto L10; + } + if (m > smlsiz) { -/* Scale. */ +/* Scale. */ - orgnrm = slanst_("M", &m, &d__[start], &e[start]); - slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[start] - , &m, info); - i__1 = m - 1; - i__2 = m - 1; - slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ - start], &i__2, info); + orgnrm = slanst_("M", &m, &d__[start], &e[start]); + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + slascl_("G", &c__0, &c__0, &orgnrm, &c_b15, &i__1, &c__1, &e[ + start], &i__2, info); - if (icompz == 1) { - strtrw = 1; - } else { - strtrw = start; - } - slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + - start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[ - 1], info); - if (*info != 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } + if (icompz == 1) { + strtrw = 1; + } else { + strtrw = start; + } + slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw + + start * z_dim1], ldz, &work[1], n, &work[storez], & + iwork[1], info); + if (*info != 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L50; + } -/* Scale back. */ +/* Scale back. 
*/ - slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[start] - , &m, info); + slascl_("G", &c__0, &c__0, &c_b15, &orgnrm, &m, &c__1, &d__[ + start], &m, info); - } else { - if (icompz == 1) { + } else { + if (icompz == 1) { /* - Since QR won't update a Z matrix which is larger than the - length of D, we must solve the sub-problem in a workspace and - then multiply back into Z. + Since QR won't update a Z matrix which is larger than + the length of D, we must solve the sub-problem in a + workspace and then multiply back into Z. */ - ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[ - m * m + 1], info); - slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ - storez], n); - sgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], ldz, &work[ - 1], &m, &c_b29, &z__[start * z_dim1 + 1], ldz); - } else if (icompz == 2) { - ssteqr_("I", &m, &d__[start], &e[start], &z__[start + start * - z_dim1], ldz, &work[1], info); - } else { - ssterf_(&m, &d__[start], &e[start], info); - } - if (*info != 0) { - *info = start * (*n + 1) + end; - return 0; + ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, & + work[m * m + 1], info); + slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[ + storez], n); + sgemm_("N", "N", n, &m, &m, &c_b15, &work[storez], n, & + work[1], &m, &c_b29, &z__[start * z_dim1 + 1], + ldz); + } else if (icompz == 2) { + ssteqr_("I", &m, &d__[start], &e[start], &z__[start + + start * z_dim1], ldz, &work[1], info); + } else { + ssterf_(&m, &d__[start], &e[start], info); + } + if (*info != 0) { + *info = start * (*n + 1) + finish; + goto L50; + } } - } - start = end + 1; - goto L10; - } + start = finish + 1; + goto L10; + } /* - endwhile + endwhile - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. + If the problem split any number of times, then the eigenvalues + will not be properly ordered. 
Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. */ - if (m != *n) { - if (icompz == 0) { + if (m != *n) { + if (icompz == 0) { -/* Use Quick Sort */ +/* Use Quick Sort */ - slasrt_("I", n, &d__[1], info); + slasrt_("I", n, &d__[1], info); - } else { + } else { -/* Use Selection Sort to minimize swaps of eigenvectors */ +/* Use Selection Sort to minimize swaps of eigenvectors */ - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } /* L30: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 - + 1], &c__1); - } + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * + z_dim1 + 1], &c__1); + } /* L40: */ + } } } } +L50: work[1] = (real) lwmin; iwork[1] = liwmin; @@ -30416,10 +35878,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31004,10 +36466,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31410,7 +36872,7 @@ doublereal slapy3_(real *x, real *y, real *z__) integer *liwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3; + integer a_dim1, a_offset, i__1, i__2; real r__1; /* Builtin functions */ @@ -31430,6 +36892,8 @@ doublereal slapy3_(real *x, real *y, real *z__) static integer indwk2, llwrk2, iscale; extern doublereal slamch_(char *); static real safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, @@ -31454,10 +36918,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -31521,11 +36985,12 @@ doublereal slapy3_(real *x, real *y, real *z__) 1 + 6*N + 2*N**2. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + only calculates the optimal sizes of the WORK and IWORK + arrays, returns these values as the first entries of the WORK + and IWORK arrays, and no error message related to LWORK or + LIWORK is issued by XERBLA. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. 
LIWORK (input) INTEGER @@ -31535,16 +37000,21 @@ doublereal slapy3_(real *x, real *y, real *z__) If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK and + IWORK arrays, returns these values as the first entries of + the WORK and IWORK arrays, and no error message related to + LWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). Further Details =============== @@ -31554,6 +37024,7 @@ doublereal slapy3_(real *x, real *y, real *z__) at Berkeley, USA Modified by Francoise Tisseur, University of Tennessee. + Modified description of INFO. Sven, 16 Feb 05. ===================================================================== @@ -31574,24 +37045,6 @@ doublereal slapy3_(real *x, real *y, real *z__) lquery = *lwork == -1 || *liwork == -1; *info = 0; - if (*n <= 1) { - liwmin = 1; - lwmin = 1; - lopt = lwmin; - liopt = liwmin; - } else { - if (wantz) { - liwmin = *n * 5 + 3; -/* Computing 2nd power */ - i__1 = *n; - lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); - } else { - liwmin = 1; - lwmin = (*n << 1) + 1; - } - lopt = lwmin; - liopt = liwmin; - } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! 
(lower || lsame_(uplo, "U"))) { @@ -31600,15 +37053,38 @@ doublereal slapy3_(real *x, real *y, real *z__) *info = -3; } else if (*lda < max(1,*n)) { *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*liwork < liwmin && ! lquery) { - *info = -10; } if (*info == 0) { + if (*n <= 1) { + liwmin = 1; + lwmin = 1; + lopt = lwmin; + liopt = liwmin; + } else { + if (wantz) { + liwmin = *n * 5 + 3; +/* Computing 2nd power */ + i__1 = *n; + lwmin = *n * 6 + 1 + (i__1 * i__1 << 1); + } else { + liwmin = 1; + lwmin = (*n << 1) + 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = (*n << 1) + ilaenv_(&c__1, "SSYTRD", uplo, n, + &c_n1, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + liopt = liwmin; + } work[1] = (real) lopt; iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*liwork < liwmin && ! lquery) { + *info = -10; + } } if (*info != 0) { @@ -31669,7 +37145,6 @@ doublereal slapy3_(real *x, real *y, real *z__) ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); - lopt = (*n << 1) + work[indwrk]; /* For eigenvalues only, call SSTERF. For eigenvectors, first call @@ -31686,13 +37161,6 @@ doublereal slapy3_(real *x, real *y, real *z__) sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ indwrk], n, &work[indwk2], &llwrk2, &iinfo); slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__1 = lopt, i__2 = *n * 6 + 1 + (i__3 * i__3 << 1); - lopt = max(i__1,i__2); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ @@ -31734,10 +37202,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -32016,10 +37484,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -32073,7 +37541,7 @@ doublereal slapy3_(real *x, real *y, real *z__) The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) REAL array, dimension (LWORK) + WORK (workspace/output) REAL array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -32387,10 +37855,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -32398,28 +37866,23 @@ doublereal slapy3_(real *x, real *y, real *z__) STREVC computes some or all of the right and/or left eigenvectors of a real upper quasi-triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a real general matrix: A = Q*T*Q**T, as computed by SHSEQR. 
The right eigenvector x and the left eigenvector y of T corresponding to an eigenvalue w are defined by: - T*x = w*x, y'*T = w*y' - - where y' denotes the conjugate transpose of the vector y. + T*x = w*x, (y**H)*T = w*(y**H) - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input orthogonal - matrix. If T was obtained from the real-Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. + where y**H denotes the conjugate transpose of y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal blocks of T. - T must be in Schur canonical form (as returned by SHSEQR), that is, - block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each - 2-by-2 diagonal block has its diagonal elements equal and its - off-diagonal elements of opposite sign. Corresponding to each 2-by-2 - diagonal block is a complex conjugate pair of eigenvalues and - eigenvectors; only one eigenvector of the pair is computed, namely - the one corresponding to the eigenvalue with positive imaginary part. + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the orthogonal factor that reduces a matrix + A to Schur form T, then Q*X and Q*Y are the matrices of right and + left eigenvectors of A. Arguments ========= @@ -32432,21 +37895,21 @@ doublereal slapy3_(real *x, real *y, real *z__) HOWMNY (input) CHARACTER*1 = 'A': compute all right and/or left eigenvectors; = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; + backtransformed by the matrices in VR and/or VL; = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. 
+ as indicated by the logical array SELECT. SELECT (input/output) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenvectors to be computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the real eigenvector corresponding to a real - eigenvalue w(j), SELECT(j) must be set to .TRUE.. To select - the complex eigenvector corresponding to a complex conjugate - pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be - set to .TRUE.; then on exit SELECT(j) is .TRUE. and - SELECT(j+1) is .FALSE.. + If w(j) is a real eigenvalue, the corresponding real + eigenvector is computed if SELECT(j) is .TRUE.. + If w(j) and w(j+1) are the real and imaginary parts of a + complex eigenvalue, the corresponding complex eigenvector is + computed if either SELECT(j) or SELECT(j+1) is .TRUE., and + on exit SELECT(j) is set to .TRUE. and SELECT(j+1) is set to + .FALSE.. + Not referenced if HOWMNY = 'A' or 'B'. N (input) INTEGER The order of the matrix T. N >= 0. @@ -32463,15 +37926,6 @@ doublereal slapy3_(real *x, real *y, real *z__) of Schur vectors returned by SHSEQR). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL has the same quasi-lower triangular form - as T'. If T(i,i) is a real eigenvalue, then - the i-th column VL(i) of VL is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VL(i)+sqrt(-1)*VL(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. if HOWMNY = 'B', the matrix Q*Y; if HOWMNY = 'S', the left eigenvectors of T specified by SELECT, stored consecutively in the columns @@ -32480,11 +37934,11 @@ doublereal slapy3_(real *x, real *y, real *z__) A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part, and the second the imaginary part. - If SIDE = 'R', VL is not referenced. 
+ Not referenced if SIDE = 'R'. LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. VR (input/output) REAL array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must @@ -32492,15 +37946,6 @@ doublereal slapy3_(real *x, real *y, real *z__) of Schur vectors returned by SHSEQR). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR has the same quasi-upper triangular form - as T. If T(i,i) is a real eigenvalue, then - the i-th column VR(i) of VR is its - corresponding eigenvector. If T(i:i+1,i:i+1) - is a 2-by-2 block whose eigenvalues are - complex-conjugate eigenvalues of T, then - VR(i)+sqrt(-1)*VR(i+1) is the complex - eigenvector corresponding to the eigenvalue - with positive real part. if HOWMNY = 'B', the matrix Q*X; if HOWMNY = 'S', the right eigenvectors of T specified by SELECT, stored consecutively in the columns @@ -32509,11 +37954,11 @@ doublereal slapy3_(real *x, real *y, real *z__) A complex eigenvector corresponding to a complex eigenvalue is stored in two consecutive columns, the first holding the real part and the second the imaginary part. - If SIDE = 'L', VR is not referenced. + Not referenced if SIDE = 'L'. LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B', LDVR >= N. MM (input) INTEGER The number of columns in the arrays VL and/or VR. MM >= M. @@ -33491,11 +38936,8 @@ doublereal slapy3_(real *x, real *y, real *z__) ; } -/* - Copy the vector x or Q*x to VL and normalize. +/* Copy the vector x or Q*x to VL and normalize. */ - L210: -*/ if (! 
over) { i__2 = *n - ki + 1; scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is * @@ -33586,6 +39028,386 @@ doublereal slapy3_(real *x, real *y, real *z__) } /* strevc_ */ +/* Subroutine */ int strexc_(char *compq, integer *n, real *t, integer *ldt, + real *q, integer *ldq, integer *ifst, integer *ilst, real *work, + integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1; + + /* Local variables */ + static integer nbf, nbl, here; + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *), slaexc_( + logical *, integer *, real *, integer *, real *, integer *, + integer *, integer *, integer *, real *, integer *); + static integer nbnext; + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + STREXC reorders the real Schur factorization of a real matrix + A = Q*T*Q**T, so that the diagonal block of T with row index IFST is + moved to row ILST. + + The real Schur form T is reordered by an orthogonal similarity + transformation Z**T*T*Z, and optionally the matrix Q of Schur vectors + is updated by postmultiplying it with Z. + + T must be in Schur canonical form (as returned by SHSEQR), that is, + block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each + 2-by-2 diagonal block has its diagonal elements equal and its + off-diagonal elements of opposite sign. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) REAL array, dimension (LDT,N) + On entry, the upper quasi-triangular matrix T, in Schur + Schur canonical form. + On exit, the reordered upper quasi-triangular matrix, again + in Schur canonical form. 
+ + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) REAL array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + orthogonal transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input/output) INTEGER + ILST (input/output) INTEGER + Specify the reordering of the diagonal blocks of T. + The block with row index IFST is moved to row ILST, by a + sequence of transpositions between adjacent blocks. + On exit, if IFST pointed on entry to the second row of a + 2-by-2 block, it is changed to point to the first row; ILST + always points to the first row of the block in its final + position (which may differ from its input value by +1 or -1). + 1 <= IFST <= N; 1 <= ILST <= N. + + WORK (workspace) REAL array, dimension (N) + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + = 1: two adjacent blocks were too close to swap (the problem + is very ill-conditioned); T may have been partially + reordered, and ILST points to the first row of the + current position of the block being moved. + + ===================================================================== + + + Decode and test the input arguments. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + --work; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! wantq && ! 
lsame_(compq, "N")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("STREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n <= 1) { + return 0; + } + +/* + Determine the first row of specified block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ifst > 1) { + if (t[*ifst + (*ifst - 1) * t_dim1] != 0.f) { + --(*ifst); + } + } + nbf = 1; + if (*ifst < *n) { + if (t[*ifst + 1 + *ifst * t_dim1] != 0.f) { + nbf = 2; + } + } + +/* + Determine the first row of the final block + and find out it is 1 by 1 or 2 by 2. +*/ + + if (*ilst > 1) { + if (t[*ilst + (*ilst - 1) * t_dim1] != 0.f) { + --(*ilst); + } + } + nbl = 1; + if (*ilst < *n) { + if (t[*ilst + 1 + *ilst * t_dim1] != 0.f) { + nbl = 2; + } + } + + if (*ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Update ILST */ + + if (nbf == 2 && nbl == 1) { + --(*ilst); + } + if (nbf == 1 && nbl == 2) { + ++(*ilst); + } + + here = *ifst; + +L10: + +/* Swap block with next one below */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here + nbf + 1 <= *n) { + if (t[here + nbf + 1 + (here + nbf) * t_dim1] != 0.f) { + nbnext = 2; + } + } + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &here, & + nbf, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.f) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here + 3 <= *n) { + if (t[here + 3 + (here + 2) * t_dim1] != 0.f) { + nbnext = 2; 
+ } + } + i__1 = here + 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + ++here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + 2 + (here + 1) * t_dim1] == 0.f) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &nbnext, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += 2; + } else { + +/* 2 by 2 Block did split */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here + 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += 2; + } + } + } + if (here < *ilst) { + goto L10; + } + + } else { + + here = *ifst; +L20: + +/* Swap block with next one above */ + + if (nbf == 1 || nbf == 2) { + +/* Current block either 1 by 1 or 2 by 2 */ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.f) { + nbnext = 2; + } + } + i__1 = here - nbnext; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &nbf, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here -= nbnext; + +/* Test if 2 by 2 block breaks into two 1 by 1 blocks */ + + if (nbf == 2) { + if (t[here + 1 + here * t_dim1] == 0.f) { + nbf = 3; + } + } + + } else { + +/* + Current block consists of two 1 by 1 blocks each of which + must be swapped individually +*/ + + nbnext = 1; + if (here >= 3) { + if (t[here - 1 + (here - 2) * t_dim1] != 0.f) { + nbnext = 2; + } + } + i__1 = here - nbnext; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, &i__1, & + nbnext, &c__1, &work[1], info); 
+ if (*info != 0) { + *ilst = here; + return 0; + } + if (nbnext == 1) { + +/* Swap two 1 by 1 blocks, no problems possible */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &nbnext, &c__1, &work[1], info); + --here; + } else { + +/* Recompute NBNEXT in case 2 by 2 split */ + + if (t[here + (here - 1) * t_dim1] == 0.f) { + nbnext = 1; + } + if (nbnext == 2) { + +/* 2 by 2 Block did not split */ + + i__1 = here - 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__2, &c__1, &work[1], info); + if (*info != 0) { + *ilst = here; + return 0; + } + here += -2; + } else { + +/* 2 by 2 Block did split */ + + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + here, &c__1, &c__1, &work[1], info); + i__1 = here - 1; + slaexc_(&wantq, n, &t[t_offset], ldt, &q[q_offset], ldq, & + i__1, &c__1, &c__1, &work[1], info); + here += -2; + } + } + } + if (here > *ilst) { + goto L20; + } + } + *ilst = here; + + return 0; + +/* End of STREXC */ + +} /* strexc_ */ + /* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a, integer *lda, integer *info) { @@ -33605,10 +39427,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -33767,10 +39589,10 @@ doublereal slapy3_(real *x, real *y, real *z__) /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose diff --git a/numpy/linalg/lapack_lite/f2c_z_lapack.c b/numpy/linalg/lapack_lite/f2c_z_lapack.c index 143b7254fc18..e310331a3158 100644 --- a/numpy/linalg/lapack_lite/f2c_z_lapack.c +++ b/numpy/linalg/lapack_lite/f2c_z_lapack.c @@ -30,117 +30,27 @@ them. /* Table of constant values */ static integer c__1 = 1; -static doublecomplex c_b59 = {0.,0.}; -static doublecomplex c_b60 = {1.,0.}; +static doublecomplex c_b56 = {0.,0.}; +static doublecomplex c_b57 = {1.,0.}; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static integer c__0 = 0; -static integer c__8 = 8; -static integer c__4 = 4; static integer c__65 = 65; -static integer c__6 = 6; static integer c__9 = 9; -static doublereal c_b324 = 0.; -static doublereal c_b1015 = 1.; +static integer c__6 = 6; +static doublereal c_b328 = 0.; +static doublereal c_b1034 = 1.; +static integer c__12 = 12; +static integer c__49 = 49; +static doublereal c_b1276 = -1.; +static integer c__13 = 13; static integer c__15 = 15; +static integer c__14 = 14; +static integer c__16 = 16; static logical c_false = FALSE_; -static doublereal c_b1294 = -1.; -static doublereal c_b2210 = .5; - -/* Subroutine */ int zdrot_(integer *n, doublecomplex *cx, integer *incx, - doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s) -{ - /* System generated locals */ - integer i__1, i__2, i__3, i__4; - doublecomplex z__1, z__2, z__3; - - /* Local variables */ - static integer i__, ix, iy; - static doublecomplex ctemp; - - -/* - applies a plane rotation, where the cos and sin (c and s) are real - and the vectors cx and cy are complex. - jack dongarra, linpack, 3/11/78. 
- - - ===================================================================== -*/ - - /* Parameter adjustments */ - --cy; - --cx; - - /* Function Body */ - if (*n <= 0) { - return 0; - } - if (*incx == 1 && *incy == 1) { - goto L20; - } - -/* - code for unequal increments or equal increments not equal - to 1 -*/ - - ix = 1; - iy = 1; - if (*incx < 0) { - ix = (-(*n) + 1) * *incx + 1; - } - if (*incy < 0) { - iy = (-(*n) + 1) * *incy + 1; - } - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = ix; - z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; - i__3 = iy; - z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - ctemp.r = z__1.r, ctemp.i = z__1.i; - i__2 = iy; - i__3 = iy; - z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; - i__4 = ix; - z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; - cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; - i__2 = ix; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; - ix += *incx; - iy += *incy; -/* L10: */ - } - return 0; - -/* code for both increments equal to 1 */ - -L20: - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; - i__3 = i__; - z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - ctemp.r = z__1.r, ctemp.i = z__1.i; - i__2 = i__; - i__3 = i__; - z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; - i__4 = i__; - z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i; - z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; - cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; - i__2 = i__; - cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i; -/* L30: */ - } - return 0; -} /* zdrot_ */ +static logical c_true = TRUE_; +static doublereal c_b2435 = .5; /* Subroutine */ int zgebak_(char *job, char *side, integer *n, integer *ilo, integer *ihi, doublereal *scale, integer *m, doublecomplex *v, @@ -162,10 
+72,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -373,6 +283,7 @@ static doublereal c_b2210 = .5; doublecomplex *, integer *); static doublereal sfmin1, sfmin2, sfmax1, sfmax2; + extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( integer *, doublereal *, doublecomplex *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); @@ -380,10 +291,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -623,7 +534,7 @@ static doublereal c_b2210 = .5; sfmin1 = SAFEMINIMUM / PRECISION; sfmax1 = 1. / sfmin1; - sfmin2 = sfmin1 * 8.; + sfmin2 = sfmin1 * 2.; sfmax2 = 1. / sfmin2; L140: noconv = FALSE_; @@ -658,7 +569,7 @@ static doublereal c_b2210 = .5; if (c__ == 0. || r__ == 0.) 
{ goto L200; } - g = r__ / 8.; + g = r__ / 2.; f = 1.; s = c__ + r__; L160: @@ -669,28 +580,38 @@ static doublereal c_b2210 = .5; if (c__ >= g || max(d__1,ca) >= sfmax2 || min(d__2,ra) <= sfmin2) { goto L170; } - f *= 8.; - c__ *= 8.; - ca *= 8.; - r__ /= 8.; - g /= 8.; - ra /= 8.; + d__1 = c__ + f + ca + r__ + g + ra; + if (disnan_(&d__1)) { + +/* Exit if NaN to avoid infinite loop */ + + *info = -3; + i__2 = -(*info); + xerbla_("ZGEBAL", &i__2); + return 0; + } + f *= 2.; + c__ *= 2.; + ca *= 2.; + r__ /= 2.; + g /= 2.; + ra /= 2.; goto L160; L170: - g = c__ / 8.; + g = c__ / 2.; L180: /* Computing MIN */ d__1 = min(f,c__), d__1 = min(d__1,g); if (g < r__ || max(r__,ra) >= sfmax2 || min(d__1,ca) <= sfmin2) { goto L190; } - f /= 8.; - c__ /= 8.; - g /= 8.; - ca /= 8.; - r__ *= 8.; - ra *= 8.; + f /= 2.; + c__ /= 2.; + g /= 2.; + ca /= 2.; + r__ *= 2.; + ra *= 2.; goto L180; /* Now balance. */ @@ -740,7 +661,7 @@ static doublereal c_b2210 = .5; doublecomplex *taup, doublecomplex *work, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + integer a_dim1, a_offset, i__1, i__2, i__3; doublecomplex z__1; /* Builtin functions */ @@ -757,10 +678,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -926,11 +847,13 @@ static doublereal c_b2210 = .5; /* Apply H(i)' to A(i:m,i+1:n) from the left */ - i__2 = *m - i__ + 1; - i__3 = *n - i__; - d_cnjg(&z__1, &tauq[i__]); - zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1, - &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + if (i__ < *n) { + i__2 = *m - i__ + 1; + i__3 = *n - i__; + d_cnjg(&z__1, &tauq[i__]); + zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, & + z__1, &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]); + } i__2 = i__ + i__ * a_dim1; i__3 = i__; a[i__2].r = d__[i__3], a[i__2].i = 0.; @@ -999,12 +922,12 @@ static doublereal c_b2210 = .5; /* Apply G(i) to A(i+1:m,i:n) from the right */ - i__2 = *m - i__; - i__3 = *n - i__ + 1; -/* Computing MIN */ - i__4 = i__ + 1; - zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[ - i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]); + if (i__ < *m) { + i__2 = *m - i__; + i__3 = *n - i__ + 1; + zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, & + taup[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]); + } i__2 = *n - i__ + 1; zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda); i__2 = i__ + i__ * a_dim1; @@ -1085,10 +1008,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -1147,7 +1070,7 @@ static doublereal c_b2210 = .5; The scalar factors of the elementary reflectors which represent the unitary matrix P. See Further Details. 
- WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -1332,14 +1255,14 @@ static doublereal c_b2210 = .5; z__1.r = -1., z__1.i = -0.; zgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, & z__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + - nb + 1], &ldwrky, &c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], + nb + 1], &ldwrky, &c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], lda); i__3 = *m - i__ - nb + 1; i__4 = *n - i__ - nb + 1; z__1.r = -1., z__1.i = -0.; zgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &z__1, & work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, & - c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], lda); + c_b57, &a[i__ + nb + (i__ + nb) * a_dim1], lda); /* Copy diagonal and off-diagonal elements of B back into A */ @@ -1389,7 +1312,7 @@ static doublereal c_b2210 = .5; { /* System generated locals */ integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, - i__2, i__3, i__4; + i__2, i__3; doublereal d__1, d__2; doublecomplex z__1, z__2; @@ -1405,7 +1328,6 @@ static doublereal c_b2210 = .5; static doublecomplex tmp; static integer ibal; static char side[1]; - static integer maxb; static doublereal anrm; static integer ierr, itau, iwrk, nout; extern logical lsame_(char *, char *); @@ -1453,10 +1375,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -1521,7 +1443,7 @@ static doublereal c_b2210 = .5; The leading dimension of the array VR. 
LDVR >= 1; if JOBVR = 'V', LDVR >= N. - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -1595,62 +1517,44 @@ static doublereal c_b2210 = .5; the worst case.) */ - minwrk = 1; - if (*info == 0 && (*lwork >= 1 || lquery)) { - maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0, ( - ftnlen)6, (ftnlen)1); - if (! wantvl && ! wantvr) { -/* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "ZHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); -/* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); - maxwrk = max(maxwrk,hswork); + if (*info == 0) { + if (*n == 0) { + minwrk = 1; + maxwrk = 1; } else { + maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, & + c__0, (ftnlen)6, (ftnlen)1); + minwrk = *n << 1; + if (wantvl) { /* Computing MAX */ - i__1 = 1, i__2 = *n << 1; - minwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", - " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = ilaenv_(&c__8, "ZHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen) - 6, (ftnlen)2); - maxb = max(i__1,2); -/* - Computing MIN - Computing MAX -*/ - i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "SV", n, &c__1, n, & - c_n1, (ftnlen)6, (ftnlen)2); - i__1 = min(maxb,*n), i__2 = max(i__3,i__4); - k = min(i__1,i__2); + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + zhseqr_("S", "V", n, &c__1, n, 
&a[a_offset], lda, &w[1], &vl[ + vl_offset], ldvl, &work[1], &c_n1, info); + } else if (wantvr) { /* Computing MAX */ - i__1 = k * (k + 2), i__2 = *n << 1; - hswork = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR", + " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); + zhseqr_("S", "V", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } else { + zhseqr_("E", "N", n, &c__1, n, &a[a_offset], lda, &w[1], &vr[ + vr_offset], ldvr, &work[1], &c_n1, info); + } + hswork = (integer) work[1].r; /* Computing MAX */ - i__1 = max(maxwrk,hswork), i__2 = *n << 1; - maxwrk = max(i__1,i__2); + i__1 = max(maxwrk,hswork); + maxwrk = max(i__1,minwrk); } work[1].r = (doublereal) maxwrk, work[1].i = 0.; + + if (*lwork < minwrk && ! lquery) { + *info = -12; + } } - if (*lwork < minwrk && ! lquery) { - *info = -12; - } + if (*info != 0) { i__1 = -(*info); xerbla_("ZGEEV ", &i__1); @@ -1949,10 +1853,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2108,7 +2012,7 @@ static doublereal c_b2210 = .5; doublecomplex z__1; /* Local variables */ - static integer i__; + static integer i__, j; static doublecomplex t[4160] /* was [65][64] */; static integer ib; static doublecomplex ei; @@ -2116,34 +2020,38 @@ static doublereal c_b2210 = .5; extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zgehd2_(integer *, integer *, integer - *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), xerbla_(char *, integer *); + integer *), ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *), + zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), zgehd2_(integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *), zlahr2_(integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( + char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), - zlahrd_(integer *, integer *, integer *, doublecomplex *, integer - *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *); + doublecomplex *, integer *); static integer ldwork, lwkopt; static logical lquery; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose ======= - ZGEHRD reduces a complex general matrix A to upper Hessenberg form H - by a unitary similarity transformation: Q' * A * Q = H . + ZGEHRD reduces a complex general matrix A to upper Hessenberg form H by + an unitary similarity transformation: Q' * A * Q = H . Arguments ========= @@ -2225,6 +2133,10 @@ static doublereal c_b2210 = .5; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This file is a slight modification of LAPACK-3.0's DGEHRD + subroutine incorporating improvements proposed by Quintana-Orti and + Van de Geijn (2006). (See DLAHR2.) + ===================================================================== @@ -2289,13 +2201,21 @@ static doublereal c_b2210 = .5; return 0; } +/* + Determine the block size + + Computing MIN +*/ + i__1 = 64, i__2 = ilaenv_(&c__1, "ZGEHRD", " ", n, ilo, ihi, &c_n1, ( + ftnlen)6, (ftnlen)1); + nb = min(i__1,i__2); nbmin = 2; iws = 1; if (nb > 1 && nb < nh) { /* Determine when to cross over from blocked to unblocked code - (last block is always handled by unblocked code). + (last block is always handled by unblocked code) Computing MAX */ @@ -2304,7 +2224,7 @@ static doublereal c_b2210 = .5; nx = max(i__1,i__2); if (nx < nh) { -/* Determine if workspace is large enough for blocked code. */ +/* Determine if workspace is large enough for blocked code */ iws = *n * nb; if (*lwork < iws) { @@ -2312,7 +2232,7 @@ static doublereal c_b2210 = .5; /* Not enough workspace to use optimal NB: determine the minimum value of NB, and reduce NB or force use of - unblocked code. 
+ unblocked code Computing MAX */ @@ -2352,13 +2272,13 @@ static doublereal c_b2210 = .5; which performs the reduction, and also the matrix Y = A*V*T */ - zlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & + zlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, & c__65, &work[1], &ldwork); /* Apply the block reflector H to A(1:ihi,i+ib:ihi) from the right, computing A := A - Y * V'. V(i+ib,ib-1) must be set - to 1. + to 1 */ i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; @@ -2369,10 +2289,27 @@ static doublereal c_b2210 = .5; z__1.r = -1., z__1.i = -0.; zgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, & z__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, - &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda); + &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda); i__3 = i__ + ib + (i__ + ib - 1) * a_dim1; a[i__3].r = ei.r, a[i__3].i = ei.i; +/* + Apply the block reflector H to A(1:i,i+1:i+ib-1) from the + right +*/ + + i__3 = ib - 1; + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", &i__, & + i__3, &c_b57, &a[i__ + 1 + i__ * a_dim1], lda, &work[1], & + ldwork); + i__3 = ib - 2; + for (j = 0; j <= i__3; ++j) { + z__1.r = -1., z__1.i = -0.; + zaxpy_(&i__, &z__1, &work[ldwork * j + 1], &c__1, &a[(i__ + j + + 1) * a_dim1 + 1], &c__1); +/* L30: */ + } + /* Apply the block reflector H to A(i+1:ihi,i+ib:n) from the left @@ -2384,7 +2321,7 @@ static doublereal c_b2210 = .5; i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, & c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], & ldwork); -/* L30: */ +/* L40: */ } } @@ -2416,10 +2353,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -2567,10 +2504,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2603,7 +2540,7 @@ static doublereal c_b2210 = .5; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -2783,13 +2720,14 @@ static doublereal c_b2210 = .5; { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; - doublereal d__1; - doublecomplex z__1; + + /* Builtin functions */ + double log(doublereal); /* Local variables */ static integer ie, il, mm; static doublereal eps, anrm, bnrm; - static integer itau, iascl, ibscl; + static integer itau, nlvl, iascl, ibscl; static doublereal sfmin; static integer minmn, maxmn, itaup, itauq, mnthr, nwork; extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); @@ -2820,12 +2758,13 @@ static doublereal c_b2210 = .5; doublecomplex *, integer *, doublecomplex *, integer *), zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, integer *); - static integer minwrk, maxwrk; + static integer liwork, minwrk, maxwrk; static doublereal smlnum; extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer * ); + static integer lrwork; static logical lquery; static integer nrwork, smlsiz; 
extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *, @@ -2836,10 +2775,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -2900,7 +2839,7 @@ static doublereal c_b2210 = .5; On exit, B is overwritten by the N-by-NRHS solution matrix X. If m >= n and RANK = n, the residual sum-of-squares for the solution in the i-th column is given by the sum of - squares of elements n+1:m in that column. + squares of the modulus of elements n+1:m in that column. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,M,N). @@ -2918,39 +2857,44 @@ static doublereal c_b2210 = .5; The effective rank of A, i.e., the number of singular values which are greater than RCOND*S(1). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK must be at least 1. The exact minimum amount of workspace needed depends on M, N and NRHS. As long as LWORK is at least - 2 * N + N * NRHS + 2*N + N*NRHS if M is greater than or equal to N or - 2 * M + M * NRHS + 2*M + M*NRHS if M is less than N, the code will execute correctly. For good performance, LWORK should generally be larger. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. 
- - RWORK (workspace) DOUBLE PRECISION array, dimension at least + only calculates the optimal size of the array WORK and the + minimum sizes of the arrays RWORK and IWORK, and returns + these values as the first entries of the WORK, RWORK and + IWORK arrays, and no error message related to LWORK is issued + by XERBLA. + + RWORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) + LRWORK >= 10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + - (SMLSIZ+1)**2 + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) if M is greater than or equal to N or 10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS + - (SMLSIZ+1)**2 + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ) if M is less than N, the code will execute correctly. SMLSIZ is returned by ILAENV and is equal to the maximum size of the subproblems at the bottom of the computation tree (usually about 25), and NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) + On exit, if INFO = 0, RWORK(1) returns the minimum LRWORK. - IWORK (workspace) INTEGER array, dimension (LIWORK) - LIWORK >= 3 * MINMN * NLVL + 11 * MINMN, + IWORK (workspace) INTEGER array, dimension (MAX(1,LIWORK)) + LIWORK >= max(1, 3*MINMN*NLVL + 11*MINMN), where MINMN = MIN( M,N ). + On exit, if INFO = 0, IWORK(1) returns the minimum LIWORK. INFO (output) INTEGER = 0: successful exit @@ -2989,8 +2933,6 @@ static doublereal c_b2210 = .5; *info = 0; minmn = min(*m,*n); maxmn = max(*m,*n); - mnthr = ilaenv_(&c__6, "ZGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, ( - ftnlen)1); lquery = *lwork == -1; if (*m < 0) { *info = -1; @@ -3004,9 +2946,6 @@ static doublereal c_b2210 = .5; *info = -7; } - smlsiz = ilaenv_(&c__9, "ZGELSD", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - /* Compute workspace. (Note: Comments in the code beginning "Workspace:" describe the @@ -3016,114 +2955,159 @@ static doublereal c_b2210 = .5; following subroutine, as returned by ILAENV.) 
*/ - minwrk = 1; if (*info == 0) { - maxwrk = 0; - mm = *m; - if (*m >= *n && *m >= mnthr) { + minwrk = 1; + maxwrk = 1; + liwork = 1; + lrwork = 1; + if (minmn > 0) { + smlsiz = ilaenv_(&c__9, "ZGELSD", " ", &c__0, &c__0, &c__0, &c__0, + (ftnlen)6, (ftnlen)1); + mnthr = ilaenv_(&c__6, "ZGELSD", " ", m, n, nrhs, &c_n1, (ftnlen) + 6, (ftnlen)1); +/* Computing MAX */ + i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + + 1)) / log(2.)) + 1; + nlvl = max(i__1,0); + liwork = minmn * 3 * nlvl + minmn * 11; + mm = *m; + if (*m >= *n && *m >= mnthr) { -/* Path 1a - overdetermined, with many more rows than columns. */ +/* + Path 1a - overdetermined, with many more rows than + columns. +*/ - mm = *n; + mm = *n; /* Computing MAX */ - i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & - c_n1, &c_n1, (ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "ZUNMQR", "LC", m, - nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - } - if (*m >= *n) { + i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "ZUNMQR", "LC", + m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2); + maxwrk = max(i__1,i__2); + } + if (*m >= *n) { /* - Path 1 - overdetermined or exactly determined. + Path 1 - overdetermined or exactly determined. 
Computing MAX + Computing 2nd power */ - i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, - "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1) - ; - maxwrk = max(i__1,i__2); + i__3 = smlsiz + 1; + i__1 = i__3 * i__3, i__2 = *n * (*nrhs + 1) + (*nrhs << 1); + lrwork = *n * 10 + (*n << 1) * smlsiz + (*n << 3) * nlvl + + smlsiz * 3 * *nrhs + max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, "ZUNMBR", - "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*n << 1) + (mm + *n) * ilaenv_(&c__1, + "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, "ZUN" - "MBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*n << 1) + *nrhs * ilaenv_(&c__1, + "ZUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*n << 1) + (*n - 1) * ilaenv_(&c__1, + "ZUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; - minwrk = max(i__1,i__2); - } - if (*n > *m) { - if (*n >= mnthr) { + i__1 = maxwrk, i__2 = (*n << 1) + *n * *nrhs; + maxwrk = max(i__1,i__2); +/* Computing MAX */ + i__1 = (*n << 1) + mm, i__2 = (*n << 1) + *n * *nrhs; + minwrk = max(i__1,i__2); + } + if (*n > *m) { +/* + Computing MAX + Computing 2nd power +*/ + i__3 = smlsiz + 1; + i__1 = i__3 * i__3, i__2 = *n * (*nrhs + 1) + (*nrhs << 1); + lrwork = *m * 10 + (*m << 1) * smlsiz + (*m << 3) * nlvl + + smlsiz * 3 * *nrhs + max(i__1,i__2); + if (*n >= mnthr) { /* - Path 2a - underdetermined, with many more columns - than rows. 
+ Path 2a - underdetermined, with many more columns + than rows. */ - maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, - &c_n1, (ftnlen)6, (ftnlen)1); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * - ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, ( - ftnlen)6, (ftnlen)1); - maxwrk = max(i__1,i__2); -/* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * ilaenv_(& - c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); + maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, & + c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * - ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, ( - ftnlen)6, (ftnlen)2); - maxwrk = max(i__1,i__2); - if (*nrhs > 1) { + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m << 1) * + ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, + (ftnlen)6, (ftnlen)1); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *nrhs * + ilaenv_(&c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)3); maxwrk = max(i__1,i__2); - } else { /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + (*m - 1) * + ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, + (ftnlen)6, (ftnlen)2); maxwrk = max(i__1,i__2); - } + if (*nrhs > 1) { /* Computing MAX */ - i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; - maxwrk = max(i__1,i__2); - } else { + i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs; + maxwrk = max(i__1,i__2); + } else { +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 1); + maxwrk = max(i__1,i__2); + } +/* Computing MAX */ + i__1 = maxwrk, i__2 = *m * *m + (*m << 2) + *m * *nrhs; + maxwrk = max(i__1,i__2); +/* + XXX: Ensure the Path 2a case below is triggered. 
The workspace + calculation should use queries for all routines eventually. + Computing MAX + Computing MAX +*/ + i__3 = *m, i__4 = (*m << 1) - 4, i__3 = max(i__3,i__4), + i__3 = max(i__3,*nrhs), i__4 = *n - *m * 3; + i__1 = maxwrk, i__2 = (*m << 2) + *m * *m + max(i__3,i__4) + ; + maxwrk = max(i__1,i__2); + } else { -/* Path 2 - underdetermined. */ +/* Path 2 - underdetermined. */ - maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", - " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + maxwrk = (*m << 1) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD", + " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, - "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( - ftnlen)3); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*m << 1) + *nrhs * ilaenv_(&c__1, + "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, "ZUNMBR" - , "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3); - maxwrk = max(i__1,i__2); + i__1 = maxwrk, i__2 = (*m << 1) + *m * ilaenv_(&c__1, + "ZUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, ( + ftnlen)3); + maxwrk = max(i__1,i__2); /* Computing MAX */ - i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; - maxwrk = max(i__1,i__2); - } + i__1 = maxwrk, i__2 = (*m << 1) + *m * *nrhs; + maxwrk = max(i__1,i__2); + } /* Computing MAX */ - i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; - minwrk = max(i__1,i__2); + i__1 = (*m << 1) + *n, i__2 = (*m << 1) + *m * *nrhs; + minwrk = max(i__1,i__2); + } } minwrk = min(minwrk,maxwrk); - d__1 = (doublereal) maxwrk; - z__1.r = d__1, z__1.i = 0.; - work[1].r = z__1.r, work[1].i = z__1.i; + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + iwork[1] = liwork; + rwork[1] = (doublereal) lrwork; + if (*lwork < minwrk && ! 
lquery) { *info = -12; } @@ -3134,7 +3118,7 @@ static doublereal c_b2210 = .5; xerbla_("ZGELSD", &i__1); return 0; } else if (lquery) { - goto L10; + return 0; } /* Quick return if possible. */ @@ -3175,8 +3159,8 @@ static doublereal c_b2210 = .5; /* Matrix all zero. Return zero solution. */ i__1 = max(*m,*n); - zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); - dlaset_("F", &minmn, &c__1, &c_b324, &c_b324, &s[1], &c__1) + zlaset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); + dlaset_("F", &minmn, &c__1, &c_b328, &c_b328, &s[1], &c__1) ; *rank = 0; goto L10; @@ -3206,7 +3190,7 @@ static doublereal c_b2210 = .5; if (*m < *n) { i__1 = *n - *m; - zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], ldb); + zlaset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[*m + 1 + b_dim1], ldb); } /* Overdetermined case. */ @@ -3249,7 +3233,7 @@ static doublereal c_b2210 = .5; if (*n > 1) { i__1 = *n - 1; i__2 = *n - 1; - zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2], + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], lda); } } @@ -3334,7 +3318,7 @@ static doublereal c_b2210 = .5; zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork); i__1 = *m - 1; i__2 = *m - 1; - zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwork], & + zlaset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwork], & ldwork); itauq = il + ldwork * *m; itaup = itauq + *m; @@ -3379,7 +3363,7 @@ static doublereal c_b2210 = .5; /* Zero out below first M rows of B. 
*/ i__1 = *n - *m; - zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], + zlaset_("F", &i__1, nrhs, &c_b56, &c_b56, &b[*m + 1 + b_dim1], ldb); nwork = itau + *m; @@ -3461,9 +3445,9 @@ static doublereal c_b2210 = .5; } L10: - d__1 = (doublereal) maxwrk; - z__1.r = d__1, z__1.i = 0.; - work[1].r = z__1.r, work[1].i = z__1.i; + work[1].r = (doublereal) maxwrk, work[1].i = 0.; + iwork[1] = liwork; + rwork[1] = (doublereal) lrwork; return 0; /* End of ZGELSD */ @@ -3490,10 +3474,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -3638,10 +3622,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -3675,7 +3659,7 @@ static doublereal c_b2210 = .5; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -3923,7 +3907,6 @@ static doublereal c_b2210 = .5; ), zunglq_(integer *, integer *, integer * , doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *); - static logical lquery; static integer nrwork; extern /* Subroutine */ int zungqr_(integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, @@ -3931,10 +3914,11 @@ static doublereal c_b2210 = .5; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK driver routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 + 8-15-00: Improve consistency of WS calculations (eca) Purpose @@ -3973,11 +3957,11 @@ static doublereal c_b2210 = .5; min(M,N) rows of V**H are returned in the arrays U and VT; = 'O': If M >= N, the first N columns of U are overwritten - on the array A and all rows of V**H are returned in + in the array A and all rows of V**H are returned in the array VT; otherwise, all columns of U are returned in the array U and the first M rows of V**H are overwritten - in the array VT; + in the array A; = 'N': no columns of U or rows of V**H are computed. M (input) INTEGER @@ -4028,7 +4012,7 @@ static doublereal c_b2210 = .5; JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N; if JOBZ = 'S', LDVT >= min(M,N). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -4039,12 +4023,15 @@ static doublereal c_b2210 = .5; if JOBZ = 'S' or 'A', LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N). For good performance, LWORK should generally be larger. 
- If LWORK < 0 but other input arguments are legal, WORK(1) - returns the optimal LWORK. - RWORK (workspace) DOUBLE PRECISION array, dimension (LRWORK) - If JOBZ = 'N', LRWORK >= 7*min(M,N). - Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N) + If LWORK = -1, a workspace query is assumed. The optimal + size for the WORK array is calculated and stored in WORK(1), + and no other work except argument checking is performed. + + RWORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LRWORK)) + If JOBZ = 'N', LRWORK >= 5*min(M,N). + Otherwise, + LRWORK >= min(M,N)*max(5*min(M,N)+7,2*max(M,N)+2*min(M,N)+1) IWORK (workspace) INTEGER array, dimension (8*min(M,N)) @@ -4093,7 +4080,6 @@ static doublereal c_b2210 = .5; wntqn = lsame_(jobz, "N"); minwrk = 1; maxwrk = 1; - lquery = *lwork == -1; if (! (wntqa || wntqs || wntqo || wntqn)) { *info = -1; @@ -4126,8 +4112,11 @@ static doublereal c_b2210 = .5; /* There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*N*N + 4*N + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. 
+ BDSPAC = 5*N*N + 7*N + BDSPAN = MAX(7*N+4, 3*N+2+SMLSIZ*(SMLSIZ+8)) */ if (*m >= mnthr1) { @@ -4135,14 +4124,13 @@ static doublereal c_b2210 = .5; /* Path 1 (M much larger than N, JOBZ='N') */ - wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & + maxwrk = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, & c_n1, &c_n1, (ftnlen)6, (ftnlen)1); /* Computing MAX */ - i__1 = wrkbl, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& + i__1 = maxwrk, i__2 = (*n << 1) + (*n << 1) * ilaenv_(& c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen) 6, (ftnlen)1); - wrkbl = max(i__1,i__2); - maxwrk = wrkbl; + maxwrk = max(i__1,i__2); minwrk = *n * 3; } else if (wntqo) { @@ -4317,8 +4305,11 @@ static doublereal c_b2210 = .5; /* There is no complex work space needed for bidiagonal SVD - The real work space needed for bidiagonal SVD is BDSPAC, - BDSPAC = 3*M*M + 4*M + The real work space needed for bidiagonal SVD is BDSPAC + for computing singular values and singular vectors; BDSPAN + for computing singular values only. + BDSPAC = 5*M*M + 7*M + BDSPAN = MAX(7*M+4, 3*M+2+SMLSIZ*(SMLSIZ+8)) */ if (*n >= mnthr1) { @@ -4505,26 +4496,25 @@ static doublereal c_b2210 = .5; } } maxwrk = max(maxwrk,minwrk); + } + if (*info == 0) { work[1].r = (doublereal) maxwrk, work[1].i = 0.; + if (*lwork < minwrk && *lwork != -1) { + *info = -13; + } } - if (*lwork < minwrk && ! 
lquery) { - *info = -13; - } +/* Quick returns */ + if (*info != 0) { i__1 = -(*info); xerbla_("ZGESDD", &i__1); return 0; - } else if (lquery) { + } + if (*lwork == -1) { return 0; } - -/* Quick return if possible */ - if (*m == 0 || *n == 0) { - if (*lwork >= 1) { - work[1].r = 1., work[1].i = 0.; - } return 0; } @@ -4582,7 +4572,7 @@ static doublereal c_b2210 = .5; i__1 = *n - 1; i__2 = *n - 1; - zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2], + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &a[a_dim1 + 2], lda); ie = 1; itauq = 1; @@ -4603,7 +4593,7 @@ static doublereal c_b2210 = .5; /* Perform bidiagonal SVD, compute singular values only (CWorkspace: 0) - (RWorkspace: need BDSPAC) + (RWorkspace: need BDSPAN) */ dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -4649,7 +4639,7 @@ static doublereal c_b2210 = .5; zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__1 = *n - 1; i__2 = *n - 1; - zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &work[ir + 1], & + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &work[ir + 1], & ldwrkr); /* @@ -4731,8 +4721,8 @@ static doublereal c_b2210 = .5; /* Computing MIN */ i__3 = *m - i__ + 1; chunk = min(i__3,ldwrkr); - zgemm_("N", "N", &chunk, n, n, &c_b60, &a[i__ + a_dim1], - lda, &work[iu], &ldwrku, &c_b59, &work[ir], & + zgemm_("N", "N", &chunk, n, n, &c_b57, &a[i__ + a_dim1], + lda, &work[iu], &ldwrku, &c_b56, &work[ir], & ldwrkr); zlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ + a_dim1], lda); @@ -4770,7 +4760,7 @@ static doublereal c_b2210 = .5; zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr); i__2 = *n - 1; i__1 = *n - 1; - zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &work[ir + 1], & + zlaset_("L", &i__2, &i__1, &c_b56, &c_b56, &work[ir + 1], & ldwrkr); /* @@ -4845,8 +4835,8 @@ static doublereal c_b2210 = .5; */ zlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr); - zgemm_("N", "N", m, n, n, &c_b60, &a[a_offset], lda, &work[ir] - , &ldwrkr, &c_b59, &u[u_offset], ldu); + zgemm_("N", 
"N", m, n, n, &c_b57, &a[a_offset], lda, &work[ir] + , &ldwrkr, &c_b56, &u[u_offset], ldu); } else if (wntqa) { @@ -4889,7 +4879,7 @@ static doublereal c_b2210 = .5; i__2 = *n - 1; i__1 = *n - 1; - zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &a[a_dim1 + 2], + zlaset_("L", &i__2, &i__1, &c_b56, &c_b56, &a[a_dim1 + 2], lda); ie = 1; itauq = itau; @@ -4954,8 +4944,8 @@ static doublereal c_b2210 = .5; (RWorkspace: 0) */ - zgemm_("N", "N", m, n, n, &c_b60, &u[u_offset], ldu, &work[iu] - , &ldwrku, &c_b59, &a[a_offset], lda); + zgemm_("N", "N", m, n, n, &c_b57, &u[u_offset], ldu, &work[iu] + , &ldwrku, &c_b56, &a[a_offset], lda); /* Copy left singular vectors of A from A to U */ @@ -4993,7 +4983,7 @@ static doublereal c_b2210 = .5; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -5237,7 +5227,7 @@ static doublereal c_b2210 = .5; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -5295,7 +5285,7 @@ static doublereal c_b2210 = .5; (Rworkspace: need 0) */ - zlaset_("F", m, n, &c_b59, &c_b59, &work[iu], &ldwrku); + zlaset_("F", m, n, &c_b56, &c_b56, &work[iu], &ldwrku); zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku); i__1 = *lwork - nwork + 1; zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ @@ -5361,7 +5351,7 @@ static doublereal c_b2210 = .5; (RWorkspace: 0) */ - zlaset_("F", m, n, &c_b59, &c_b59, &u[u_offset], ldu); + zlaset_("F", m, n, &c_b56, &c_b56, &u[u_offset], ldu); zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu); i__2 = *lwork - nwork + 1; zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[ @@ -5398,11 +5388,13 @@ static doublereal c_b2210 = .5; /* Set the right corner of U to identity matrix */ - zlaset_("F", m, m, &c_b59, &c_b59, &u[u_offset], ldu); - i__2 = *m - *n; - i__1 = *m - *n; - zlaset_("F", 
&i__2, &i__1, &c_b59, &c_b60, &u[*n + 1 + (*n + - 1) * u_dim1], ldu); + zlaset_("F", m, m, &c_b56, &c_b56, &u[u_offset], ldu); + if (*m > *n) { + i__2 = *m - *n; + i__1 = *m - *n; + zlaset_("F", &i__2, &i__1, &c_b56, &c_b57, &u[*n + 1 + (* + n + 1) * u_dim1], ldu); + } /* Copy real matrix RWORK(IRU) to complex matrix U @@ -5436,8 +5428,8 @@ static doublereal c_b2210 = .5; /* A has more columns than rows. If A has sufficiently more - columns than rows, first reduce using the LQ decomposition - (if sufficient workspace available) + columns than rows, first reduce using the LQ decomposition (if + sufficient workspace available) */ if (*n >= mnthr1) { @@ -5466,7 +5458,7 @@ static doublereal c_b2210 = .5; i__2 = *m - 1; i__1 = *m - 1; - zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &a[(a_dim1 << 1) + + zlaset_("U", &i__2, &i__1, &c_b56, &c_b56, &a[(a_dim1 << 1) + 1], lda); ie = 1; itauq = 1; @@ -5487,7 +5479,7 @@ static doublereal c_b2210 = .5; /* Perform bidiagonal SVD, compute singular values only (CWorkspace: 0) - (RWorkspace: need BDSPAC) + (RWorkspace: need BDSPAN) */ dbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -5538,7 +5530,7 @@ static doublereal c_b2210 = .5; zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__2 = *m - 1; i__1 = *m - 1; - zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &work[il + ldwrkl], + zlaset_("U", &i__2, &i__1, &c_b56, &c_b56, &work[il + ldwrkl], &ldwrkl); /* @@ -5619,8 +5611,8 @@ static doublereal c_b2210 = .5; /* Computing MIN */ i__3 = *n - i__ + 1; blk = min(i__3,chunk); - zgemm_("N", "N", m, &blk, m, &c_b60, &work[ivt], m, &a[ - i__ * a_dim1 + 1], lda, &c_b59, &work[il], & + zgemm_("N", "N", m, &blk, m, &c_b57, &work[ivt], m, &a[ + i__ * a_dim1 + 1], lda, &c_b56, &work[il], & ldwrkl); zlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1 + 1], lda); @@ -5658,7 +5650,7 @@ static doublereal c_b2210 = .5; zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl); i__1 = *m - 1; i__2 = *m - 1; - zlaset_("U", &i__1, 
&i__2, &c_b59, &c_b59, &work[il + ldwrkl], + zlaset_("U", &i__1, &i__2, &c_b56, &c_b56, &work[il + ldwrkl], &ldwrkl); /* @@ -5733,8 +5725,8 @@ static doublereal c_b2210 = .5; */ zlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl); - zgemm_("N", "N", m, n, m, &c_b60, &work[il], &ldwrkl, &a[ - a_offset], lda, &c_b59, &vt[vt_offset], ldvt); + zgemm_("N", "N", m, n, m, &c_b57, &work[il], &ldwrkl, &a[ + a_offset], lda, &c_b56, &vt[vt_offset], ldvt); } else if (wntqa) { @@ -5777,7 +5769,7 @@ static doublereal c_b2210 = .5; i__1 = *m - 1; i__2 = *m - 1; - zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &a[(a_dim1 << 1) + + zlaset_("U", &i__1, &i__2, &c_b56, &c_b56, &a[(a_dim1 << 1) + 1], lda); ie = 1; itauq = itau; @@ -5841,8 +5833,8 @@ static doublereal c_b2210 = .5; (RWorkspace: 0) */ - zgemm_("N", "N", m, n, m, &c_b60, &work[ivt], &ldwkvt, &vt[ - vt_offset], ldvt, &c_b59, &a[a_offset], lda); + zgemm_("N", "N", m, n, m, &c_b57, &work[ivt], &ldwkvt, &vt[ + vt_offset], ldvt, &c_b56, &a[a_offset], lda); /* Copy right singular vectors of A from A to VT */ @@ -5882,7 +5874,7 @@ static doublereal c_b2210 = .5; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -6126,7 +6118,7 @@ static doublereal c_b2210 = .5; /* Compute singular values only (Cworkspace: 0) - (Rworkspace: need BDSPAC) + (Rworkspace: need BDSPAN) */ dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, & @@ -6138,7 +6130,7 @@ static doublereal c_b2210 = .5; /* WORK( IVT ) is M by N */ - zlaset_("F", m, n, &c_b59, &c_b59, &work[ivt], &ldwkvt); + zlaset_("F", m, n, &c_b56, &c_b56, &work[ivt], &ldwkvt); nwork = ivt + ldwkvt * *n; } else { @@ -6261,7 +6253,7 @@ static doublereal c_b2210 = .5; (RWorkspace: M*M) */ - zlaset_("F", m, n, &c_b59, &c_b59, &vt[vt_offset], ldvt); + zlaset_("F", m, n, &c_b56, &c_b56, &vt[vt_offset], ldvt); zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); 
i__1 = *lwork - nwork + 1; zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[ @@ -6297,12 +6289,9 @@ static doublereal c_b2210 = .5; zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[ itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr); -/* Set the right corner of VT to identity matrix */ +/* Set all of VT to identity matrix */ - i__1 = *n - *m; - i__2 = *n - *m; - zlaset_("F", &i__1, &i__2, &c_b59, &c_b60, &vt[*m + 1 + (*m + - 1) * vt_dim1], ldvt); + zlaset_("F", n, n, &c_b56, &c_b57, &vt[vt_offset], ldvt); /* Copy real matrix RWORK(IRVT) to complex matrix VT @@ -6311,7 +6300,6 @@ static doublereal c_b2210 = .5; (RWorkspace: M*M) */ - zlaset_("F", n, n, &c_b59, &c_b59, &vt[vt_offset], ldvt); zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt); i__1 = *lwork - nwork + 1; zunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[ @@ -6330,10 +6318,20 @@ static doublereal c_b2210 = .5; dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } + if (*info != 0 && anrm > bignum) { + i__1 = minmn - 1; + dlascl_("G", &c__0, &c__0, &bignum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } if (anrm < smlnum) { dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], & minmn, &ierr); } + if (*info != 0 && anrm < smlnum) { + i__1 = minmn - 1; + dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &i__1, &c__1, &rwork[ + ie], &minmn, &ierr); + } } /* Return optimal workspace in WORK(1) */ @@ -6361,10 +6359,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6474,24 +6472,27 @@ static doublereal c_b2210 = .5; doublecomplex z__1; /* Builtin functions */ + double z_abs(doublecomplex *); void z_div(doublecomplex *, doublecomplex *, doublecomplex *); /* Local variables */ - static integer j, jp; + static integer i__, j, jp; + static doublereal sfmin; extern /* Subroutine */ int zscal_(integer *, doublecomplex *, doublecomplex *, integer *), zgeru_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zswap_(integer *, - doublecomplex *, integer *, doublecomplex *, integer *), xerbla_( - char *, integer *); + doublecomplex *, integer *, doublecomplex *, integer *); + + extern /* Subroutine */ int xerbla_(char *, integer *); extern integer izamax_(integer *, doublecomplex *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6570,6 +6571,10 @@ static doublereal c_b2210 = .5; return 0; } +/* Compute machine safe minimum */ + + sfmin = SAFEMINIMUM; + i__1 = min(*m,*n); for (j = 1; j <= i__1; ++j) { @@ -6590,9 +6595,20 @@ static doublereal c_b2210 = .5; /* Compute elements J+1:M of J-th column. 
*/ if (j < *m) { - i__2 = *m - j; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); - zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1); + if (z_abs(&a[j + j * a_dim1]) >= sfmin) { + i__2 = *m - j; + z_div(&z__1, &c_b57, &a[j + j * a_dim1]); + zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1); + } else { + i__2 = *m - j; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = j + i__ + j * a_dim1; + z_div(&z__1, &a[j + i__ + j * a_dim1], &a[j + j * + a_dim1]); + a[i__3].r = z__1.r, a[i__3].i = z__1.i; +/* L20: */ + } + } } } else if (*info == 0) { @@ -6643,10 +6659,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6785,7 +6801,7 @@ static doublereal c_b2210 = .5; i__3 = *n - j - jb + 1; ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, & - c_b60, &a[j + j * a_dim1], lda, &a[j + (j + jb) * + c_b57, &a[j + j * a_dim1], lda, &a[j + (j + jb) * a_dim1], lda); if (j + jb <= *m) { @@ -6796,7 +6812,7 @@ static doublereal c_b2210 = .5; z__1.r = -1., z__1.i = -0.; zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb, &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j + - jb) * a_dim1], lda, &c_b60, &a[j + jb + (j + jb) * + jb) * a_dim1], lda, &c_b57, &a[j + jb + (j + jb) * a_dim1], lda); } } @@ -6828,10 +6844,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -6934,12 +6950,12 @@ static doublereal c_b2210 = .5; /* Solve L*X = B, overwriting B with X. */ - ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b60, &a[ + ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b57, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ - ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & a[a_offset], lda, &b[b_offset], ldb); } else { @@ -6949,12 +6965,12 @@ static doublereal c_b2210 = .5; Solve U'*X = B, overwriting B with X. */ - ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b60, &a[ + ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b57, &a[ a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ - ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b60, &a[a_offset], + ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b57, &a[a_offset], lda, &b[b_offset], ldb); /* Apply row interchanges to the solution vectors. */ @@ -6974,8 +6990,8 @@ static doublereal c_b2210 = .5; integer *liwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal d__1, d__2; + integer a_dim1, a_offset, i__1, i__2; + doublereal d__1; /* Builtin functions */ double sqrt(doublereal); @@ -6999,6 +7015,8 @@ static doublereal c_b2210 = .5; static integer iscale; static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum; extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *, @@ -7026,10 +7044,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK driver routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK driver routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7078,7 +7096,7 @@ static doublereal c_b2210 = .5; W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -7088,9 +7106,10 @@ static doublereal c_b2210 = .5; If JOBZ = 'V' and N > 1, LWORK must be at least 2*N + N**2. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK) @@ -7104,11 +7123,12 @@ static doublereal c_b2210 = .5; 1 + 5*N + 2*N**2. If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. 
- IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -7118,16 +7138,21 @@ static doublereal c_b2210 = .5; If JOBZ = 'V' and N > 1, LIWORK must be at least 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, the algorithm failed to converge; i - off-diagonal elements of an intermediate tridiagonal - form did not converge to zero. + > 0: if INFO = i and JOBZ = 'N', then the algorithm failed + to converge; i off-diagonal elements of an intermediate + tridiagonal form did not converge to zero; + if INFO = i and JOBZ = 'V', then the algorithm failed + to compute an eigenvalue while working on the submatrix + lying in rows and columns INFO/(N+1) through + mod(INFO,N+1). Further Details =============== @@ -7136,6 +7161,7 @@ static doublereal c_b2210 = .5; Jeff Rutter, Computer Science Division, University of California at Berkeley, USA + Modified description of INFO. Sven, 16 Feb 05. 
===================================================================== @@ -7157,29 +7183,6 @@ static doublereal c_b2210 = .5; lquery = *lwork == -1 || *lrwork == -1 || *liwork == -1; *info = 0; - if (*n <= 1) { - lwmin = 1; - lrwmin = 1; - liwmin = 1; - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } else { - if (wantz) { - lwmin = (*n << 1) + *n * *n; -/* Computing 2nd power */ - i__1 = *n; - lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); - liwmin = *n * 5 + 3; - } else { - lwmin = *n + 1; - lrwmin = *n; - liwmin = 1; - } - lopt = lwmin; - lropt = lrwmin; - liopt = liwmin; - } if (! (wantz || lsame_(jobz, "N"))) { *info = -1; } else if (! (lower || lsame_(uplo, "U"))) { @@ -7188,18 +7191,46 @@ static doublereal c_b2210 = .5; *info = -3; } else if (*lda < max(1,*n)) { *info = -5; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; } if (*info == 0) { + if (*n <= 1) { + lwmin = 1; + lrwmin = 1; + liwmin = 1; + lopt = lwmin; + lropt = lrwmin; + liopt = liwmin; + } else { + if (wantz) { + lwmin = (*n << 1) + *n * *n; +/* Computing 2nd power */ + i__1 = *n; + lrwmin = *n * 5 + 1 + (i__1 * i__1 << 1); + liwmin = *n * 5 + 3; + } else { + lwmin = *n + 1; + lrwmin = *n; + liwmin = 1; + } +/* Computing MAX */ + i__1 = lwmin, i__2 = *n + ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, + &c_n1, &c_n1, (ftnlen)6, (ftnlen)1); + lopt = max(i__1,i__2); + lropt = lrwmin; + liopt = liwmin; + } work[1].r = (doublereal) lopt, work[1].i = 0.; rwork[1] = (doublereal) lropt; iwork[1] = liopt; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -12; + } } if (*info != 0) { @@ -7247,7 +7278,7 @@ static doublereal c_b2210 = .5; sigma = rmax / anrm; } if (iscale == 1) { - zlascl_(uplo, &c__0, &c__0, &c_b1015, &sigma, n, n, &a[a_offset], lda, + zlascl_(uplo, &c__0, &c__0, &c_b1034, &sigma, n, n, &a[a_offset], lda, info); } @@ -7263,10 +7294,6 @@ static doublereal c_b2210 = .5; llrwk = *lrwork - indrwk + 1; zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], & work[indwrk], &llwork, &iinfo); -/* Computing MAX */ - i__1 = indwrk; - d__1 = (doublereal) lopt, d__2 = (doublereal) (*n) + work[i__1].r; - lopt = (integer) max(d__1,d__2); /* For eigenvalues only, call DSTERF. For eigenvectors, first call @@ -7284,14 +7311,6 @@ static doublereal c_b2210 = .5; zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[ indwrk], n, &work[indwk2], &llwrk2, &iinfo); zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda); -/* - Computing MAX - Computing 2nd power -*/ - i__3 = *n; - i__4 = indwk2; - i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r; - lopt = max(i__1,i__2); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ @@ -7346,10 +7365,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7519,7 +7538,7 @@ static doublereal c_b2210 = .5; /* Compute x := tau * A * v storing x in TAU(1:i) */ zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) * - a_dim1 + 1], &c__1, &c_b59, &tau[1], &c__1) + a_dim1 + 1], &c__1, &c_b56, &tau[1], &c__1) ; /* Compute w := x - 1/2 * tau * (x'*v) * v */ @@ -7599,7 +7618,7 @@ static doublereal c_b2210 = .5; i__2 = *n - i__; zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1], - lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b59, &tau[ + lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b56, &tau[ i__], &c__1); /* Compute w := x - 1/2 * tau * (x'*v) * v */ @@ -7682,10 +7701,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -7739,7 +7758,7 @@ static doublereal c_b2210 = .5; The scalar factors of the elementary reflectors (see Further Details). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. 
LWORK (input) INTEGER @@ -7927,7 +7946,7 @@ static doublereal c_b2210 = .5; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1 - + 1], lda, &work[1], &ldwork, &c_b1015, &a[a_offset], lda); + + 1], lda, &work[1], &ldwork, &c_b1034, &a[a_offset], lda); /* Copy superdiagonal elements back into A, and diagonal @@ -7976,7 +7995,7 @@ static doublereal c_b2210 = .5; i__3 = *n - i__ - nb + 1; z__1.r = -1., z__1.i = -0.; zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb + - i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1015, &a[ + i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1034, &a[ i__ + nb + (i__ + nb) * a_dim1], lda); /* @@ -8018,165 +8037,261 @@ static doublereal c_b2210 = .5; { /* System generated locals */ address a__1[2]; - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2], - i__5, i__6; - doublereal d__1, d__2, d__3, d__4; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2]; + doublereal d__1, d__2, d__3; doublecomplex z__1; char ch__1[2]; /* Builtin functions */ - double d_imag(doublecomplex *); - void d_cnjg(doublecomplex *, doublecomplex *); /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); /* Local variables */ - static integer i__, j, k, l; - static doublecomplex s[225] /* was [15][15] */, v[16]; - static integer i1, i2, ii, nh, nr, ns, nv; - static doublecomplex vv[16]; - static integer itn; - static doublecomplex tau; - static integer its; - static doublereal ulp, tst1; - static integer maxb, ierr; - static doublereal unfl; - static doublecomplex temp; - static doublereal ovfl; + static doublecomplex hl[2401] /* was [49][49] */; + static integer kbot, nmin; extern logical lsame_(char *, char *); - extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *); - static integer itemp; - static doublereal rtemp; - extern /* Subroutine */ int zgemv_(char *, integer *, integer *, - doublecomplex *, 
doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - static logical initz, wantt, wantz; - static doublereal rwork[1]; + static logical initz; + static doublecomplex workl[49]; + static logical wantt, wantz; extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, - doublecomplex *, integer *); - extern doublereal dlapy2_(doublereal *, doublereal *); - extern /* Subroutine */ int dlabad_(doublereal *, doublereal *); - - extern /* Subroutine */ int xerbla_(char *, integer *); + doublecomplex *, integer *), zlaqr0_(logical *, logical *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *), xerbla_(char *, integer * + ); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); - extern /* Subroutine */ int zdscal_(integer *, doublereal *, - doublecomplex *, integer *), zlarfg_(integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *); - extern integer izamax_(integer *, doublecomplex *, integer *); - extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, - doublereal *); extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *, integer *, doublecomplex *, integer *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zlaset_(char *, integer *, integer *, doublecomplex *, doublecomplex *, doublecomplex *, - integer *), zlarfx_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, doublecomplex *, integer *, - doublecomplex *); - static doublereal smlnum; + integer *); static logical lquery; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 - - - Purpose - ======= - - ZHSEQR computes the eigenvalues of a complex upper Hessenberg - matrix H, and, optionally, the matrices T and Z from the Schur - decomposition H = Z T Z**H, where T is an upper triangular matrix - (the Schur form), and Z is the unitary matrix of Schur vectors. - - Optionally Z may be postmultiplied into an input unitary matrix Q, - so that this routine can give the Schur factorization of a matrix A - which has been reduced to the Hessenberg form H by the unitary - matrix Q: A = Q*H*Q**H = (QZ)*T*(QZ)**H. - - Arguments - ========= - - JOB (input) CHARACTER*1 - = 'E': compute eigenvalues only; - = 'S': compute eigenvalues and the Schur form T. - - COMPZ (input) CHARACTER*1 - = 'N': no Schur vectors are computed; - = 'I': Z is initialized to the unit matrix and the matrix Z - of Schur vectors of H is returned; - = 'V': Z must contain an unitary matrix Q on entry, and - the product Q*Z is returned. + -- LAPACK computational routine (version 3.2.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + June 2010 + + Purpose + ======= + + ZHSEQR computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. + + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + JOB (input) CHARACTER*1 + = 'E': compute eigenvalues only; + = 'S': compute eigenvalues and the Schur form T. 
+ + COMPZ (input) CHARACTER*1 + = 'N': no Schur vectors are computed; + = 'I': Z is initialized to the unit matrix and the matrix Z + of Schur vectors of H is returned; + = 'V': Z must contain an unitary matrix Q on entry, and + the product Q*Z is returned. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally + set by a previous call to ZGEBAL, and then passed to ZGEHRD + when the matrix output by ZGEBAL is reduced to Hessenberg + form. Otherwise ILO and IHI should be set to 1 and N + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and JOB = 'S', H contains the upper + triangular matrix T from the Schur decomposition (the + Schur form). If INFO = 0 and JOB = 'E', the contents of + H are unspecified on exit. (The output value of H when + INFO.GT.0 is given under the description of INFO below.) + + Unlike earlier versions of ZHSEQR, this subroutine may + explicitly H(i,j) = 0 for i.GT.j and j = 1, 2, ... ILO-1 + or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues. If JOB = 'S', the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + If COMPZ = 'N', Z is not referenced. + If COMPZ = 'I', on entry Z need not be set and on exit, + if INFO = 0, Z contains the unitary matrix Z of the Schur + vectors of H. If COMPZ = 'V', on entry Z must contain an + N-by-N matrix Q, which is assumed to be equal to the unit + matrix except for the submatrix Z(ILO:IHI,ILO:IHI). 
On exit, + if INFO = 0, Z contains Q*Z. + Normally Q is the unitary matrix generated by ZUNGHR + after the call to ZGEHRD which formed the Hessenberg matrix + H. (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if COMPZ = 'I' or + COMPZ = 'V', then LDZ.GE.MAX(1,N). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + On exit, if INFO = 0, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient and delivers very good and sometimes + optimal performance. However, LWORK as large as 11*N + may be required for optimal performance. A workspace + query is recommended to determine the optimal workspace + size. + + If LWORK = -1, then ZHSEQR does a workspace query. + In this case, ZHSEQR checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .LT. 0: if INFO = -i, the i-th argument had an illegal + value + .GT. 0: if INFO = i, ZHSEQR failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and JOB = 'E', then on exit, the + remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and JOB = 'S', then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 
0 and COMPZ = 'V', then on exit + + (final value of Z) = (initial value of Z)*U + + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'I', then on exit + (final value of Z) = U + where U is the unitary matrix in (*) (regard- + less of the value of JOB.) + + If INFO .GT. 0 and COMPZ = 'N', then Z is not + accessed. + + ================================================================ + Default values supplied by + ILAENV(ISPEC,'ZHSEQR',JOB(:1)//COMPZ(:1),N,ILO,IHI,LWORK). + It is suggested that these defaults be adjusted in order + to attain best performance in each particular + computational environment. + + ISPEC=12: The ZLAHQR vs ZLAQR0 crossover point. + Default: 75. (Must be at least 11.) + + ISPEC=13: Recommended deflation window size. + This depends on ILO, IHI and NS. NS is the + number of simultaneous shifts returned + by ILAENV(ISPEC=15). (See ISPEC=15 below.) + The default for (IHI-ILO+1).LE.500 is NS. + The default for (IHI-ILO+1).GT.500 is 3*NS/2. + + ISPEC=14: Nibble crossover point. (See IPARMQ for + details.) Default: 14% of deflation window + size. + + ISPEC=15: Number of simultaneous shifts in a multishift + QR iteration. + + If IHI-ILO+1 is ... + + greater than ...but less ... the + or equal to ... than default is + + 1 30 NS = 2(+) + 30 60 NS = 4(+) + 60 150 NS = 10(+) + 150 590 NS = ** + 590 3000 NS = 64 + 3000 6000 NS = 128 + 6000 infinity NS = 256 + + (+) By default some or all matrices of this order + are passed to the implicit double shift routine + ZLAHQR and this parameter is ignored. See + ISPEC=12 above and comments in IPARMQ for + details. + + (**) The asterisks (**) indicate an ad-hoc + function of N increasing from 10 to 64. + + ISPEC=16: Select structured matrix multiply. + If the number of simultaneous shifts (specified + by ISPEC=15) is less than 14, then the default + for ISPEC=16 is 0. Otherwise the default for + ISPEC=16 is 2. 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. - N (input) INTEGER - The order of the matrix H. N >= 0. - - ILO (input) INTEGER - IHI (input) INTEGER - It is assumed that H is already upper triangular in rows - and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally - set by a previous call to ZGEBAL, and then passed to CGEHRD - when the matrix output by ZGEBAL is reduced to Hessenberg - form. Otherwise ILO and IHI should be set to 1 and N - respectively. - 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. - - H (input/output) COMPLEX*16 array, dimension (LDH,N) - On entry, the upper Hessenberg matrix H. - On exit, if JOB = 'S', H contains the upper triangular matrix - T from the Schur decomposition (the Schur form). If - JOB = 'E', the contents of H are unspecified on exit. - - LDH (input) INTEGER - The leading dimension of the array H. LDH >= max(1,N). - - W (output) COMPLEX*16 array, dimension (N) - The computed eigenvalues. If JOB = 'S', the eigenvalues are - stored in the same order as on the diagonal of the Schur form - returned in H, with W(i) = H(i,i). - - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) - If COMPZ = 'N': Z is not referenced. - If COMPZ = 'I': on entry, Z need not be set, and on exit, Z - contains the unitary matrix Z of the Schur vectors of H. 
- If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q, - which is assumed to be equal to the unit matrix except for - the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z. - Normally Q is the unitary matrix generated by ZUNGHR after - the call to ZGEHRD which formed the Hessenberg matrix H. - - LDZ (input) INTEGER - The leading dimension of the array Z. - LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise. - - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) - On exit, if INFO = 0, WORK(1) returns the optimal LWORK. - - LWORK (input) INTEGER - The dimension of the array WORK. LWORK >= max(1,N). - - If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. - - INFO (output) INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i, ZHSEQR failed to compute all the - eigenvalues in a total of 30*(IHI-ILO+1) iterations; - elements 1:ilo-1 and i+1:n of W contain those - eigenvalues which have been successfully computed. + ================================================================ - ===================================================================== + ==== Matrices of order NTINY or smaller must be processed by + . ZLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + ==== NL allocates some local workspace to help small matrices + . through a rare ZLAHQR failure. NL .GT. NTINY = 11 is + . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom- + . mended. (The default value of NMIN is 75.) Using NL = 49 + . allows up to six simultaneous shifts and a 16-by-16 + . deflation window. ==== - Decode and test the input parameters + ==== Decode and check the input parameters. 
==== */ /* Parameter adjustments */ @@ -8193,11 +8308,12 @@ static doublereal c_b2210 = .5; wantt = lsame_(job, "S"); initz = lsame_(compz, "I"); wantz = initz || lsame_(compz, "V"); + d__1 = (doublereal) max(1,*n); + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + lquery = *lwork == -1; *info = 0; - i__1 = max(1,*n); - work[1].r = (doublereal) i__1, work[1].i = 0.; - lquery = *lwork == -1; if (! lsame_(job, "E") && ! wantt) { *info = -1; } else if (! lsame_(compz, "N") && ! wantz) { @@ -8215,451 +8331,162 @@ static doublereal c_b2210 = .5; } else if (*lwork < max(1,*n) && ! lquery) { *info = -12; } + if (*info != 0) { + +/* ==== Quick return in case of invalid argument. ==== */ + i__1 = -(*info); xerbla_("ZHSEQR", &i__1); return 0; - } else if (lquery) { - return 0; - } -/* Initialize Z, if necessary */ + } else if (*n == 0) { - if (initz) { - zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); - } - -/* Store the eigenvalues isolated by ZGEBAL. */ +/* ==== Quick return in case N = 0; nothing to do. ==== */ - i__1 = *ilo - 1; - for (i__ = 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L10: */ - } - i__1 = *n; - for (i__ = *ihi + 1; i__ <= i__1; ++i__) { - i__2 = i__; - i__3 = i__ + i__ * h_dim1; - w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i; -/* L20: */ - } + return 0; -/* Quick return if possible. */ + } else if (lquery) { - if (*n == 0) { - return 0; - } - if (*ilo == *ihi) { - i__1 = *ilo; - i__2 = *ilo + *ilo * h_dim1; - w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; - return 0; - } +/* ==== Quick return in case of a workspace query ==== */ + zlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, + ihi, &z__[z_offset], ldz, &work[1], lwork, info); /* - Set rows and columns ILO to IHI to zero below the first - subdiagonal. + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. 
==== + Computing MAX */ + d__2 = work[1].r, d__3 = (doublereal) max(1,*n); + d__1 = max(d__2,d__3); + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; - i__1 = *ihi - 2; - for (j = *ilo; j <= i__1; ++j) { - i__2 = *n; - for (i__ = j + 2; i__ <= i__2; ++i__) { - i__3 = i__ + j * h_dim1; - h__[i__3].r = 0., h__[i__3].i = 0.; -/* L30: */ + } else { + +/* ==== copy eigenvalues isolated by ZGEBAL ==== */ + + if (*ilo > 1) { + i__1 = *ilo - 1; + i__2 = *ldh + 1; + zcopy_(&i__1, &h__[h_offset], &i__2, &w[1], &c__1); + } + if (*ihi < *n) { + i__1 = *n - *ihi; + i__2 = *ldh + 1; + zcopy_(&i__1, &h__[*ihi + 1 + (*ihi + 1) * h_dim1], &i__2, &w[* + ihi + 1], &c__1); } -/* L40: */ - } - nh = *ihi - *ilo + 1; -/* - I1 and I2 are the indices of the first row and last column of H - to which transformations must be applied. If eigenvalues only are - being computed, I1 and I2 are re-set inside the main loop. -*/ +/* ==== Initialize Z, if requested ==== */ - if (wantt) { - i1 = 1; - i2 = *n; - } else { - i1 = *ilo; - i2 = *ihi; - } + if (initz) { + zlaset_("A", n, n, &c_b56, &c_b57, &z__[z_offset], ldz) + ; + } -/* Ensure that the subdiagonal elements are real. */ +/* ==== Quick return if possible ==== */ - i__1 = *ihi; - for (i__ = *ilo + 1; i__ <= i__1; ++i__) { - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (d_imag(&temp) != 0.) 
{ - d__1 = temp.r; - d__2 = d_imag(&temp); - rtemp = dlapy2_(&d__1, &d__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.; - z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; - temp.r = z__1.r, temp.i = z__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - d_cnjg(&z__1, &temp); - zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (i__ < *ihi) { - i__2 = i__ + 1 + i__ * h_dim1; - i__3 = i__ + 1 + i__ * h_dim1; - z__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, z__1.i = - temp.r * h__[i__3].i + temp.i * h__[i__3].r; - h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; - } - if (wantz) { - zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } + if (*ilo == *ihi) { + i__1 = *ilo; + i__2 = *ilo + *ilo * h_dim1; + w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; + return 0; } -/* L50: */ - } /* - Determine the order of the multi-shift QR algorithm to be used. + ==== ZLAHQR/ZLAQR0 crossover point ==== Writing concatenation */ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - ns = ilaenv_(&c__4, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); -/* Writing concatenation */ - i__4[0] = 1, a__1[0] = job; - i__4[1] = 1, a__1[1] = compz; - s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2); - maxb = ilaenv_(&c__8, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, ( - ftnlen)2); - if (ns <= 1 || ns > nh || maxb >= nh) { + i__3[0] = 1, a__1[0] = job; + i__3[1] = 1, a__1[1] = compz; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + nmin = ilaenv_(&c__12, "ZHSEQR", ch__1, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nmin = max(11,nmin); -/* Use the standard double-shift algorithm */ +/* ==== ZLAQR0 for big matrices; ZLAHQR for small ones ==== */ - zlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo, - ihi, &z__[z_offset], ldz, info); - return 0; - } - maxb = max(2,maxb); -/* Computing 
MIN */ - i__1 = min(ns,maxb); - ns = min(i__1,15); + if (*n > nmin) { + zlaqr0_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, &work[1], lwork, info); + } else { -/* - Now 1 < NS <= MAXB < NH. +/* ==== Small matrix ==== */ - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. -*/ + zlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + ilo, ihi, &z__[z_offset], ldz, info); - unfl = SAFEMINIMUM; - ovfl = 1. / unfl; - dlabad_(&unfl, &ovfl); - ulp = PRECISION; - smlnum = unfl * (nh / ulp); + if (*info > 0) { + +/* + ==== A rare ZLAHQR failure! ZLAQR0 sometimes succeeds + . when ZLAHQR fails. ==== +*/ -/* ITN is the total number of multiple-shift QR iterations allowed. */ + kbot = *info; - itn = nh * 30; + if (*n >= 49) { /* - The main loop begins here. I is the loop index and decreases from - IHI to ILO in steps of at most MAXB. Each iteration of the loop - works with the active submatrix in rows and columns L to I. - Eigenvalues I+1 to IHI have already converged. Either L = ILO, or - H(L,L-1) is negligible so that the matrix splits. + ==== Larger matrices have enough subdiagonal scratch + . space to call ZLAQR0 directly. ==== */ - i__ = *ihi; -L60: - if (i__ < *ilo) { - goto L180; - } - -/* - Perform multiple-shift QR iterations on rows and columns ILO to I - until a submatrix of order at most MAXB splits off at the bottom - because a subdiagonal element has become negligible. -*/ - - l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { - -/* Look for a single small subdiagonal element. */ - - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__5 = k + k * h_dim1; - tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k - - 1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__5].r, - abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( - d__4))); - if (tst1 == 0.) 
{ - i__3 = i__ - l + 1; - tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); - } - i__3 = k + (k - 1) * h_dim1; -/* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) { - goto L80; - } -/* L70: */ - } -L80: - l = k; - if (l > *ilo) { - -/* H(L,L-1) is negligible. */ - - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0., h__[i__2].i = 0.; - } - -/* Exit from loop if a submatrix of order <= MAXB has split off. */ - - if (l >= i__ - maxb + 1) { - goto L170; - } - -/* - Now the active submatrix is in rows and columns L to I. If - eigenvalues only are being computed, only the active submatrix - need be transformed. -*/ - - if (! wantt) { - i1 = l; - i2 = i__; - } - - if (its == 20 || its == 30) { + zlaqr0_(&wantt, &wantz, n, ilo, &kbot, &h__[h_offset], + ldh, &w[1], ilo, ihi, &z__[z_offset], ldz, &work[ + 1], lwork, info); -/* Exceptional shifts. */ - - i__2 = i__; - for (ii = i__ - ns + 1; ii <= i__2; ++ii) { - i__3 = ii; - i__5 = ii + (ii - 1) * h_dim1; - i__6 = ii + ii * h_dim1; - d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = h__[i__6].r, - abs(d__2))) * 1.5; - w[i__3].r = d__3, w[i__3].i = 0.; -/* L90: */ - } - } else { - -/* Use eigenvalues of trailing submatrix of order NS as shifts. */ - - zlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) * - h_dim1], ldh, s, &c__15); - zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ - - ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr); - if (ierr > 0) { + } else { /* - If ZLAHQR failed to compute all NS eigenvalues, use the - unconverged diagonal elements as the remaining shifts. + ==== Tiny matrices don't have enough subdiagonal + . scratch space to benefit from ZLAQR0. Hence, + . tiny matrices must be copied into a larger + . array before calling ZLAQR0. 
==== */ - i__2 = ierr; - for (ii = 1; ii <= i__2; ++ii) { - i__3 = i__ - ns + ii; - i__5 = ii + ii * 15 - 16; - w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i; -/* L100: */ + zlacpy_("A", n, n, &h__[h_offset], ldh, hl, &c__49); + i__1 = *n + 1 + *n * 49 - 50; + hl[i__1].r = 0., hl[i__1].i = 0.; + i__1 = 49 - *n; + zlaset_("A", &c__49, &i__1, &c_b56, &c_b56, &hl[(*n + 1) * + 49 - 49], &c__49); + zlaqr0_(&wantt, &wantz, &c__49, ilo, &kbot, hl, &c__49, & + w[1], ilo, ihi, &z__[z_offset], ldz, workl, & + c__49, info); + if (wantt || *info != 0) { + zlacpy_("A", n, n, hl, &c__49, &h__[h_offset], ldh); + } } } } -/* - Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns)) - where G is the Hessenberg submatrix H(L:I,L:I) and w is - the vector of shifts (stored in W). The result is - stored in the local array V. -*/ +/* ==== Clear out the trash, if necessary. ==== */ - v[0].r = 1., v[0].i = 0.; - i__2 = ns + 1; - for (ii = 2; ii <= i__2; ++ii) { - i__3 = ii - 1; - v[i__3].r = 0., v[i__3].i = 0.; -/* L110: */ - } - nv = 1; - i__2 = i__; - for (j = i__ - ns + 1; j <= i__2; ++j) { - i__3 = nv + 1; - zcopy_(&i__3, v, &c__1, vv, &c__1); - i__3 = nv + 1; - i__5 = j; - z__1.r = -w[i__5].r, z__1.i = -w[i__5].i; - zgemv_("No transpose", &i__3, &nv, &c_b60, &h__[l + l * h_dim1], - ldh, vv, &c__1, &z__1, v, &c__1); - ++nv; - -/* - Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero, - reset it to the unit vector. -*/ - - itemp = izamax_(&nv, v, &c__1); - i__3 = itemp - 1; - rtemp = (d__1 = v[i__3].r, abs(d__1)) + (d__2 = d_imag(&v[itemp - - 1]), abs(d__2)); - if (rtemp == 0.) { - v[0].r = 1., v[0].i = 0.; - i__3 = nv; - for (ii = 2; ii <= i__3; ++ii) { - i__5 = ii - 1; - v[i__5].r = 0., v[i__5].i = 0.; -/* L120: */ - } - } else { - rtemp = max(rtemp,smlnum); - d__1 = 1. 
/ rtemp; - zdscal_(&nv, &d__1, v, &c__1); - } -/* L130: */ + if ((wantt || *info != 0) && *n > 2) { + i__1 = *n - 2; + i__2 = *n - 2; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &h__[h_dim1 + 3], ldh); } -/* Multiple-shift QR step */ - - i__2 = i__ - 1; - for (k = l; k <= i__2; ++k) { - -/* - The first iteration of this loop determines a reflection G - from the vector V and applies it from left and right to H, - thus creating a nonzero bulge below the subdiagonal. - - Each subsequent iteration determines a reflection G to - restore the Hessenberg form in the (K-1)th column, and thus - chases the bulge one step toward the bottom of the active - submatrix. NR is the order of G. - - Computing MIN -*/ - i__3 = ns + 1, i__5 = i__ - k + 1; - nr = min(i__3,i__5); - if (k > l) { - zcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1); - } - zlarfg_(&nr, v, &v[1], &c__1, &tau); - if (k > l) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = i__; - for (ii = k + 1; ii <= i__3; ++ii) { - i__5 = ii + (k - 1) * h_dim1; - h__[i__5].r = 0., h__[i__5].i = 0.; -/* L140: */ - } - } - v[0].r = 1., v[0].i = 0.; - -/* - Apply G' from the left to transform the rows of the matrix - in columns K to I2. -*/ - - i__3 = i2 - k + 1; - d_cnjg(&z__1, &tau); - zlarfx_("Left", &nr, &i__3, v, &z__1, &h__[k + k * h_dim1], ldh, & - work[1]); - /* - Apply G from the right to transform the columns of the - matrix in rows I1 to min(K+NR,I). - - Computing MIN -*/ - i__5 = k + nr; - i__3 = min(i__5,i__) - i1 + 1; - zlarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh, - &work[1]); - - if (wantz) { - -/* Accumulate transformations in the matrix Z */ - - zlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1], - ldz, &work[1]); - } -/* L150: */ - } - -/* Ensure that H(I,I-1) is real. */ - - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; - if (d_imag(&temp) != 0.) 
{ - d__1 = temp.r; - d__2 = d_imag(&temp); - rtemp = dlapy2_(&d__1, &d__2); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.; - z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; - temp.r = z__1.r, temp.i = z__1.i; - if (i2 > i__) { - i__2 = i2 - i__; - d_cnjg(&z__1, &temp); - zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); - } - i__2 = i__ - i1; - zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); - if (wantz) { - zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1); - } - } + ==== Ensure reported workspace size is backward-compatible with + . previous LAPACK versions. ==== -/* L160: */ - } - -/* Failure to converge in remaining number of iterations */ - - *info = i__; - return 0; - -L170: - -/* - A submatrix of order <= MAXB in rows and columns L to I has split - off. Use the double-shift QR algorithm to handle it. + Computing MAX */ - - zlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi, - &z__[z_offset], ldz, info); - if (*info > 0) { - return 0; + d__2 = (doublereal) max(1,*n), d__3 = work[1].r; + d__1 = max(d__2,d__3); + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; } -/* - Decrement number of remaining iterations, and return to start of - the main loop with a new value of I. -*/ - - itn -= its; - i__ = l - 1; - goto L60; +/* ==== End of ZHSEQR ==== */ -L180: - i__1 = max(1,*n); - work[1].r = (doublereal) i__1, work[1].i = 0.; return 0; - -/* End of ZHSEQR */ - } /* zhseqr_ */ /* Subroutine */ int zlabrd_(integer *m, integer *n, integer *nb, @@ -8684,10 +8511,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -8763,7 +8590,7 @@ static doublereal c_b2210 = .5; The n-by-nb matrix Y required to update the unreduced part of A. - LDY (output) INTEGER + LDY (input) INTEGER The leading dimension of the array Y. LDY >= max(1,N). Further Details @@ -8851,7 +8678,7 @@ static doublereal c_b2210 = .5; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, - &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + i__ * a_dim1], & + &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + i__ * a_dim1], & c__1); i__2 = i__ - 1; zlacgv_(&i__2, &y[i__ + y_dim1], ldy); @@ -8859,7 +8686,7 @@ static doublereal c_b2210 = .5; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + x_dim1], ldx, - &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[i__ + i__ * + &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[i__ + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+1:m,i) */ @@ -8881,31 +8708,31 @@ static doublereal c_b2210 = .5; i__2 = *m - i__ + 1; i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + ( + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + ( i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], & - c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], &c__1); + c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + - a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b59, & + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + + a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[ + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__ + 1; i__3 = i__ - 1; - zgemv_("Conjugate 
transpose", &i__2, &i__3, &c_b60, &x[i__ + - x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b59, & + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &x[i__ + + x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b56, & y[i__ * y_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; z__1.r = -1., z__1.i = -0.; zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1); + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); @@ -8917,7 +8744,7 @@ static doublereal c_b2210 = .5; i__2 = *n - i__; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__, &z__1, &y[i__ + 1 + - y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b60, &a[i__ + + y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b57, &a[i__ + (i__ + 1) * a_dim1], lda); zlacgv_(&i__, &a[i__ + a_dim1], lda); i__2 = i__ - 1; @@ -8926,7 +8753,7 @@ static doublereal c_b2210 = .5; i__3 = *n - i__; z__1.r = -1., z__1.i = -0.; zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ + - 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, + 1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, &a[i__ + (i__ + 1) * a_dim1], lda); i__2 = i__ - 1; zlacgv_(&i__2, &x[i__ + x_dim1], ldx); @@ -8949,28 +8776,28 @@ static doublereal c_b2210 = .5; i__2 = *m - i__; i__3 = *n - i__; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + ( + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + ( i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1], - lda, &c_b59, &x[i__ + 1 + i__ * x_dim1], &c__1); + lda, &c_b56, &x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &y[i__ + 1 + zgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &y[i__ + 1 + y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b59, &x[i__ * x_dim1 + 1], &c__1); + c_b56, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; z__1.r = -1., z__1.i = -0.; 
zgemv_("No transpose", &i__2, &i__, &z__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b59, &x[i__ * x_dim1 + 1], &c__1); + c_b56, &x[i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); @@ -8996,7 +8823,7 @@ static doublereal c_b2210 = .5; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + y_dim1], ldy, - &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], lda); i__2 = i__ - 1; zlacgv_(&i__2, &a[i__ + a_dim1], lda); @@ -9006,7 +8833,7 @@ static doublereal c_b2210 = .5; i__3 = *n - i__ + 1; z__1.r = -1., z__1.i = -0.; zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[i__ * - a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, &a[i__ + + a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b57, &a[i__ + i__ * a_dim1], lda); i__2 = i__ - 1; zlacgv_(&i__2, &x[i__ + x_dim1], ldx); @@ -9030,30 +8857,30 @@ static doublereal c_b2210 = .5; i__2 = *m - i__; i__3 = *n - i__ + 1; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + i__ - * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, & + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + i__ + * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, & x[i__ + 1 + i__ * x_dim1], &c__1); i__2 = *n - i__ + 1; i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, 
&y[i__ + - y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[ + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &y[i__ + + y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + - a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__ + 1; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ * a_dim1 - + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[ + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[i__ * a_dim1 + + 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b56, &x[ i__ * x_dim1 + 1], &c__1); i__2 = *m - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 + - x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[ + x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b57, &x[ i__ + 1 + i__ * x_dim1], &c__1); i__2 = *m - i__; zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1); @@ -9068,14 +8895,14 @@ static doublereal c_b2210 = .5; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + - a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + + a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b57, &a[i__ + 1 + i__ * a_dim1], &c__1); i__2 = i__ - 1; zlacgv_(&i__2, &y[i__ + y_dim1], ldy); i__2 = *m - i__; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__, &z__1, &x[i__ + 1 + - x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[ + x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b57, &a[ i__ + 1 + i__ * a_dim1], &c__1); /* Generate reflection Q(i) to annihilate A(i+2:m,i) */ @@ -9096,30 +8923,30 @@ static doublereal c_b2210 = .5; i__2 = *m - i__; i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + (i__ 
+ 1) * a_dim1], lda, &a[i__ + 1 + i__ * - a_dim1], &c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], & + a_dim1], &c__1, &c_b56, &y[i__ + 1 + i__ * y_dim1], & c__1); i__2 = *m - i__; i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &y[i__ * y_dim1 + 1], &c__1); + c_b56, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 + - y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[ + y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b57, &y[ i__ + 1 + i__ * y_dim1], &c__1); i__2 = *m - i__; - zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &x[i__ + 1 + zgemv_("Conjugate transpose", &i__2, &i__, &c_b57, &x[i__ + 1 + x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &y[i__ * y_dim1 + 1], &c__1); + c_b56, &y[i__ * y_dim1 + 1], &c__1); i__2 = *n - i__; z__1.r = -1., z__1.i = -0.; zgemv_("Conjugate transpose", &i__, &i__2, &z__1, &a[(i__ + 1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, & - c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1); + c_b57, &y[i__ + 1 + i__ * y_dim1], &c__1); i__2 = *n - i__; zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1); } else { @@ -9149,10 +8976,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9222,10 +9049,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9334,10 +9161,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9455,10 +9282,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9536,8 +9363,8 @@ static doublereal c_b2210 = .5; } l = *m * *n + 1; - dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, & - c_b324, &rwork[l], m); + dgemm_("N", "N", m, n, n, &c_b1034, &rwork[1], m, &b[b_offset], ldb, & + c_b328, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -9559,8 +9386,8 @@ static doublereal c_b2210 = .5; } /* L60: */ } - dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, & - c_b324, &rwork[l], m); + dgemm_("N", "N", m, n, n, &c_b1034, &rwork[1], m, &b[b_offset], ldb, & + c_b328, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -9599,10 +9426,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -9677,10 +9504,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10001,8 +9828,7 @@ static doublereal c_b2210 = .5; integer pow_ii(integer *, integer *); /* Local variables */ - static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr, - indxc, indxp; + static integer i__, k, n1, n2, iq, iw, iz, ptr, indx, curr, indxc, indxp; extern /* Subroutine */ int dlaed9_(integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *), @@ -10023,10 +9849,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -10279,8 +10105,6 @@ static doublereal c_b2210 = .5; n1 = k; n2 = *n - k; - ind1 = 1; - ind2 = *n; dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]); } else { qptr[curr + 1] = qptr[curr]; @@ -10332,10 +10156,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, - Courant Institute, NAG Ltd., and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -10492,6 +10316,15 @@ static doublereal c_b2210 = .5; return 0; } +/* + Need to initialize GIVPTR to O here in case of quick exit + to prevent an unspecified code behavior (usually sigfault) + when IWORK array on entry to *stedc is not zeroed + (or at least some IWORK entries which used in *laed7 for GIVPTR). 
+*/ + + *givptr = 0; + /* Quick return if possible */ if (*n == 0) { @@ -10503,7 +10336,7 @@ static doublereal c_b2210 = .5; n1p1 = n1 + 1; if (*rho < 0.) { - dscal_(&n2, &c_b1294, &z__[n1p1], &c__1); + dscal_(&n2, &c_b1276, &z__[n1p1], &c__1); } /* Normalize z so that norm(z) = 1 */ @@ -10575,7 +10408,6 @@ static doublereal c_b2210 = .5; */ *k = 0; - *givptr = 0; k2 = *n + 1; i__1 = *n; for (j = 1; j <= i__1; ++j) { @@ -10718,15 +10550,16 @@ static doublereal c_b2210 = .5; integer *ldz, integer *info) { /* System generated locals */ - integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; doublereal d__1, d__2, d__3, d__4, d__5, d__6; - doublecomplex z__1, z__2, z__3, z__4; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7; /* Builtin functions */ double d_imag(doublecomplex *); - void z_sqrt(doublecomplex *, doublecomplex *), d_cnjg(doublecomplex *, - doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); double z_abs(doublecomplex *); + void z_sqrt(doublecomplex *, doublecomplex *), pow_zi(doublecomplex *, + doublecomplex *, integer *); /* Local variables */ static integer i__, j, k, l, m; @@ -10736,62 +10569,63 @@ static doublereal c_b2210 = .5; static doublecomplex t1; static doublereal t2; static doublecomplex v2; - static doublereal h10; + static doublereal aa, ab, ba, bb, h10; static doublecomplex h11; static doublereal h21; - static doublecomplex h22; + static doublecomplex h22, sc; static integer nh, nz; + static doublereal sx; + static integer jhi; static doublecomplex h11s; - static integer itn, its; + static integer jlo, its; static doublereal ulp; static doublecomplex sum; - static doublereal tst1; + static doublereal tst; static doublecomplex temp; extern /* Subroutine */ int zscal_(integer *, doublecomplex *, doublecomplex *, integer *); - static doublereal rtemp, rwork[1]; + static doublereal rtemp; extern /* Subroutine */ int zcopy_(integer 
*, doublecomplex *, integer *, - doublecomplex *, integer *); + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + static doublereal safmin, safmax; extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *); extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *, doublecomplex *); - extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *, - doublereal *); static doublereal smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 - Purpose - ======= + Purpose + ======= - ZLAHQR is an auxiliary routine called by ZHSEQR to update the - eigenvalues and Schur decomposition already computed by ZHSEQR, by - dealing with the Hessenberg submatrix in rows and columns ILO to IHI. + ZLAHQR is an auxiliary routine called by CHSEQR to update the + eigenvalues and Schur decomposition already computed by CHSEQR, by + dealing with the Hessenberg submatrix in rows and columns ILO to + IHI. - Arguments - ========= + Arguments + ========= - WANTT (input) LOGICAL + WANTT (input) LOGICAL = .TRUE. : the full Schur form T is required; = .FALSE.: only eigenvalues are required. - WANTZ (input) LOGICAL + WANTZ (input) LOGICAL = .TRUE. : the matrix of Schur vectors Z is required; = .FALSE.: Schur vectors are not required. - N (input) INTEGER + N (input) INTEGER The order of the matrix H. N >= 0. - ILO (input) INTEGER - IHI (input) INTEGER + ILO (input) INTEGER + IHI (input) INTEGER It is assumed that H is already upper triangular in rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1). 
ZLAHQR works primarily with the Hessenberg submatrix in rows @@ -10799,46 +10633,78 @@ static doublereal c_b2210 = .5; H if WANTT is .TRUE.. 1 <= ILO <= max(1,IHI); IHI <= N. - H (input/output) COMPLEX*16 array, dimension (LDH,N) + H (input/output) COMPLEX*16 array, dimension (LDH,N) On entry, the upper Hessenberg matrix H. - On exit, if WANTT is .TRUE., H is upper triangular in rows - and columns ILO:IHI, with any 2-by-2 diagonal blocks in - standard form. If WANTT is .FALSE., the contents of H are - unspecified on exit. + On exit, if INFO is zero and if WANTT is .TRUE., then H + is upper triangular in rows and columns ILO:IHI. If INFO + is zero and if WANTT is .FALSE., then the contents of H + are unspecified on exit. The output state of H in case + INF is positive is below under the description of INFO. - LDH (input) INTEGER + LDH (input) INTEGER The leading dimension of the array H. LDH >= max(1,N). - W (output) COMPLEX*16 array, dimension (N) + W (output) COMPLEX*16 array, dimension (N) The computed eigenvalues ILO to IHI are stored in the corresponding elements of W. If WANTT is .TRUE., the eigenvalues are stored in the same order as on the diagonal of the Schur form returned in H, with W(i) = H(i,i). - ILOZ (input) INTEGER - IHIZ (input) INTEGER + ILOZ (input) INTEGER + IHIZ (input) INTEGER Specify the rows of Z to which transformations must be applied if WANTZ is .TRUE.. 1 <= ILOZ <= ILO; IHI <= IHIZ <= N. - Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) If WANTZ is .TRUE., on entry Z must contain the current - matrix Z of transformations accumulated by ZHSEQR, and on + matrix Z of transformations accumulated by CHSEQR, and on exit Z has been updated; transformations are applied only to the submatrix Z(ILOZ:IHIZ,ILO:IHI). If WANTZ is .FALSE., Z is not referenced. - LDZ (input) INTEGER + LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). 
- INFO (output) INTEGER - = 0: successful exit - > 0: if INFO = i, ZLAHQR failed to compute all the - eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1) - iterations; elements i+1:ihi of W contain those - eigenvalues which have been successfully computed. + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, ZLAHQR failed to compute all the + eigenvalues ILO to IHI in a total of 30 iterations + per eigenvalue; elements i+1:ihi of W contain + those eigenvalues which have been successfully + computed. - ===================================================================== + If INFO .GT. 0 and WANTT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the + eigenvalues of the upper Hessenberg matrix + rows and columns ILO thorugh INFO of the final, + output value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + (*) (initial value of H)*U = U*(final value of H) + where U is an orthognal matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + (final value of Z) = (initial value of Z)*U + where U is the orthogonal matrix in (*) + (regardless of the value of WANTT.) + + Further Details + =============== + + 02-96 Based on modifications by + David Day, Sandia National Laboratory, USA + + 12-04 Further modifications by + Ralph Byers, University of Kansas, USA + This is a modified version of ZLAHQR from LAPACK version 3.0. + It is (1) more robust against overflow and underflow and + (2) adopts the more conservative Ahues & Tisseur stopping + criterion (LAWN 122, 1997). 
+ + ========================================================= */ @@ -10866,16 +10732,74 @@ static doublereal c_b2210 = .5; return 0; } +/* ==== clear out the trash ==== */ + i__1 = *ihi - 3; + for (j = *ilo; j <= i__1; ++j) { + i__2 = j + 2 + j * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; + i__2 = j + 3 + j * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; +/* L10: */ + } + if (*ilo <= *ihi - 2) { + i__1 = *ihi + (*ihi - 2) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } +/* ==== ensure that subdiagonal entries are real ==== */ + if (*wantt) { + jlo = 1; + jhi = *n; + } else { + jlo = *ilo; + jhi = *ihi; + } + i__1 = *ihi; + for (i__ = *ilo + 1; i__ <= i__1; ++i__) { + if (d_imag(&h__[i__ + (i__ - 1) * h_dim1]) != 0.) { +/* + ==== The following redundant normalization + . avoids problems with both gradual and + . sudden underflow in ABS(H(I,I-1)) ==== +*/ + i__2 = i__ + (i__ - 1) * h_dim1; + i__3 = i__ + (i__ - 1) * h_dim1; + d__3 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[i__ + + (i__ - 1) * h_dim1]), abs(d__2)); + z__1.r = h__[i__2].r / d__3, z__1.i = h__[i__2].i / d__3; + sc.r = z__1.r, sc.i = z__1.i; + d_cnjg(&z__2, &sc); + d__1 = z_abs(&sc); + z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1; + sc.r = z__1.r, sc.i = z__1.i; + i__2 = i__ + (i__ - 1) * h_dim1; + d__1 = z_abs(&h__[i__ + (i__ - 1) * h_dim1]); + h__[i__2].r = d__1, h__[i__2].i = 0.; + i__2 = jhi - i__ + 1; + zscal_(&i__2, &sc, &h__[i__ + i__ * h_dim1], ldh); +/* Computing MIN */ + i__3 = jhi, i__4 = i__ + 1; + i__2 = min(i__3,i__4) - jlo + 1; + d_cnjg(&z__1, &sc); + zscal_(&i__2, &z__1, &h__[jlo + i__ * h_dim1], &c__1); + if (*wantz) { + i__2 = *ihiz - *iloz + 1; + d_cnjg(&z__1, &sc); + zscal_(&i__2, &z__1, &z__[*iloz + i__ * z_dim1], &c__1); + } + } +/* L20: */ + } + nh = *ihi - *ilo + 1; nz = *ihiz - *iloz + 1; -/* - Set machine-dependent constants for the stopping criterion. - If norm(H) <= sqrt(OVFL), overflow should not occur. 
-*/ +/* Set machine-dependent constants for the stopping criterion. */ + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); ulp = PRECISION; - smlnum = SAFEMINIMUM / ulp; + smlnum = safmin * ((doublereal) nh / ulp); /* I1 and I2 are the indices of the first row and last column of H @@ -10888,10 +10812,6 @@ static doublereal c_b2210 = .5; i2 = *n; } -/* ITN is the total number of QR iterations allowed. */ - - itn = nh * 30; - /* The main loop begins here. I is the loop index and decreases from IHI to ILO in steps of 1. Each iteration of the loop works @@ -10901,9 +10821,9 @@ static doublereal c_b2210 = .5; */ i__ = *ihi; -L10: +L30: if (i__ < *ilo) { - goto L130; + goto L150; } /* @@ -10913,45 +10833,102 @@ static doublereal c_b2210 = .5; */ l = *ilo; - i__1 = itn; - for (its = 0; its <= i__1; ++its) { + for (its = 0; its <= 30; ++its) { /* Look for a single small subdiagonal element. */ - i__2 = l + 1; - for (k = i__; k >= i__2; --k) { - i__3 = k - 1 + (k - 1) * h_dim1; - i__4 = k + k * h_dim1; - tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k - - 1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__4].r, - abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( + i__1 = l + 1; + for (k = i__; k >= i__1; --k) { + i__2 = k + (k - 1) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[k + (k + - 1) * h_dim1]), abs(d__2)) <= smlnum) { + goto L50; + } + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + tst = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[k - 1 + + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__3].r, + abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs( d__4))); - if (tst1 == 0.) { - i__3 = i__ - l + 1; - tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork); + if (tst == 0.) 
{ + if (k - 2 >= *ilo) { + i__2 = k - 1 + (k - 2) * h_dim1; + tst += (d__1 = h__[i__2].r, abs(d__1)); + } + if (k + 1 <= *ihi) { + i__2 = k + 1 + k * h_dim1; + tst += (d__1 = h__[i__2].r, abs(d__1)); + } } - i__3 = k + (k - 1) * h_dim1; +/* + ==== The following is a conservative small subdiagonal + . deflation criterion due to Ahues & Tisseur (LAWN 122, + . 1997). It has better mathematical foundation and + . improves accuracy in some examples. ==== +*/ + i__2 = k + (k - 1) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) <= ulp * tst) { /* Computing MAX */ - d__2 = ulp * tst1; - if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) { - goto L30; + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + d__5 = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + (k - 1) * h_dim1]), abs(d__2)), d__6 = (d__3 = + h__[i__3].r, abs(d__3)) + (d__4 = d_imag(&h__[k - 1 + + k * h_dim1]), abs(d__4)); + ab = max(d__5,d__6); +/* Computing MIN */ + i__2 = k + (k - 1) * h_dim1; + i__3 = k - 1 + k * h_dim1; + d__5 = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + (k - 1) * h_dim1]), abs(d__2)), d__6 = (d__3 = + h__[i__3].r, abs(d__3)) + (d__4 = d_imag(&h__[k - 1 + + k * h_dim1]), abs(d__4)); + ba = min(d__5,d__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + z__2.r = h__[i__2].r - h__[i__3].r, z__2.i = h__[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MAX */ + i__4 = k + k * h_dim1; + d__5 = (d__1 = h__[i__4].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + k * h_dim1]), abs(d__2)), d__6 = (d__3 = z__1.r, + abs(d__3)) + (d__4 = d_imag(&z__1), abs(d__4)); + aa = max(d__5,d__6); + i__2 = k - 1 + (k - 1) * h_dim1; + i__3 = k + k * h_dim1; + z__2.r = h__[i__2].r - h__[i__3].r, z__2.i = h__[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MIN */ + i__4 = k + k * h_dim1; + d__5 = (d__1 = h__[i__4].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + k * h_dim1]), abs(d__2)), d__6 = (d__3 = z__1.r, + 
abs(d__3)) + (d__4 = d_imag(&z__1), abs(d__4)); + bb = min(d__5,d__6); + s = aa + ab; +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * (bb * (aa / s)); + if (ba * (ab / s) <= max(d__1,d__2)) { + goto L50; + } } -/* L20: */ +/* L40: */ } -L30: +L50: l = k; if (l > *ilo) { /* H(L,L-1) is negligible */ - i__2 = l + (l - 1) * h_dim1; - h__[i__2].r = 0., h__[i__2].i = 0.; + i__1 = l + (l - 1) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; } /* Exit from loop if a submatrix of order 1 has split off. */ if (l >= i__) { - goto L120; + goto L140; } /* @@ -10965,42 +10942,66 @@ static doublereal c_b2210 = .5; i2 = i__; } - if (its == 10 || its == 20) { + if (its == 10) { /* Exceptional shift. */ - i__2 = i__ + (i__ - 1) * h_dim1; - s = (d__1 = h__[i__2].r, abs(d__1)) * .75; - i__2 = i__ + i__ * h_dim1; - z__1.r = s + h__[i__2].r, z__1.i = h__[i__2].i; + i__1 = l + 1 + l * h_dim1; + s = (d__1 = h__[i__1].r, abs(d__1)) * .75; + i__1 = l + l * h_dim1; + z__1.r = s + h__[i__1].r, z__1.i = h__[i__1].i; + t.r = z__1.r, t.i = z__1.i; + } else if (its == 20) { + +/* Exceptional shift. */ + + i__1 = i__ + (i__ - 1) * h_dim1; + s = (d__1 = h__[i__1].r, abs(d__1)) * .75; + i__1 = i__ + i__ * h_dim1; + z__1.r = s + h__[i__1].r, z__1.i = h__[i__1].i; t.r = z__1.r, t.i = z__1.i; } else { /* Wilkinson's shift. */ - i__2 = i__ + i__ * h_dim1; - t.r = h__[i__2].r, t.i = h__[i__2].i; - i__2 = i__ - 1 + i__ * h_dim1; - i__3 = i__ + (i__ - 1) * h_dim1; - d__1 = h__[i__3].r; - z__1.r = d__1 * h__[i__2].r, z__1.i = d__1 * h__[i__2].i; + i__1 = i__ + i__ * h_dim1; + t.r = h__[i__1].r, t.i = h__[i__1].i; + z_sqrt(&z__2, &h__[i__ - 1 + i__ * h_dim1]); + z_sqrt(&z__3, &h__[i__ + (i__ - 1) * h_dim1]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * + z__3.i + z__2.i * z__3.r; u.r = z__1.r, u.i = z__1.i; - if (u.r != 0. || u.i != 0.) 
{ - i__2 = i__ - 1 + (i__ - 1) * h_dim1; - z__2.r = h__[i__2].r - t.r, z__2.i = h__[i__2].i - t.i; + s = (d__1 = u.r, abs(d__1)) + (d__2 = d_imag(&u), abs(d__2)); + if (s != 0.) { + i__1 = i__ - 1 + (i__ - 1) * h_dim1; + z__2.r = h__[i__1].r - t.r, z__2.i = h__[i__1].i - t.i; z__1.r = z__2.r * .5, z__1.i = z__2.i * .5; x.r = z__1.r, x.i = z__1.i; - z__3.r = x.r * x.r - x.i * x.i, z__3.i = x.r * x.i + x.i * - x.r; - z__2.r = z__3.r + u.r, z__2.i = z__3.i + u.i; - z_sqrt(&z__1, &z__2); + sx = (d__1 = x.r, abs(d__1)) + (d__2 = d_imag(&x), abs(d__2)); +/* Computing MAX */ + d__3 = s, d__4 = (d__1 = x.r, abs(d__1)) + (d__2 = d_imag(&x), + abs(d__2)); + s = max(d__3,d__4); + z__5.r = x.r / s, z__5.i = x.i / s; + pow_zi(&z__4, &z__5, &c__2); + z__7.r = u.r / s, z__7.i = u.i / s; + pow_zi(&z__6, &z__7, &c__2); + z__3.r = z__4.r + z__6.r, z__3.i = z__4.i + z__6.i; + z_sqrt(&z__2, &z__3); + z__1.r = s * z__2.r, z__1.i = s * z__2.i; y.r = z__1.r, y.i = z__1.i; - if (x.r * y.r + d_imag(&x) * d_imag(&y) < 0.) { - z__1.r = -y.r, z__1.i = -y.i; - y.r = z__1.r, y.i = z__1.i; + if (sx > 0.) { + z__1.r = x.r / sx, z__1.i = x.i / sx; + z__2.r = x.r / sx, z__2.i = x.i / sx; + if (z__1.r * y.r + d_imag(&z__2) * d_imag(&y) < 0.) { + z__3.r = -y.r, z__3.i = -y.i; + y.r = z__3.r, y.i = z__3.i; + } } - z__3.r = x.r + y.r, z__3.i = x.i + y.i; - zladiv_(&z__2, &u, &z__3); + z__4.r = x.r + y.r, z__4.i = x.i + y.i; + zladiv_(&z__3, &u, &z__4); + z__2.r = u.r * z__3.r - u.i * z__3.i, z__2.i = u.r * z__3.i + + u.i * z__3.r; z__1.r = t.r - z__2.r, z__1.i = t.i - z__2.i; t.r = z__1.r, t.i = z__1.i; } @@ -11008,8 +11009,8 @@ static doublereal c_b2210 = .5; /* Look for two consecutive small subdiagonal elements. */ - i__2 = l + 1; - for (m = i__ - 1; m >= i__2; --m) { + i__1 = l + 1; + for (m = i__ - 1; m >= i__1; --m) { /* Determine the effect of starting the single-shift QR @@ -11017,14 +11018,14 @@ static doublereal c_b2210 = .5; negligible. 
*/ - i__3 = m + m * h_dim1; - h11.r = h__[i__3].r, h11.i = h__[i__3].i; - i__3 = m + 1 + (m + 1) * h_dim1; - h22.r = h__[i__3].r, h22.i = h__[i__3].i; + i__2 = m + m * h_dim1; + h11.r = h__[i__2].r, h11.i = h__[i__2].i; + i__2 = m + 1 + (m + 1) * h_dim1; + h22.r = h__[i__2].r, h22.i = h__[i__2].i; z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; h11s.r = z__1.r, h11s.i = z__1.i; - i__3 = m + 1 + m * h_dim1; - h21 = h__[i__3].r; + i__2 = m + 1 + m * h_dim1; + h21 = h__[i__2].r; s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + abs(h21); z__1.r = h11s.r / s, z__1.i = h11s.i / s; @@ -11032,25 +11033,24 @@ static doublereal c_b2210 = .5; h21 /= s; v[0].r = h11s.r, v[0].i = h11s.i; v[1].r = h21, v[1].i = 0.; - i__3 = m + (m - 1) * h_dim1; - h10 = h__[i__3].r; - tst1 = ((d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs( - d__2))) * ((d__3 = h11.r, abs(d__3)) + (d__4 = d_imag(& - h11), abs(d__4)) + ((d__5 = h22.r, abs(d__5)) + (d__6 = - d_imag(&h22), abs(d__6)))); - if ((d__1 = h10 * h21, abs(d__1)) <= ulp * tst1) { - goto L50; + i__2 = m + (m - 1) * h_dim1; + h10 = h__[i__2].r; + if (abs(h10) * abs(h21) <= ulp * (((d__1 = h11s.r, abs(d__1)) + ( + d__2 = d_imag(&h11s), abs(d__2))) * ((d__3 = h11.r, abs( + d__3)) + (d__4 = d_imag(&h11), abs(d__4)) + ((d__5 = + h22.r, abs(d__5)) + (d__6 = d_imag(&h22), abs(d__6)))))) { + goto L70; } -/* L40: */ +/* L60: */ } - i__2 = l + l * h_dim1; - h11.r = h__[i__2].r, h11.i = h__[i__2].i; - i__2 = l + 1 + (l + 1) * h_dim1; - h22.r = h__[i__2].r, h22.i = h__[i__2].i; + i__1 = l + l * h_dim1; + h11.r = h__[i__1].r, h11.i = h__[i__1].i; + i__1 = l + 1 + (l + 1) * h_dim1; + h22.r = h__[i__1].r, h22.i = h__[i__1].i; z__1.r = h11.r - t.r, z__1.i = h11.i - t.i; h11s.r = z__1.r, h11s.i = z__1.i; - i__2 = l + 1 + l * h_dim1; - h21 = h__[i__2].r; + i__1 = l + 1 + l * h_dim1; + h21 = h__[i__1].r; s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) + abs(h21); z__1.r = h11s.r / s, z__1.i = h11s.i / s; @@ -11058,12 
+11058,12 @@ static doublereal c_b2210 = .5; h21 /= s; v[0].r = h11s.r, v[0].i = h11s.i; v[1].r = h21, v[1].i = 0.; -L50: +L70: /* Single-shift QR step */ - i__2 = i__ - 1; - for (k = m; k <= i__2; ++k) { + i__1 = i__ - 1; + for (k = m; k <= i__1; ++k) { /* The first iteration of this loop determines a reflection G @@ -11084,10 +11084,10 @@ static doublereal c_b2210 = .5; } zlarfg_(&c__2, v, &v[1], &c__1, &t1); if (k > m) { - i__3 = k + (k - 1) * h_dim1; - h__[i__3].r = v[0].r, h__[i__3].i = v[0].i; - i__3 = k + 1 + (k - 1) * h_dim1; - h__[i__3].r = 0., h__[i__3].i = 0.; + i__2 = k + (k - 1) * h_dim1; + h__[i__2].r = v[0].r, h__[i__2].i = v[0].i; + i__2 = k + 1 + (k - 1) * h_dim1; + h__[i__2].r = 0., h__[i__2].i = 0.; } v2.r = v[1].r, v2.i = v[1].i; z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i + t1.i * @@ -11099,27 +11099,27 @@ static doublereal c_b2210 = .5; in columns K to I2. */ - i__3 = i2; - for (j = k; j <= i__3; ++j) { + i__2 = i2; + for (j = k; j <= i__2; ++j) { d_cnjg(&z__3, &t1); - i__4 = k + j * h_dim1; - z__2.r = z__3.r * h__[i__4].r - z__3.i * h__[i__4].i, z__2.i = - z__3.r * h__[i__4].i + z__3.i * h__[i__4].r; - i__5 = k + 1 + j * h_dim1; - z__4.r = t2 * h__[i__5].r, z__4.i = t2 * h__[i__5].i; + i__3 = k + j * h_dim1; + z__2.r = z__3.r * h__[i__3].r - z__3.i * h__[i__3].i, z__2.i = + z__3.r * h__[i__3].i + z__3.i * h__[i__3].r; + i__4 = k + 1 + j * h_dim1; + z__4.r = t2 * h__[i__4].r, z__4.i = t2 * h__[i__4].i; z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i; sum.r = z__1.r, sum.i = z__1.i; + i__3 = k + j * h_dim1; i__4 = k + j * h_dim1; - i__5 = k + j * h_dim1; - z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; + z__1.r = h__[i__4].r - sum.r, z__1.i = h__[i__4].i - sum.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + i__3 = k + 1 + j * h_dim1; i__4 = k + 1 + j * h_dim1; - i__5 = k + 1 + j * h_dim1; z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = sum.r * v2.i + sum.i * v2.r; - 
z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; -/* L60: */ + z__1.r = h__[i__4].r - z__2.r, z__1.i = h__[i__4].i - z__2.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; +/* L80: */ } /* @@ -11128,57 +11128,57 @@ static doublereal c_b2210 = .5; Computing MIN */ - i__4 = k + 2; - i__3 = min(i__4,i__); - for (j = i1; j <= i__3; ++j) { - i__4 = j + k * h_dim1; - z__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, z__2.i = - t1.r * h__[i__4].i + t1.i * h__[i__4].r; - i__5 = j + (k + 1) * h_dim1; - z__3.r = t2 * h__[i__5].r, z__3.i = t2 * h__[i__5].i; + i__3 = k + 2; + i__2 = min(i__3,i__); + for (j = i1; j <= i__2; ++j) { + i__3 = j + k * h_dim1; + z__2.r = t1.r * h__[i__3].r - t1.i * h__[i__3].i, z__2.i = + t1.r * h__[i__3].i + t1.i * h__[i__3].r; + i__4 = j + (k + 1) * h_dim1; + z__3.r = t2 * h__[i__4].r, z__3.i = t2 * h__[i__4].i; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; sum.r = z__1.r, sum.i = z__1.i; + i__3 = j + k * h_dim1; i__4 = j + k * h_dim1; - i__5 = j + k * h_dim1; - z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; + z__1.r = h__[i__4].r - sum.r, z__1.i = h__[i__4].i - sum.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + i__3 = j + (k + 1) * h_dim1; i__4 = j + (k + 1) * h_dim1; - i__5 = j + (k + 1) * h_dim1; d_cnjg(&z__3, &v2); z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * z__3.i + sum.i * z__3.r; - z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i; - h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; -/* L70: */ + z__1.r = h__[i__4].r - z__2.r, z__1.i = h__[i__4].i - z__2.i; + h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; +/* L90: */ } if (*wantz) { /* Accumulate transformations in the matrix Z */ - i__3 = *ihiz; - for (j = *iloz; j <= i__3; ++j) { - i__4 = j + k * z_dim1; - z__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, z__2.i = - t1.r * z__[i__4].i + t1.i * z__[i__4].r; - i__5 = j + (k + 1) * z_dim1; - z__3.r = 
t2 * z__[i__5].r, z__3.i = t2 * z__[i__5].i; + i__2 = *ihiz; + for (j = *iloz; j <= i__2; ++j) { + i__3 = j + k * z_dim1; + z__2.r = t1.r * z__[i__3].r - t1.i * z__[i__3].i, z__2.i = + t1.r * z__[i__3].i + t1.i * z__[i__3].r; + i__4 = j + (k + 1) * z_dim1; + z__3.r = t2 * z__[i__4].r, z__3.i = t2 * z__[i__4].i; z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; sum.r = z__1.r, sum.i = z__1.i; + i__3 = j + k * z_dim1; i__4 = j + k * z_dim1; - i__5 = j + k * z_dim1; - z__1.r = z__[i__5].r - sum.r, z__1.i = z__[i__5].i - + z__1.r = z__[i__4].r - sum.r, z__1.i = z__[i__4].i - sum.i; - z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; + z__[i__3].r = z__1.r, z__[i__3].i = z__1.i; + i__3 = j + (k + 1) * z_dim1; i__4 = j + (k + 1) * z_dim1; - i__5 = j + (k + 1) * z_dim1; d_cnjg(&z__3, &v2); z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r * z__3.i + sum.i * z__3.r; - z__1.r = z__[i__5].r - z__2.r, z__1.i = z__[i__5].i - + z__1.r = z__[i__4].r - z__2.r, z__1.i = z__[i__4].i - z__2.i; - z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; -/* L80: */ + z__[i__3].r = z__1.r, z__[i__3].i = z__1.i; +/* L100: */ } } @@ -11196,66 +11196,66 @@ static doublereal c_b2210 = .5; d__1 = z_abs(&temp); z__1.r = temp.r / d__1, z__1.i = temp.i / d__1; temp.r = z__1.r, temp.i = z__1.i; + i__2 = m + 1 + m * h_dim1; i__3 = m + 1 + m * h_dim1; - i__4 = m + 1 + m * h_dim1; d_cnjg(&z__2, &temp); - z__1.r = h__[i__4].r * z__2.r - h__[i__4].i * z__2.i, z__1.i = - h__[i__4].r * z__2.i + h__[i__4].i * z__2.r; - h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + z__1.r = h__[i__3].r * z__2.r - h__[i__3].i * z__2.i, z__1.i = + h__[i__3].r * z__2.i + h__[i__3].i * z__2.r; + h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; if (m + 2 <= i__) { + i__2 = m + 2 + (m + 1) * h_dim1; i__3 = m + 2 + (m + 1) * h_dim1; - i__4 = m + 2 + (m + 1) * h_dim1; - z__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i, - z__1.i = h__[i__4].r * temp.i + h__[i__4].i * + z__1.r = h__[i__3].r * temp.r - h__[i__3].i * temp.i, + z__1.i = 
h__[i__3].r * temp.i + h__[i__3].i * temp.r; - h__[i__3].r = z__1.r, h__[i__3].i = z__1.i; + h__[i__2].r = z__1.r, h__[i__2].i = z__1.i; } - i__3 = i__; - for (j = m; j <= i__3; ++j) { + i__2 = i__; + for (j = m; j <= i__2; ++j) { if (j != m + 1) { if (i2 > j) { - i__4 = i2 - j; - zscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1], + i__3 = i2 - j; + zscal_(&i__3, &temp, &h__[j + (j + 1) * h_dim1], ldh); } - i__4 = j - i1; + i__3 = j - i1; d_cnjg(&z__1, &temp); - zscal_(&i__4, &z__1, &h__[i1 + j * h_dim1], &c__1); + zscal_(&i__3, &z__1, &h__[i1 + j * h_dim1], &c__1); if (*wantz) { d_cnjg(&z__1, &temp); zscal_(&nz, &z__1, &z__[*iloz + j * z_dim1], & c__1); } } -/* L90: */ +/* L110: */ } } -/* L100: */ +/* L120: */ } /* Ensure that H(I,I-1) is real. */ - i__2 = i__ + (i__ - 1) * h_dim1; - temp.r = h__[i__2].r, temp.i = h__[i__2].i; + i__1 = i__ + (i__ - 1) * h_dim1; + temp.r = h__[i__1].r, temp.i = h__[i__1].i; if (d_imag(&temp) != 0.) { rtemp = z_abs(&temp); - i__2 = i__ + (i__ - 1) * h_dim1; - h__[i__2].r = rtemp, h__[i__2].i = 0.; + i__1 = i__ + (i__ - 1) * h_dim1; + h__[i__1].r = rtemp, h__[i__1].i = 0.; z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp; temp.r = z__1.r, temp.i = z__1.i; if (i2 > i__) { - i__2 = i2 - i__; + i__1 = i2 - i__; d_cnjg(&z__1, &temp); - zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); + zscal_(&i__1, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh); } - i__2 = i__ - i1; - zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1); + i__1 = i__ - i1; + zscal_(&i__1, &temp, &h__[i1 + i__ * h_dim1], &c__1); if (*wantz) { zscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1); } } -/* L110: */ +/* L130: */ } /* Failure to converge in remaining number of iterations */ @@ -11263,7 +11263,7 @@ static doublereal c_b2210 = .5; *info = i__; return 0; -L120: +L140: /* H(I,I-1) is negligible: one eigenvalue has converged. 
*/ @@ -11271,23 +11271,19 @@ static doublereal c_b2210 = .5; i__2 = i__ + i__ * h_dim1; w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i; -/* - Decrement number of remaining iterations, and return to start of - the main loop with new value of I. -*/ +/* return to start of the main loop with new value of I. */ - itn -= its; i__ = l - 1; - goto L10; + goto L30; -L130: +L150: return 0; /* End of ZLAHQR */ } /* zlahqr_ */ -/* Subroutine */ int zlahrd_(integer *n, integer *k, integer *nb, +/* Subroutine */ int zlahr2_(integer *n, integer *k, integer *nb, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t, integer *ldt, doublecomplex *y, integer *ldy) { @@ -11300,31 +11296,37 @@ static doublereal c_b2210 = .5; static integer i__; static doublecomplex ei; extern /* Subroutine */ int zscal_(integer *, doublecomplex *, - doublecomplex *, integer *), zgemv_(char *, integer *, integer *, + doublecomplex *, integer *), zgemm_(char *, char *, integer *, + integer *, integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, - integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, - integer *, doublecomplex *, integer *), ztrmv_(char *, char *, - char *, integer *, doublecomplex *, integer *, doublecomplex *, - integer *), zlarfg_(integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *), - zlacgv_(integer *, doublecomplex *, integer *); + integer *), ztrmm_(char *, char *, char *, char *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), + zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *), ztrmv_(char *, char *, char *, + integer *, doublecomplex *, 
integer *, doublecomplex *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *), zlacgv_(integer *, + doublecomplex *, integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2.1) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + -- April 2009 -- Purpose ======= - ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1) + ZLAHR2 reduces the first NB columns of A complex general n-BY-(n-k+1) matrix A so that elements below the k-th subdiagonal are zero. The - reduction is performed by a unitary similarity transformation + reduction is performed by an unitary similarity transformation Q' * A * Q. The routine returns the matrices V and T which determine Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T. @@ -11339,6 +11341,7 @@ static doublereal c_b2210 = .5; K (input) INTEGER The offset for the reduction. Elements below the k-th subdiagonal in the first NB columns are reduced to zero. + K < N. NB (input) INTEGER The number of columns to be reduced. @@ -11369,7 +11372,7 @@ static doublereal c_b2210 = .5; The n-by-nb matrix Y. LDY (input) INTEGER - The leading dimension of the array Y. LDY >= max(1,N). + The leading dimension of the array Y. LDY >= N. 
Further Details =============== @@ -11394,9 +11397,9 @@ static doublereal c_b2210 = .5; The contents of A on exit are illustrated by the following example with n = 7, k = 3 and nb = 2: - ( a h a a a ) - ( a h a a a ) - ( a h a a a ) + ( a a a a a ) + ( a a a a a ) + ( a a a a a ) ( h h a a a ) ( v1 h a a a ) ( v1 v2 a a a ) @@ -11406,6 +11409,19 @@ static doublereal c_b2210 = .5; modified element of the upper Hessenberg matrix H, and vi denotes an element of the vector defining H(i). + This subroutine is a slight modification of LAPACK-3.0's DLAHRD + incorporating improvements proposed by Quintana-Orti and Van de + Gejin. Note that the entries of A(1:K,2:NB) differ from those + returned by the original LAPACK-3.0's DLAHRD routine. (This + subroutine is not backward compatible with LAPACK-3.0's DLAHRD.) + + References + ========== + + Gregorio Quintana-Orti and Robert van de Geijn, "Improving the + performance of reduction to Hessenberg form," ACM Transactions on + Mathematical Software, 32(2):180-194, June 2006. 
+ ===================================================================== @@ -11434,18 +11450,19 @@ static doublereal c_b2210 = .5; if (i__ > 1) { /* - Update A(1:n,i) + Update A(K+1:N,I) - Compute i-th column of A - Y * V' + Update I-th column of A - Y * V' */ i__2 = i__ - 1; zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); - i__2 = i__ - 1; + i__2 = *n - *k; + i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &a[*k - + i__ - 1 + a_dim1], lda, &c_b60, &a[i__ * a_dim1 + 1], & - c__1); + zgemv_("NO TRANSPOSE", &i__2, &i__3, &z__1, &y[*k + 1 + y_dim1], + ldy, &a[*k + i__ - 1 + a_dim1], lda, &c_b57, &a[*k + 1 + + i__ * a_dim1], &c__1); i__2 = i__ - 1; zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda); @@ -11465,21 +11482,21 @@ static doublereal c_b2210 = .5; zcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; - ztrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 + + ztrmv_("Lower", "Conjugate transpose", "UNIT", &i__2, &a[*k + 1 + a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1); /* w := w + V2'*b2 */ i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b60, + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b57, &t[*nb * t_dim1 + 1], &c__1); /* w := T'*w */ i__2 = i__ - 1; - ztrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[ + ztrmv_("Upper", "Conjugate transpose", "NON-UNIT", &i__2, &t[ t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1); /* b2 := b2 - V2*w */ @@ -11487,14 +11504,14 @@ static doublereal c_b2210 = .5; i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", &i__2, &i__3, &z__1, &a[*k + i__ + a_dim1], - lda, &t[*nb * t_dim1 + 1], &c__1, &c_b60, &a[*k + i__ + + zgemv_("NO TRANSPOSE", &i__2, &i__3, &z__1, &a[*k + i__ + a_dim1], + lda, 
&t[*nb * t_dim1 + 1], &c__1, &c_b57, &a[*k + i__ + i__ * a_dim1], &c__1); /* b1 := b1 - V1*w */ i__2 = i__ - 1; - ztrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1] + ztrmv_("Lower", "NO TRANSPOSE", "UNIT", &i__2, &a[*k + 1 + a_dim1] , lda, &t[*nb * t_dim1 + 1], &c__1); i__2 = i__ - 1; z__1.r = -1., z__1.i = -0.; @@ -11506,45 +11523,49 @@ static doublereal c_b2210 = .5; } /* - Generate the elementary reflector H(i) to annihilate - A(k+i+1:n,i) + Generate the elementary reflector H(I) to annihilate + A(K+I+1:N,I) */ - i__2 = *k + i__ + i__ * a_dim1; - ei.r = a[i__2].r, ei.i = a[i__2].i; i__2 = *n - *k - i__ + 1; /* Computing MIN */ i__3 = *k + i__ + 1; - zlarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__]) - ; + zlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ * + a_dim1], &c__1, &tau[i__]); + i__2 = *k + i__ + i__ * a_dim1; + ei.r = a[i__2].r, ei.i = a[i__2].i; i__2 = *k + i__ + i__ * a_dim1; a[i__2].r = 1., a[i__2].i = 0.; -/* Compute Y(1:n,i) */ +/* Compute Y(K+1:N,I) */ - i__2 = *n - *k - i__ + 1; - zgemv_("No transpose", n, &i__2, &c_b60, &a[(i__ + 1) * a_dim1 + 1], - lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &y[i__ * - y_dim1 + 1], &c__1); + i__2 = *n - *k; + i__3 = *n - *k - i__ + 1; + zgemv_("NO TRANSPOSE", &i__2, &i__3, &c_b57, &a[*k + 1 + (i__ + 1) * + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &y[* + k + 1 + i__ * y_dim1], &c__1); i__2 = *n - *k - i__ + 1; i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ + - a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &t[ + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[*k + i__ + + a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56, &t[ i__ * t_dim1 + 1], &c__1); - i__2 = i__ - 1; + i__2 = *n - *k; + i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; - zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &t[i__ * - t_dim1 + 1], &c__1, &c_b60, &y[i__ * y_dim1 + 1], &c__1); - zscal_(n, 
&tau[i__], &y[i__ * y_dim1 + 1], &c__1); + zgemv_("NO TRANSPOSE", &i__2, &i__3, &z__1, &y[*k + 1 + y_dim1], ldy, + &t[i__ * t_dim1 + 1], &c__1, &c_b57, &y[*k + 1 + i__ * y_dim1] + , &c__1); + i__2 = *n - *k; + zscal_(&i__2, &tau[i__], &y[*k + 1 + i__ * y_dim1], &c__1); -/* Compute T(1:i,i) */ +/* Compute T(1:I,I) */ i__2 = i__ - 1; i__3 = i__; z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; zscal_(&i__2, &z__1, &t[i__ * t_dim1 + 1], &c__1); i__2 = i__ - 1; - ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt, + ztrmv_("Upper", "No Transpose", "NON-UNIT", &i__2, &t[t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1) ; i__2 = i__ + i__ * t_dim1; @@ -11556,11 +11577,25 @@ static doublereal c_b2210 = .5; i__1 = *k + *nb + *nb * a_dim1; a[i__1].r = ei.r, a[i__1].i = ei.i; +/* Compute Y(1:K,1:NB) */ + + zlacpy_("ALL", k, nb, &a[(a_dim1 << 1) + 1], lda, &y[y_offset], ldy); + ztrmm_("RIGHT", "Lower", "NO TRANSPOSE", "UNIT", k, nb, &c_b57, &a[*k + 1 + + a_dim1], lda, &y[y_offset], ldy); + if (*n > *k + *nb) { + i__1 = *n - *k - *nb; + zgemm_("NO TRANSPOSE", "NO TRANSPOSE", k, nb, &i__1, &c_b57, &a[(*nb + + 2) * a_dim1 + 1], lda, &a[*k + 1 + *nb + a_dim1], lda, & + c_b57, &y[y_offset], ldy); + } + ztrmm_("RIGHT", "Upper", "NO TRANSPOSE", "NON-UNIT", k, nb, &c_b57, &t[ + t_offset], ldt, &y[y_offset], ldy); + return 0; -/* End of ZLAHRD */ +/* End of ZLAHR2 */ -} /* zlahrd_ */ +} /* zlahr2_ */ /* Subroutine */ int zlals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb, @@ -11605,10 +11640,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - December 1, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -11857,7 +11892,7 @@ static doublereal c_b2210 = .5; if (*k == 1) { zcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.) { - zdscal_(nrhs, &c_b1294, &b[b_offset], ldb); + zdscal_(nrhs, &c_b1276, &b[b_offset], ldb); } } else { i__1 = *k; @@ -11924,8 +11959,8 @@ static doublereal c_b2210 = .5; } /* L60: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1], &c__1); i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { @@ -11937,8 +11972,8 @@ static doublereal c_b2210 = .5; } /* L80: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1 + *nrhs], &c__1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { @@ -11949,7 +11984,7 @@ static doublereal c_b2210 = .5; b[i__3].r = z__1.r, b[i__3].i = z__1.i; /* L90: */ } - zlascl_("G", &c__0, &c__0, &temp, &c_b1015, &c__1, nrhs, &b[j + zlascl_("G", &c__0, &c__0, &temp, &c_b1034, &c__1, nrhs, &b[j + b_dim1], ldb, info); /* L100: */ } @@ -12028,8 +12063,8 @@ static doublereal c_b2210 = .5; } /* L140: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &c__1); + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], + k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1], &c__1); i__ = *k + (*nrhs << 1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { @@ -12041,8 +12076,8 @@ static doublereal c_b2210 = .5; } /* L160: */ } - dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + (*nrhs << 1)], - k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *nrhs], + dgemv_("T", k, nrhs, &c_b1034, &rwork[*k + 1 + (*nrhs << 1)], 
+ k, &rwork[1], &c__1, &c_b328, &rwork[*k + 1 + *nrhs], &c__1); i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { @@ -12142,10 +12177,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -12181,10 +12216,10 @@ static doublereal c_b2210 = .5; NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. - B (input) COMPLEX*16 array, dimension ( LDB, NRHS ) + B (input/output) COMPLEX*16 array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least - squares problem in rows 1 through M. On output, B contains - the solution X in rows 1 through N. + squares problem in rows 1 through M. + On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B in the calling subprogram. @@ -12264,7 +12299,7 @@ static doublereal c_b2210 = .5; the right null space of the I-th subproblem. RWORK (workspace) DOUBLE PRECISION array, dimension at least - max ( N, (SMLSZ+1)*NRHS*3 ). + MAX( (SMLSZ+1)*NRHS*3, N*(1+NRHS) + 2*NRHS ). IWORK (workspace) INTEGER array. 
The dimension must be at least 3 * N @@ -12418,8 +12453,8 @@ static doublereal c_b2210 = .5; } /* L20: */ } - dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & - rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[1], &nl); + dgemm_("T", "N", &nl, nrhs, &nl, &c_b1034, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b328, &rwork[1], &nl); j = nl * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { @@ -12431,8 +12466,8 @@ static doublereal c_b2210 = .5; } /* L40: */ } - dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, & - rwork[(nl * *nrhs << 1) + 1], &nl, &c_b324, &rwork[nl * *nrhs + dgemm_("T", "N", &nl, nrhs, &nl, &c_b1034, &u[nlf + u_dim1], ldu, & + rwork[(nl * *nrhs << 1) + 1], &nl, &c_b328, &rwork[nl * *nrhs + 1], &nl); jreal = 0; jimag = nl * *nrhs; @@ -12472,8 +12507,8 @@ static doublereal c_b2210 = .5; } /* L80: */ } - dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & - rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[1], &nr); + dgemm_("T", "N", &nr, nrhs, &nr, &c_b1034, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b328, &rwork[1], &nr); j = nr * *nrhs << 1; i__2 = *nrhs; for (jcol = 1; jcol <= i__2; ++jcol) { @@ -12485,8 +12520,8 @@ static doublereal c_b2210 = .5; } /* L100: */ } - dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, & - rwork[(nr * *nrhs << 1) + 1], &nr, &c_b324, &rwork[nr * *nrhs + dgemm_("T", "N", &nr, nrhs, &nr, &c_b1034, &u[nrf + u_dim1], ldu, & + rwork[(nr * *nrhs << 1) + 1], &nr, &c_b328, &rwork[nr * *nrhs + 1], &nr); jreal = 0; jimag = nr * *nrhs; @@ -12662,8 +12697,8 @@ static doublereal c_b2210 = .5; } /* L210: */ } - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], - ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1034, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b328, &rwork[ 1], &nlp1); j = nlp1 * *nrhs << 1; 
i__2 = *nrhs; @@ -12676,8 +12711,8 @@ static doublereal c_b2210 = .5; } /* L230: */ } - dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1], - ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b324, &rwork[ + dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1034, &vt[nlf + vt_dim1], + ldu, &rwork[(nlp1 * *nrhs << 1) + 1], &nlp1, &c_b328, &rwork[ nlp1 * *nrhs + 1], &nlp1); jreal = 0; jimag = nlp1 * *nrhs; @@ -12717,8 +12752,8 @@ static doublereal c_b2210 = .5; } /* L270: */ } - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], - ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1034, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b328, &rwork[ 1], &nrp1); j = nrp1 * *nrhs << 1; i__2 = *nrhs; @@ -12731,8 +12766,8 @@ static doublereal c_b2210 = .5; } /* L290: */ } - dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1], - ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b324, &rwork[ + dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1034, &vt[nrf + vt_dim1], + ldu, &rwork[(nrp1 * *nrhs << 1) + 1], &nrp1, &c_b328, &rwork[ nrp1 * *nrhs + 1], &nrp1); jreal = 0; jimag = nrp1 * *nrhs; @@ -12788,8 +12823,10 @@ static doublereal c_b2210 = .5; static doublereal eps; static integer iwk; static doublereal tol; - static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow, - irwu, jimag; + static integer difl, difr; + static doublereal rcnd; + static integer jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow, irwu, + jimag; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); @@ -12834,10 +12871,10 @@ static doublereal c_b2210 = .5; /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1999 + -- LAPACK routine (version 3.2.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + June 2010 Purpose @@ -12881,7 +12918,7 @@ static doublereal c_b2210 = .5; On entry D contains the main diagonal of the bidiagonal matrix. On exit, if INFO = 0, D contains its singular values. - E (input) DOUBLE PRECISION array, dimension (N-1) + E (input/output) DOUBLE PRECISION array, dimension (N-1) Contains the super-diagonal entries of the bidiagonal matrix. On exit, E has been destroyed. @@ -12912,7 +12949,8 @@ static doublereal c_b2210 = .5; (N * NRHS). RWORK (workspace) DOUBLE PRECISION array, dimension at least - (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2), + (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + + MAX( (SMLSIZ+1)**2, N*(1+NRHS) + 2*NRHS ), where NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 ) @@ -12922,7 +12960,7 @@ static doublereal c_b2210 = .5; INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an singular value while + > 0: The algorithm failed to compute a singular value while working on the submatrix lying in rows and columns INFO/(N+1) through MOD(INFO,N+1). @@ -12971,7 +13009,9 @@ static doublereal c_b2210 = .5; /* Set up the tolerance. */ if (*rcond <= 0. || *rcond >= 1.) { - *rcond = eps; + rcnd = eps; + } else { + rcnd = *rcond; } *rank = 0; @@ -12982,10 +13022,10 @@ static doublereal c_b2210 = .5; return 0; } else if (*n == 1) { if (d__[1] == 0.) 
{ - zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); + zlaset_("A", &c__1, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); } else { *rank = 1; - zlascl_("G", &c__0, &c__0, &d__[1], &c_b1015, &c__1, nrhs, &b[ + zlascl_("G", &c__0, &c__0, &d__[1], &c_b1034, &c__1, nrhs, &b[ b_offset], ldb, info); d__[1] = abs(d__[1]); } @@ -13031,12 +13071,12 @@ static doublereal c_b2210 = .5; nm1 = *n - 1; orgnrm = dlanst_("M", n, &d__[1], &e[1]); if (orgnrm == 0.) { - zlaset_("A", n, nrhs, &c_b59, &c_b59, &b[b_offset], ldb); + zlaset_("A", n, nrhs, &c_b56, &c_b56, &b[b_offset], ldb); return 0; } - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, &c__1, &d__[1], n, info); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &nm1, &c__1, &e[1], &nm1, + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &nm1, &c__1, &e[1], &nm1, info); /* @@ -13051,8 +13091,8 @@ static doublereal c_b2210 = .5; irwrb = irwwrk; irwib = irwrb + *n * *nrhs; irwb = irwib + *n * *nrhs; - dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwu], n); - dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwvt], n); + dlaset_("A", n, n, &c_b328, &c_b1034, &rwork[irwu], n); + dlaset_("A", n, n, &c_b328, &c_b1034, &rwork[irwvt], n); dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n, &rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info); if (*info != 0) { @@ -13077,8 +13117,8 @@ static doublereal c_b2210 = .5; } /* L50: */ } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb], - n, &c_b324, &rwork[irwrb], n); + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwu], n, &rwork[irwb], + n, &c_b328, &rwork[irwrb], n); j = irwb - 1; i__1 = *nrhs; for (jcol = 1; jcol <= i__1; ++jcol) { @@ -13090,8 +13130,8 @@ static doublereal c_b2210 = .5; } /* L70: */ } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb], - n, &c_b324, &rwork[irwib], n); + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwu], n, 
&rwork[irwb], + n, &c_b328, &rwork[irwib], n); jreal = irwrb - 1; jimag = irwib - 1; i__1 = *nrhs; @@ -13110,14 +13150,14 @@ static doublereal c_b2210 = .5; /* L90: */ } - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { if (d__[i__] <= tol) { - zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[i__ + b_dim1], + zlaset_("A", &c__1, nrhs, &c_b56, &c_b56, &b[i__ + b_dim1], ldb); } else { - zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, & + zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1034, &c__1, nrhs, & b[i__ + b_dim1], ldb, info); ++(*rank); } @@ -13145,8 +13185,8 @@ static doublereal c_b2210 = .5; } /* L120: */ } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb], - n, &c_b324, &rwork[irwrb], n); + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwvt], n, &rwork[irwb], + n, &c_b328, &rwork[irwrb], n); j = irwb - 1; i__1 = *nrhs; for (jcol = 1; jcol <= i__1; ++jcol) { @@ -13158,8 +13198,8 @@ static doublereal c_b2210 = .5; } /* L140: */ } - dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb], - n, &c_b324, &rwork[irwib], n); + dgemm_("T", "N", n, nrhs, n, &c_b1034, &rwork[irwvt], n, &rwork[irwb], + n, &c_b328, &rwork[irwib], n); jreal = irwrb - 1; jimag = irwib - 1; i__1 = *nrhs; @@ -13180,10 +13220,10 @@ static doublereal c_b2210 = .5; /* Unscale. */ - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, + dlascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, n, &c__1, &d__[1], n, info); dlasrt_("D", n, &d__[1], info); - zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], + zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, nrhs, &b[b_offset], ldb, info); return 0; @@ -13284,9 +13324,9 @@ static doublereal c_b2210 = .5; /* This is a small subproblem and is solved by DLASDQ. 
*/ - dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[vt + + dlaset_("A", &nsize, &nsize, &c_b328, &c_b1034, &rwork[vt + st1], n); - dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[u + + dlaset_("A", &nsize, &nsize, &c_b328, &c_b1034, &rwork[u + st1], n); dlasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], & e[st], &rwork[vt + st1], n, &rwork[u + st1], n, & @@ -13314,8 +13354,8 @@ static doublereal c_b2210 = .5; } /* L190: */ } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u + - st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b328, &rwork[irwrb], &nsize); j = irwb - 1; i__2 = *nrhs; @@ -13328,8 +13368,8 @@ static doublereal c_b2210 = .5; } /* L210: */ } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u + - st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[u + + st1], n, &rwork[irwb], &nsize, &c_b328, &rwork[irwib], &nsize); jreal = irwrb - 1; jimag = irwib - 1; @@ -13384,7 +13424,7 @@ static doublereal c_b2210 = .5; /* Apply the singular values and treat the tiny ones as zero. 
*/ - tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); + tol = rcnd * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1)); i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { @@ -13395,10 +13435,10 @@ static doublereal c_b2210 = .5; */ if ((d__1 = d__[i__], abs(d__1)) <= tol) { - zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &work[bx + i__ - 1], n); + zlaset_("A", &c__1, nrhs, &c_b56, &c_b56, &work[bx + i__ - 1], n); } else { ++(*rank); - zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, & + zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1034, &c__1, nrhs, & work[bx + i__ - 1], n, info); } d__[i__] = (d__1 = d__[i__], abs(d__1)); @@ -13441,8 +13481,8 @@ static doublereal c_b2210 = .5; } /* L270: */ } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1], - n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], &nsize); + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b328, &rwork[irwrb], &nsize); j = bxst - *n - 1; jimag = irwb - 1; i__2 = *nrhs; @@ -13456,8 +13496,8 @@ static doublereal c_b2210 = .5; } /* L290: */ } - dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1], - n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], &nsize); + dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1034, &rwork[vt + st1], + n, &rwork[irwb], &nsize, &c_b328, &rwork[irwib], &nsize); jreal = irwrb - 1; jimag = irwib - 1; i__2 = *nrhs; @@ -13492,9 +13532,9 @@ static doublereal c_b2210 = .5; /* Unscale and sort the singular values. 
*/ - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, info); + dlascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, n, &c__1, &d__[1], n, info); dlasrt_("D", n, &d__[1], info); - zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], ldb, + zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, n, nrhs, &b[b_offset], ldb, info); return 0; @@ -13523,10 +13563,10 @@ doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -13552,7 +13592,7 @@ doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -13575,7 +13615,7 @@ doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a, LDA (input) INTEGER The leading dimension of the array A. LDA >= max(M,1). - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), where LWORK >= M when NORM = 'I'; otherwise, WORK is not referenced. @@ -13692,10 +13732,10 @@ doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -13721,7 +13761,7 @@ doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + squares). Note that max(abs(A(i,j))) is not a consistent matrix norm. Arguments ========= @@ -13753,7 +13793,7 @@ doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, LDA (input) INTEGER The leading dimension of the array A. LDA >= max(N,1). - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), + WORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)), where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise, WORK is not referenced. 
@@ -13907,230 +13947,4351 @@ doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a, } /* zlanhe_ */ -doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, - doublereal *work) +/* Subroutine */ int zlaqr0_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info) { /* System generated locals */ - integer a_dim1, a_offset, i__1, i__2, i__3, i__4; - doublereal ret_val, d__1, d__2; + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + doublecomplex z__1, z__2, z__3, z__4, z__5; /* Builtin functions */ - double z_abs(doublecomplex *), sqrt(doublereal); + double d_imag(doublecomplex *); + void z_sqrt(doublecomplex *, doublecomplex *); /* Local variables */ - static integer i__, j; - static doublereal sum, scale; - extern logical lsame_(char *, char *); - static doublereal value; - extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *, - doublereal *, doublereal *); + static integer i__, k; + static doublereal s; + static doublecomplex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static doublecomplex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static doublecomplex swap; + static integer ktop; + static doublecomplex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int zlaqr3_(logical *, logical *, integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + integer *, integer *, doublecomplex *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer * + , doublecomplex *, integer *), 
zlaqr4_(logical *, logical *, + integer *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *), zlaqr5_(logical *, + logical *, integer *, integer *, integer *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static doublecomplex rtdisc; + static integer nwupbd; + static logical sorted; + extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), + zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer lwkopt; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 - Purpose - ======= + Purpose + ======= - ZLANHS returns the value of the one norm, or the Frobenius norm, or - the infinity norm, or the element of largest absolute value of a - Hessenberg matrix A. + ZLAQR0 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. 
- Description - =========== + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. - ZLANHS returns the value + Arguments + ========= - ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm' - ( - ( norm1(A), NORM = '1', 'O' or 'o' - ( - ( normI(A), NORM = 'I' or 'i' - ( - ( normF(A), NORM = 'F', 'f', 'E' or 'e' + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. - where norm1 denotes the one norm of a matrix (maximum column sum), - normI denotes the infinity norm of a matrix (maximum row sum) and - normF denotes the Frobenius norm of a matrix (square root of sum of - squares). Note that max(abs(A(i,j))) is not a matrix norm. + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. - Arguments - ========= + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to ZGEBAL, and then passed to ZGEHRD when the + matrix output by ZGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) 
+ + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). + + Z (input/output) COMPLEX*16 array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX*16 array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then ZLAQR0 does a workspace query. + In this case, ZLAQR0 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, ZLAQR0 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 
0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . ZLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== - NORM (input) CHARACTER*1 - Specifies the value to be returned in ZLANHS as described - above. + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . 
==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; - N (input) INTEGER - The order of the matrix A. N >= 0. When N = 0, ZLANHS is - set to zero. + /* Function Body */ + *info = 0; - A (input) COMPLEX*16 array, dimension (LDA,N) - The n by n upper Hessenberg matrix A; the part of A below the - first sub-diagonal is not referenced. +/* ==== Quick return for N = 0: nothing to do. ==== */ - LDA (input) INTEGER - The leading dimension of the array A. LDA >= max(N,1). + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } - WORK (workspace) DOUBLE PRECISION array, dimension (LWORK), - where LWORK >= N when NORM = 'I'; otherwise, WORK is not - referenced. + if (*n <= 11) { - ===================================================================== -*/ +/* ==== Tiny matrices must use ZLAHQR. ==== */ + lwkopt = 1; + if (*lwork != -1) { + zlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { - /* Parameter adjustments */ - a_dim1 = *lda; - a_offset = 1 + a_dim1; - a -= a_offset; - --work; +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== - /* Function Body */ - if (*n == 0) { - value = 0.; - } else if (lsame_(norm, "M")) { + ==== Hope for the best. ==== +*/ -/* Find max(abs(A(i,j))). */ + *info = 0; - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { -/* Computing MAX */ - d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]); - value = max(d__1,d__2); -/* L10: */ - } -/* L20: */ +/* ==== Set up job flags for ILAENV. 
==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; } - } else if (lsame_(norm, "O") || *(unsigned char *) - norm == '1') { -/* Find norm1(A). */ +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ - value = 0.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - sum = 0.; + nwr = ilaenv_(&c__13, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); /* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - sum += z_abs(&a[i__ + j * a_dim1]); -/* L30: */ - } - value = max(value,sum); -/* L40: */ - } - } else if (lsame_(norm, "I")) { + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); -/* Find normI(A). */ +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. 
==== +*/ - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { - work[i__] = 0.; -/* L50: */ - } - i__1 = *n; - for (j = 1; j <= i__1; ++j) { + nsr = ilaenv_(&c__15, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); /* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - for (i__ = 1; i__ <= i__2; ++i__) { - work[i__] += z_abs(&a[i__ + j * a_dim1]); -/* L60: */ - } -/* L70: */ - } - value = 0.; - i__1 = *n; - for (i__ = 1; i__ <= i__1; ++i__) { + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); /* Computing MAX */ - d__1 = value, d__2 = work[i__]; - value = max(d__1,d__2); -/* L80: */ - } - } else if (lsame_(norm, "F") || lsame_(norm, "E")) { + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); -/* Find normF(A). */ +/* + ==== Estimate optimal workspace ==== - scale = 0.; - sum = 1.; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { -/* Computing MIN */ - i__3 = *n, i__4 = j + 1; - i__2 = min(i__3,i__4); - zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum); -/* L90: */ - } - value = scale * sqrt(sum); - } + ==== Workspace query call to ZLAQR3 ==== +*/ - ret_val = value; - return ret_val; + i__1 = nwr + 1; + zlaqr3_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); -/* End of ZLANHS */ +/* + ==== Optimal workspace = MAX(ZLAQR5, ZLAQR3) ==== -} /* zlanhs_ */ + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); -/* Subroutine */ int zlarcm_(integer *m, integer *n, doublereal *a, integer * - lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, - doublereal *rwork) -{ - /* System generated locals */ - integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, - i__3, i__4, i__5; - doublereal d__1; - doublecomplex z__1; +/* ==== Quick return in case of workspace 
query. ==== */ - /* Builtin functions */ - double d_imag(doublecomplex *); + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } - /* Local variables */ - static integer i__, j, l; - extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, - integer *, doublereal *, doublereal *, integer *, doublereal *, - integer *, doublereal *, doublereal *, integer *); +/* ==== ZLAHQR/ZLAQR0 crossover point ==== */ + nmin = ilaenv_(&c__12, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + kacc22 = ilaenv_(&c__16, "ZLAQR0", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); - Purpose - ======= +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== - ZLARCM performs a very simple matrix-matrix multiplication: - C := A * B, - where A is M by M and real; B is M by N and complex; - C is M by N and complex. + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; - Arguments - ========= +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. ==== - M (input) INTEGER - The number of rows of the matrix A and of the matrix C. - M >= 0. 
+ Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; - N (input) INTEGER - The number of columns and rows of the matrix B and - the number of columns of the matrix C. - N >= 0. +/* ==== NDFL: an iteration count restarted at deflation. ==== */ - A (input) DOUBLE PRECISION array, dimension (LDA, M) - A contains the M by M matrix A. + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0. && h__[i__3].i == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + kwtop + (kwtop - 1) * h_dim1]), abs(d__2)) > ( + d__3 = h__[i__3].r, abs(d__3)) + (d__4 = d_imag(& + h__[kwtop - 1 + (kwtop - 2) * h_dim1]), abs(d__4)) + ) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + zlaqr3_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. 
+*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if ZLAQR3 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . ZLAQR3 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[i__ + (i__ - 1) * h_dim1]), abs( + d__2))) * .75; + z__1.r = h__[i__4].r + d__3, z__1.i = h__[i__4].i; + w[i__3].r = z__1.r, w[i__3].i = z__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use ZLAQR4 or + . ZLAHQR on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + zlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + if (ns > nmin) { + zlaqr4_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &work[1], lwork, &inf); + } else { + zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[ + kt + h_dim1], ldh, &w[ks], &c__1, &c__1, + zdum, &c__1, &inf); + } + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . 
overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) ==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = + d_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), abs(d__2)) + ((d__3 = h__[i__3] + .r, abs(d__3)) + (d__4 = d_imag(&h__[kbot + + (kbot - 1) * h_dim1]), abs(d__4))) + (( + d__5 = h__[i__4].r, abs(d__5)) + (d__6 = + d_imag(&h__[kbot - 1 + kbot * h_dim1]), + abs(d__6))) + ((d__7 = h__[i__5].r, abs( + d__7)) + (d__8 = d_imag(&h__[kbot + kbot * + h_dim1]), abs(d__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + aa.r = z__1.r, aa.i = z__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + cc.r = z__1.r, cc.i = z__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + bb.r = z__1.r, bb.i = z__1.i; + i__2 = kbot + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + dd.r = z__1.r, dd.i = z__1.i; + z__2.r = aa.r + dd.r, z__2.i = aa.i + dd.i; + z__1.r = z__2.r / 2., z__1.i = z__2.i / 2.; + tr2.r = z__1.r, tr2.i = z__1.i; + z__3.r = aa.r - tr2.r, z__3.i = aa.i - tr2.i; + z__4.r = dd.r - tr2.r, z__4.i = dd.i - tr2.i; + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, + z__2.i = z__3.r * z__4.i + z__3.i * + z__4.r; + z__5.r = bb.r * cc.r - bb.i * cc.i, z__5.i = bb.r + * cc.i + bb.i * cc.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - + z__5.i; + det.r = z__1.r, det.i = z__1.i; + z__2.r = -det.r, z__2.i = -det.i; + z_sqrt(&z__1, &z__2); + rtdisc.r = z__1.r, rtdisc.i = z__1.i; + i__2 = kbot - 1; + z__2.r = tr2.r + rtdisc.r, z__2.i = tr2.i + + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + i__2 = kbot; + z__2.r = tr2.r - rtdisc.r, 
z__2.i = tr2.i - + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + + ks = kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((d__1 = w[i__4].r, abs(d__1)) + (d__2 = + d_imag(&w[i__]), abs(d__2)) < (d__3 = + w[i__5].r, abs(d__3)) + (d__4 = + d_imag(&w[i__ + 1]), abs(d__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + z__2.r = w[i__2].r - h__[i__3].r, z__2.i = w[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + z__4.r = w[i__4].r - h__[i__5].r, z__4.i = w[i__4].i - + h__[i__5].i; + z__3.r = z__4.r, z__3.i = z__4.i; + if ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), + abs(d__2)) < (d__3 = z__3.r, abs(d__3)) + (d__4 = + d_imag(&z__3), abs(d__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . 
split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . - a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + zlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. ==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR0 ==== */ + + return 0; +} /* zlaqr0_ */ + +/* Subroutine */ int zlaqr1_(integer *n, doublecomplex *h__, integer *ldh, + doublecomplex *s1, doublecomplex *s2, doublecomplex *v) +{ + /* System generated locals */ + integer h_dim1, h_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static doublereal s; + static doublecomplex h21s, h31s; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. 
+ November 2006 + + + Given a 2-by-2 or 3-by-3 matrix H, ZLAQR1 sets v to a + scalar multiple of the first column of the product + + (*) K = (H - s1*I)*(H - s2*I) + + scaling to avoid overflows and most underflows. + + This is useful for starting double implicit shift bulges + in the QR algorithm. + + + N (input) integer + Order of the matrix H. N must be either 2 or 3. + + H (input) COMPLEX*16 array of dimension (LDH,N) + The 2-by-2 or 3-by-3 matrix H in (*). + + LDH (input) integer + The leading dimension of H as declared in + the calling procedure. LDH.GE.N + + S1 (input) COMPLEX*16 + S2 S1 and S2 are the shifts defining K in (*) above. + + V (output) COMPLEX*16 array of dimension N + A scalar multiple of the first column of the + matrix K in (*). + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --v; + + /* Function Body */ + if (*n == 2) { + i__1 = h_dim1 + 1; + z__2.r = h__[i__1].r - s2->r, z__2.i = h__[i__1].i - s2->i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__2 = h_dim1 + 2; + s = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), abs(d__2)) + ( + (d__3 = h__[i__2].r, abs(d__3)) + (d__4 = d_imag(&h__[h_dim1 + + 2]), abs(d__4))); + if (s == 0.) 
{ + v[1].r = 0., v[1].i = 0.; + v[2].r = 0., v[2].i = 0.; + } else { + i__1 = h_dim1 + 2; + z__1.r = h__[i__1].r / s, z__1.i = h__[i__1].i / s; + h21s.r = z__1.r, h21s.i = z__1.i; + i__1 = (h_dim1 << 1) + 1; + z__2.r = h21s.r * h__[i__1].r - h21s.i * h__[i__1].i, z__2.i = + h21s.r * h__[i__1].i + h21s.i * h__[i__1].r; + i__2 = h_dim1 + 1; + z__4.r = h__[i__2].r - s1->r, z__4.i = h__[i__2].i - s1->i; + i__3 = h_dim1 + 1; + z__6.r = h__[i__3].r - s2->r, z__6.i = h__[i__3].i - s2->i; + z__5.r = z__6.r / s, z__5.i = z__6.i / s; + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, z__3.i = z__4.r * + z__5.i + z__4.i * z__5.r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + v[1].r = z__1.r, v[1].i = z__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + z__4.r = h__[i__1].r + h__[i__2].r, z__4.i = h__[i__1].i + h__[ + i__2].i; + z__3.r = z__4.r - s1->r, z__3.i = z__4.i - s1->i; + z__2.r = z__3.r - s2->r, z__2.i = z__3.i - s2->i; + z__1.r = h21s.r * z__2.r - h21s.i * z__2.i, z__1.i = h21s.r * + z__2.i + h21s.i * z__2.r; + v[2].r = z__1.r, v[2].i = z__1.i; + } + } else { + i__1 = h_dim1 + 1; + z__2.r = h__[i__1].r - s2->r, z__2.i = h__[i__1].i - s2->i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__2 = h_dim1 + 2; + i__3 = h_dim1 + 3; + s = (d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), abs(d__2)) + ( + (d__3 = h__[i__2].r, abs(d__3)) + (d__4 = d_imag(&h__[h_dim1 + + 2]), abs(d__4))) + ((d__5 = h__[i__3].r, abs(d__5)) + (d__6 + = d_imag(&h__[h_dim1 + 3]), abs(d__6))); + if (s == 0.) 
{ + v[1].r = 0., v[1].i = 0.; + v[2].r = 0., v[2].i = 0.; + v[3].r = 0., v[3].i = 0.; + } else { + i__1 = h_dim1 + 2; + z__1.r = h__[i__1].r / s, z__1.i = h__[i__1].i / s; + h21s.r = z__1.r, h21s.i = z__1.i; + i__1 = h_dim1 + 3; + z__1.r = h__[i__1].r / s, z__1.i = h__[i__1].i / s; + h31s.r = z__1.r, h31s.i = z__1.i; + i__1 = h_dim1 + 1; + z__4.r = h__[i__1].r - s1->r, z__4.i = h__[i__1].i - s1->i; + i__2 = h_dim1 + 1; + z__6.r = h__[i__2].r - s2->r, z__6.i = h__[i__2].i - s2->i; + z__5.r = z__6.r / s, z__5.i = z__6.i / s; + z__3.r = z__4.r * z__5.r - z__4.i * z__5.i, z__3.i = z__4.r * + z__5.i + z__4.i * z__5.r; + i__3 = (h_dim1 << 1) + 1; + z__7.r = h__[i__3].r * h21s.r - h__[i__3].i * h21s.i, z__7.i = + h__[i__3].r * h21s.i + h__[i__3].i * h21s.r; + z__2.r = z__3.r + z__7.r, z__2.i = z__3.i + z__7.i; + i__4 = h_dim1 * 3 + 1; + z__8.r = h__[i__4].r * h31s.r - h__[i__4].i * h31s.i, z__8.i = + h__[i__4].r * h31s.i + h__[i__4].i * h31s.r; + z__1.r = z__2.r + z__8.r, z__1.i = z__2.i + z__8.i; + v[1].r = z__1.r, v[1].i = z__1.i; + i__1 = h_dim1 + 1; + i__2 = (h_dim1 << 1) + 2; + z__5.r = h__[i__1].r + h__[i__2].r, z__5.i = h__[i__1].i + h__[ + i__2].i; + z__4.r = z__5.r - s1->r, z__4.i = z__5.i - s1->i; + z__3.r = z__4.r - s2->r, z__3.i = z__4.i - s2->i; + z__2.r = h21s.r * z__3.r - h21s.i * z__3.i, z__2.i = h21s.r * + z__3.i + h21s.i * z__3.r; + i__3 = h_dim1 * 3 + 2; + z__6.r = h__[i__3].r * h31s.r - h__[i__3].i * h31s.i, z__6.i = + h__[i__3].r * h31s.i + h__[i__3].i * h31s.r; + z__1.r = z__2.r + z__6.r, z__1.i = z__2.i + z__6.i; + v[2].r = z__1.r, v[2].i = z__1.i; + i__1 = h_dim1 + 1; + i__2 = h_dim1 * 3 + 3; + z__5.r = h__[i__1].r + h__[i__2].r, z__5.i = h__[i__1].i + h__[ + i__2].i; + z__4.r = z__5.r - s1->r, z__4.i = z__5.i - s1->i; + z__3.r = z__4.r - s2->r, z__3.i = z__4.i - s2->i; + z__2.r = h31s.r * z__3.r - h31s.i * z__3.i, z__2.i = h31s.r * + z__3.i + h31s.i * z__3.r; + i__3 = (h_dim1 << 1) + 3; + z__6.r = h21s.r * h__[i__3].r - h21s.i * h__[i__3].i, 
z__6.i = + h21s.r * h__[i__3].i + h21s.i * h__[i__3].r; + z__1.r = z__2.r + z__6.r, z__1.i = z__2.i + z__6.i; + v[3].r = z__1.r, v[3].i = z__1.i; + } + } + return 0; +} /* zlaqr1_ */ + +/* Subroutine */ int zlaqr2_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j; + static doublecomplex s; + static integer jw; + static doublereal foo; + static integer kln; + static doublecomplex tau; + static integer knt; + static doublereal ulp; + static integer lwk1, lwk2; + static doublecomplex beta; + static integer kcol, info, ifst, ilst, ltop, krow; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + static integer infqr; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer kwtop; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *); + + static doublereal safmin, safmax; + extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, + doublecomplex *, 
integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *), zlahqr_(logical *, + logical *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, doublecomplex *, + integer *, integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int ztrexc_(char *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, integer *, + integer *); + static integer lwkopt; + extern /* Subroutine */ int zunmhr_(char *, char *, integer *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + This subroutine is identical to ZLAQR3 except that it avoids + recursion by calling ZLAHQR instead of ZLAQR4. + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. 
+ + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. 
+ If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX*16 array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX*16 array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) COMPLEX*16 array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX*16 array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX*16 array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; ZLAQR2 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. 
+ + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. ==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to ZGEHRD ==== */ + + i__1 = jw - 1; + zgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to ZUNMHR ==== */ + + i__1 = jw - 1; + zunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Optimal workspace ==== */ + + lwkopt = jw + max(lwk1,lwk2); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1., work[1].i = 0.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0., s.i = 0.; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + d__5 = smlnum, d__6 = ulp * ((d__1 = h__[i__1].r, abs(d__1)) + (d__2 = + d_imag(&h__[kwtop + kwtop * h_dim1]), abs(d__2))); + if ((d__3 = s.r, abs(d__3)) + (d__4 = d_imag(&s), abs(d__4)) <= max( + d__5,d__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + } + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + zlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + zcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + zlaset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + zlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[kwtop], + &c__1, &jw, &v[v_offset], ldv, &infqr); + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[*ns + *ns * + t_dim1]), abs(d__2)); + if (foo == 0.) { + foo = (d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (((d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2))) * (( + d__3 = v[i__2].r, abs(d__3)) + (d__4 = d_imag(&v[*ns * v_dim1 + + 1]), abs(d__4))) <= max(d__5,d__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (ZTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0., s.i = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[j + j * + t_dim1]), abs(d__2)) > (d__3 = t[i__4].r, abs(d__3)) + + (d__4 = d_imag(&t[ifst + ifst * t_dim1]), abs(d__4)) + ) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0. && s.i == 0.) { + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + zcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &work[i__]); + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + zlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1., work[1].i = 0.; + + i__1 = jw - 2; + i__2 = jw - 2; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + d_cnjg(&z__1, &tau); + zlarf_("L", ns, &jw, &work[1], &c__1, &z__1, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + zgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + d_cnjg(&z__2, &v[v_dim1 + 1]); + z__1.r = s.r * z__2.r - s.i * z__2.i, z__1.i = s.r * z__2.i + s.i + * z__2.r; + 
h__[i__1].r = z__1.r, h__[i__1].i = z__1.i; + } + zlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + zcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + i__1 = *lwork - jw; + zunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + zgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + zlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR2 ==== */ + + return 0; +} /* zlaqr2_ */ + +/* Subroutine */ int zlaqr3_(logical *wantt, logical *wantz, integer *n, + integer *ktop, integer *kbot, integer *nw, doublecomplex *h__, + integer *ldh, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, integer *ns, integer *nd, doublecomplex *sh, + doublecomplex *v, integer *ldv, integer *nh, doublecomplex *t, + integer *ldt, integer *nv, doublecomplex *wv, integer *ldwv, + doublecomplex *work, integer *lwork) +{ + /* System generated locals */ + integer h_dim1, h_offset, t_dim1, t_offset, v_dim1, v_offset, wv_dim1, + wv_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4; + doublereal d__1, d__2, d__3, d__4, d__5, d__6; + doublecomplex z__1, z__2; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, j; + static doublecomplex s; + static integer jw; + static doublereal foo; + static integer kln; + static doublecomplex tau; + static integer knt; + static doublereal ulp; + static integer lwk1, lwk2, lwk3; + static doublecomplex beta; + static integer kcol, info, nmin, ifst, 
ilst, ltop, krow; + extern /* Subroutine */ int zlarf_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, doublecomplex *); + static integer infqr; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *); + static integer kwtop; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), dlabad_(doublereal *, doublereal *), + zlaqr4_(logical *, logical *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, integer * + ); + + static doublereal safmin; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static doublereal safmax; + extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *, integer *), zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *), zlahqr_(logical *, + logical *, integer *, integer *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, doublecomplex *, + integer *, integer *), zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static doublereal smlnum; + extern /* Subroutine */ int ztrexc_(char *, integer *, doublecomplex *, + integer *, doublecomplex *, integer *, integer *, integer *, + integer *); + static integer lwkopt; + extern /* Subroutine */ int zunmhr_(char *, char *, integer *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer 
* + ); + + +/* + -- LAPACK auxiliary routine (version 3.2.1) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + -- April 2009 -- + + + ****************************************************************** + Aggressive early deflation: + + This subroutine accepts as input an upper Hessenberg matrix + H and performs an unitary similarity transformation + designed to detect and deflate fully converged eigenvalues from + a trailing principal submatrix. On output H has been over- + written by a new Hessenberg matrix that is a perturbation of + an unitary similarity transformation of H. It is to be + hoped that the final version of H has many zero subdiagonal + entries. + + ****************************************************************** + WANTT (input) LOGICAL + If .TRUE., then the Hessenberg matrix H is fully updated + so that the triangular Schur factor may be + computed (in cooperation with the calling subroutine). + If .FALSE., then only enough of H is updated to preserve + the eigenvalues. + + WANTZ (input) LOGICAL + If .TRUE., then the unitary matrix Z is updated so + so that the unitary Schur factor may be computed + (in cooperation with the calling subroutine). + If .FALSE., then Z is not referenced. + + N (input) INTEGER + The order of the matrix H and (if WANTZ is .TRUE.) the + order of the unitary matrix Z. + + KTOP (input) INTEGER + It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0. + KBOT and KTOP together determine an isolated block + along the diagonal of the Hessenberg matrix. + + KBOT (input) INTEGER + It is assumed without a check that either + KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together + determine an isolated block along the diagonal of the + Hessenberg matrix. + + NW (input) INTEGER + Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1). 
+ + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On input the initial N-by-N section of H stores the + Hessenberg matrix undergoing aggressive early deflation. + On output H has been transformed by a unitary + similarity transformation, perturbed, and the returned + to Hessenberg form that (it is to be hoped) has some + zero subdiagonal entries. + + LDH (input) integer + Leading dimension of H just as declared in the calling + subroutine. N .LE. LDH + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N. + + Z (input/output) COMPLEX*16 array, dimension (LDZ,N) + IF WANTZ is .TRUE., then on output, the unitary + similarity transformation mentioned above has been + accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ is .FALSE., then Z is unreferenced. + + LDZ (input) integer + The leading dimension of Z just as declared in the + calling subroutine. 1 .LE. LDZ. + + NS (output) integer + The number of unconverged (ie approximate) eigenvalues + returned in SR and SI that may be used as shifts by the + calling subroutine. + + ND (output) integer + The number of converged eigenvalues uncovered by this + subroutine. + + SH (output) COMPLEX*16 array, dimension KBOT + On output, approximate eigenvalues that may + be used for shifts are stored in SH(KBOT-ND-NS+1) + through SR(KBOT-ND). Converged eigenvalues are + stored in SH(KBOT-ND+1) through SH(KBOT). + + V (workspace) COMPLEX*16 array, dimension (LDV,NW) + An NW-by-NW work array. + + LDV (input) integer scalar + The leading dimension of V just as declared in the + calling subroutine. NW .LE. LDV + + NH (input) integer scalar + The number of columns of T. NH.GE.NW. + + T (workspace) COMPLEX*16 array, dimension (LDT,NW) + + LDT (input) integer + The leading dimension of T just as declared in the + calling subroutine. NW .LE. 
LDT + + NV (input) integer + The number of rows of work array WV available for + workspace. NV.GE.NW. + + WV (workspace) COMPLEX*16 array, dimension (LDWV,NW) + + LDWV (input) integer + The leading dimension of W just as declared in the + calling subroutine. NW .LE. LDV + + WORK (workspace) COMPLEX*16 array, dimension LWORK. + On exit, WORK(1) is set to an estimate of the optimal value + of LWORK for the given values of N, NW, KTOP and KBOT. + + LWORK (input) integer + The dimension of the work array WORK. LWORK = 2*NW + suffices, but greater efficiency may result from larger + values of LWORK. + + If LWORK = -1, then a workspace query is assumed; ZLAQR3 + only estimates the optimal workspace size for the given + values of N, NW, KTOP and KBOT. The estimate is returned + in WORK(1). No error message related to LWORK is issued + by XERBLA. Neither H nor Z are accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + + ==== Estimate optimal workspace. 
==== +*/ + + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --sh; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + --work; + + /* Function Body */ +/* Computing MIN */ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + if (jw <= 2) { + lwkopt = 1; + } else { + +/* ==== Workspace query call to ZGEHRD ==== */ + + i__1 = jw - 1; + zgehrd_(&jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], &work[1], & + c_n1, &info); + lwk1 = (integer) work[1].r; + +/* ==== Workspace query call to ZUNMHR ==== */ + + i__1 = jw - 1; + zunmhr_("R", "N", &jw, &jw, &c__1, &i__1, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[1], &c_n1, &info); + lwk2 = (integer) work[1].r; + +/* ==== Workspace query call to ZLAQR4 ==== */ + + zlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[1], + &c__1, &jw, &v[v_offset], ldv, &work[1], &c_n1, &infqr); + lwk3 = (integer) work[1].r; + +/* + ==== Optimal workspace ==== + + Computing MAX +*/ + i__1 = jw + max(lwk1,lwk2); + lwkopt = max(i__1,lwk3); + } + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* + ==== Nothing to do ... + ... for an empty active block ... ==== +*/ + *ns = 0; + *nd = 0; + work[1].r = 1., work[1].i = 0.; + if (*ktop > *kbot) { + return 0; + } +/* ... nor for an empty deflation window. ==== */ + if (*nw < 1) { + return 0; + } + +/* ==== Machine constants ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. 
/ safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Setup deflation window ==== + + Computing MIN +*/ + i__1 = *nw, i__2 = *kbot - *ktop + 1; + jw = min(i__1,i__2); + kwtop = *kbot - jw + 1; + if (kwtop == *ktop) { + s.r = 0., s.i = 0.; + } else { + i__1 = kwtop + (kwtop - 1) * h_dim1; + s.r = h__[i__1].r, s.i = h__[i__1].i; + } + + if (*kbot == kwtop) { + +/* ==== 1-by-1 deflation window: not much to do ==== */ + + i__1 = kwtop; + i__2 = kwtop + kwtop * h_dim1; + sh[i__1].r = h__[i__2].r, sh[i__1].i = h__[i__2].i; + *ns = 1; + *nd = 0; +/* Computing MAX */ + i__1 = kwtop + kwtop * h_dim1; + d__5 = smlnum, d__6 = ulp * ((d__1 = h__[i__1].r, abs(d__1)) + (d__2 = + d_imag(&h__[kwtop + kwtop * h_dim1]), abs(d__2))); + if ((d__3 = s.r, abs(d__3)) + (d__4 = d_imag(&s), abs(d__4)) <= max( + d__5,d__6)) { + *ns = 0; + *nd = 1; + if (kwtop > *ktop) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + } + work[1].r = 1., work[1].i = 0.; + return 0; + } + +/* + ==== Convert to spike-triangular form. (In case of a + . rare QR failure, this routine continues to do + . aggressive early deflation using that part of + . the deflation window that converged using INFQR + . here and there to keep track.) 
==== +*/ + + zlacpy_("U", &jw, &jw, &h__[kwtop + kwtop * h_dim1], ldh, &t[t_offset], + ldt); + i__1 = jw - 1; + i__2 = *ldh + 1; + i__3 = *ldt + 1; + zcopy_(&i__1, &h__[kwtop + 1 + kwtop * h_dim1], &i__2, &t[t_dim1 + 2], & + i__3); + + zlaset_("A", &jw, &jw, &c_b56, &c_b57, &v[v_offset], ldv); + nmin = ilaenv_(&c__12, "ZLAQR3", "SV", &jw, &c__1, &jw, lwork, (ftnlen)6, + (ftnlen)2); + if (jw > nmin) { + zlaqr4_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &work[1], lwork, & + infqr); + } else { + zlahqr_(&c_true, &c_true, &jw, &c__1, &jw, &t[t_offset], ldt, &sh[ + kwtop], &c__1, &jw, &v[v_offset], ldv, &infqr); + } + +/* ==== Deflation detection loop ==== */ + + *ns = jw; + ilst = infqr + 1; + i__1 = jw; + for (knt = infqr + 1; knt <= i__1; ++knt) { + +/* ==== Small spike tip deflation test ==== */ + + i__2 = *ns + *ns * t_dim1; + foo = (d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[*ns + *ns * + t_dim1]), abs(d__2)); + if (foo == 0.) { + foo = (d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2)); + } + i__2 = *ns * v_dim1 + 1; +/* Computing MAX */ + d__5 = smlnum, d__6 = ulp * foo; + if (((d__1 = s.r, abs(d__1)) + (d__2 = d_imag(&s), abs(d__2))) * (( + d__3 = v[i__2].r, abs(d__3)) + (d__4 = d_imag(&v[*ns * v_dim1 + + 1]), abs(d__4))) <= max(d__5,d__6)) { + +/* ==== One more converged eigenvalue ==== */ + + --(*ns); + } else { + +/* + ==== One undeflatable eigenvalue. Move it up out of the + . way. (ZTREXC can not fail in this case.) ==== +*/ + + ifst = *ns; + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, & + ilst, &info); + ++ilst; + } +/* L10: */ + } + +/* ==== Return to Hessenberg form ==== */ + + if (*ns == 0) { + s.r = 0., s.i = 0.; + } + + if (*ns < jw) { + +/* + ==== sorting the diagonal of T improves accuracy for + . graded matrices. 
==== +*/ + + i__1 = *ns; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + ifst = i__; + i__2 = *ns; + for (j = i__ + 1; j <= i__2; ++j) { + i__3 = j + j * t_dim1; + i__4 = ifst + ifst * t_dim1; + if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[j + j * + t_dim1]), abs(d__2)) > (d__3 = t[i__4].r, abs(d__3)) + + (d__4 = d_imag(&t[ifst + ifst * t_dim1]), abs(d__4)) + ) { + ifst = j; + } +/* L20: */ + } + ilst = i__; + if (ifst != ilst) { + ztrexc_("V", &jw, &t[t_offset], ldt, &v[v_offset], ldv, &ifst, + &ilst, &info); + } +/* L30: */ + } + } + +/* ==== Restore shift/eigenvalue array from T ==== */ + + i__1 = jw; + for (i__ = infqr + 1; i__ <= i__1; ++i__) { + i__2 = kwtop + i__ - 1; + i__3 = i__ + i__ * t_dim1; + sh[i__2].r = t[i__3].r, sh[i__2].i = t[i__3].i; +/* L40: */ + } + + + if (*ns < jw || s.r == 0. && s.i == 0.) { + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + +/* ==== Reflect spike back into lower triangle ==== */ + + zcopy_(ns, &v[v_offset], ldv, &work[1], &c__1); + i__1 = *ns; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = i__; + d_cnjg(&z__1, &work[i__]); + work[i__2].r = z__1.r, work[i__2].i = z__1.i; +/* L50: */ + } + beta.r = work[1].r, beta.i = work[1].i; + zlarfg_(ns, &beta, &work[2], &c__1, &tau); + work[1].r = 1., work[1].i = 0.; + + i__1 = jw - 2; + i__2 = jw - 2; + zlaset_("L", &i__1, &i__2, &c_b56, &c_b56, &t[t_dim1 + 3], ldt); + + d_cnjg(&z__1, &tau); + zlarf_("L", ns, &jw, &work[1], &c__1, &z__1, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", ns, ns, &work[1], &c__1, &tau, &t[t_offset], ldt, & + work[jw + 1]); + zlarf_("R", &jw, ns, &work[1], &c__1, &tau, &v[v_offset], ldv, & + work[jw + 1]); + + i__1 = *lwork - jw; + zgehrd_(&jw, &c__1, ns, &t[t_offset], ldt, &work[1], &work[jw + 1] + , &i__1, &info); + } + +/* ==== Copy updated reduced window into place ==== */ + + if (kwtop > 1) { + i__1 = kwtop + (kwtop - 1) * h_dim1; + d_cnjg(&z__2, &v[v_dim1 + 1]); + z__1.r = s.r * z__2.r - s.i * z__2.i, z__1.i = s.r * z__2.i + s.i + * z__2.r; + 
h__[i__1].r = z__1.r, h__[i__1].i = z__1.i; + } + zlacpy_("U", &jw, &jw, &t[t_offset], ldt, &h__[kwtop + kwtop * h_dim1] + , ldh); + i__1 = jw - 1; + i__2 = *ldt + 1; + i__3 = *ldh + 1; + zcopy_(&i__1, &t[t_dim1 + 2], &i__2, &h__[kwtop + 1 + kwtop * h_dim1], + &i__3); + +/* + ==== Accumulate orthogonal matrix in order update + . H and Z, if requested. ==== +*/ + + if (*ns > 1 && (s.r != 0. || s.i != 0.)) { + i__1 = *lwork - jw; + zunmhr_("R", "N", &jw, ns, &c__1, ns, &t[t_offset], ldt, &work[1], + &v[v_offset], ldv, &work[jw + 1], &i__1, &info); + } + +/* ==== Update vertical slab in H ==== */ + + if (*wantt) { + ltop = 1; + } else { + ltop = *ktop; + } + i__1 = kwtop - 1; + i__2 = *nv; + for (krow = ltop; i__2 < 0 ? krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = kwtop - krow; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &h__[krow + kwtop * + h_dim1], ldh, &v[v_offset], ldv, &c_b56, &wv[wv_offset], + ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &h__[krow + kwtop * + h_dim1], ldh); +/* L60: */ + } + +/* ==== Update horizontal slab in H ==== */ + + if (*wantt) { + i__2 = *n; + i__1 = *nh; + for (kcol = *kbot + 1; i__1 < 0 ? kcol >= i__2 : kcol <= i__2; + kcol += i__1) { +/* Computing MIN */ + i__3 = *nh, i__4 = *n - kcol + 1; + kln = min(i__3,i__4); + zgemm_("C", "N", &jw, &kln, &jw, &c_b57, &v[v_offset], ldv, & + h__[kwtop + kcol * h_dim1], ldh, &c_b56, &t[t_offset], + ldt); + zlacpy_("A", &jw, &kln, &t[t_offset], ldt, &h__[kwtop + kcol * + h_dim1], ldh); +/* L70: */ + } + } + +/* ==== Update vertical slab in Z ==== */ + + if (*wantz) { + i__1 = *ihiz; + i__2 = *nv; + for (krow = *iloz; i__2 < 0 ? 
krow >= i__1 : krow <= i__1; krow += + i__2) { +/* Computing MIN */ + i__3 = *nv, i__4 = *ihiz - krow + 1; + kln = min(i__3,i__4); + zgemm_("N", "N", &kln, &jw, &jw, &c_b57, &z__[krow + kwtop * + z_dim1], ldz, &v[v_offset], ldv, &c_b56, &wv[ + wv_offset], ldwv); + zlacpy_("A", &kln, &jw, &wv[wv_offset], ldwv, &z__[krow + + kwtop * z_dim1], ldz); +/* L80: */ + } + } + } + +/* ==== Return the number of deflations ... ==== */ + + *nd = jw - *ns; + +/* + ==== ... and the number of shifts. (Subtracting + . INFQR from the spike length takes care + . of the case of a rare QR failure while + . calculating eigenvalues of the deflation + . window.) ==== +*/ + + *ns -= infqr; + +/* ==== Return optimal workspace. ==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR3 ==== */ + + return 0; +} /* zlaqr3_ */ + +/* Subroutine */ int zlaqr4_(logical *wantt, logical *wantz, integer *n, + integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh, + doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__, + integer *ldz, doublecomplex *work, integer *lwork, integer *info) +{ + /* System generated locals */ + integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8; + doublecomplex z__1, z__2, z__3, z__4, z__5; + + /* Builtin functions */ + double d_imag(doublecomplex *); + void z_sqrt(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, k; + static doublereal s; + static doublecomplex aa, bb, cc, dd; + static integer ld, nh, it, ks, kt, ku, kv, ls, ns, nw; + static doublecomplex tr2, det; + static integer inf, kdu, nho, nve, kwh, nsr, nwr, kwv, ndec, ndfl, kbot, + nmin; + static doublecomplex swap; + static integer ktop; + static doublecomplex zdum[1] /* was [1][1] */; + static integer kacc22, itmax, nsmax, nwmax, kwtop; + extern /* Subroutine */ int zlaqr2_(logical *, logical *, 
integer *, + integer *, integer *, integer *, doublecomplex *, integer *, + integer *, integer *, doublecomplex *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer * + , doublecomplex *, integer *), zlaqr5_(logical *, logical *, + integer *, integer *, integer *, integer *, integer *, + doublecomplex *, doublecomplex *, integer *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *, integer *, doublecomplex *, integer *, + integer *, doublecomplex *, integer *); + static integer nibble; + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static char jbcmpz[2]; + static doublecomplex rtdisc; + static integer nwupbd; + static logical sorted; + extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *, + integer *, integer *, doublecomplex *, integer *, doublecomplex *, + integer *, integer *, doublecomplex *, integer *, integer *), + zlacpy_(char *, integer *, integer *, doublecomplex *, integer *, + doublecomplex *, integer *); + static integer lwkopt; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This subroutine implements one level of recursion for ZLAQR0. + It is a complete implementation of the small bulge multi-shift + QR algorithm. It may be called by ZLAQR0 and, for large enough + deflation window size, it may be called by ZLAQR3. This + subroutine is identical to ZLAQR0 except that it calls ZLAQR2 + instead of ZLAQR3. + + Purpose + ======= + + ZLAQR4 computes the eigenvalues of a Hessenberg matrix H + and, optionally, the matrices T and Z from the Schur decomposition + H = Z T Z**H, where T is an upper triangular matrix (the + Schur form), and Z is the unitary matrix of Schur vectors. 
+ + Optionally Z may be postmultiplied into an input unitary + matrix Q so that this routine can give the Schur factorization + of a matrix A which has been reduced to the Hessenberg form H + by the unitary matrix Q: A = Q*H*Q**H = (QZ)*H*(QZ)**H. + + Arguments + ========= + + WANTT (input) LOGICAL + = .TRUE. : the full Schur form T is required; + = .FALSE.: only eigenvalues are required. + + WANTZ (input) LOGICAL + = .TRUE. : the matrix of Schur vectors Z is required; + = .FALSE.: Schur vectors are not required. + + N (input) INTEGER + The order of the matrix H. N .GE. 0. + + ILO (input) INTEGER + IHI (input) INTEGER + It is assumed that H is already upper triangular in rows + and columns 1:ILO-1 and IHI+1:N and, if ILO.GT.1, + H(ILO,ILO-1) is zero. ILO and IHI are normally set by a + previous call to ZGEBAL, and then passed to ZGEHRD when the + matrix output by ZGEBAL is reduced to Hessenberg form. + Otherwise, ILO and IHI should be set to 1 and N, + respectively. If N.GT.0, then 1.LE.ILO.LE.IHI.LE.N. + If N = 0, then ILO = 1 and IHI = 0. + + H (input/output) COMPLEX*16 array, dimension (LDH,N) + On entry, the upper Hessenberg matrix H. + On exit, if INFO = 0 and WANTT is .TRUE., then H + contains the upper triangular matrix T from the Schur + decomposition (the Schur form). If INFO = 0 and WANT is + .FALSE., then the contents of H are unspecified on exit. + (The output value of H when INFO.GT.0 is given under the + description of INFO below.) + + This subroutine may explicitly set H(i,j) = 0 for i.GT.j and + j = 1, 2, ... ILO-1 or j = IHI+1, IHI+2, ... N. + + LDH (input) INTEGER + The leading dimension of the array H. LDH .GE. max(1,N). + + W (output) COMPLEX*16 array, dimension (N) + The computed eigenvalues of H(ILO:IHI,ILO:IHI) are stored + in W(ILO:IHI). If WANTT is .TRUE., then the eigenvalues are + stored in the same order as on the diagonal of the Schur + form returned in H, with W(i) = H(i,i). 
+ + Z (input/output) COMPLEX*16 array, dimension (LDZ,IHI) + If WANTZ is .FALSE., then Z is not referenced. + If WANTZ is .TRUE., then Z(ILO:IHI,ILOZ:IHIZ) is + replaced by Z(ILO:IHI,ILOZ:IHIZ)*U where U is the + orthogonal Schur factor of H(ILO:IHI,ILO:IHI). + (The output value of Z when INFO.GT.0 is given under + the description of INFO below.) + + LDZ (input) INTEGER + The leading dimension of the array Z. if WANTZ is .TRUE. + then LDZ.GE.MAX(1,IHIZ). Otherwize, LDZ.GE.1. + + WORK (workspace/output) COMPLEX*16 array, dimension LWORK + On exit, if LWORK = -1, WORK(1) returns an estimate of + the optimal value for LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. LWORK .GE. max(1,N) + is sufficient, but LWORK typically as large as 6*N may + be required for optimal performance. A workspace query + to determine the optimal workspace size is recommended. + + If LWORK = -1, then ZLAQR4 does a workspace query. + In this case, ZLAQR4 checks the input parameters and + estimates the optimal workspace size for the given + values of N, ILO and IHI. The estimate is returned + in WORK(1). No error message related to LWORK is + issued by XERBLA. Neither H nor Z are accessed. + + + INFO (output) INTEGER + = 0: successful exit + .GT. 0: if INFO = i, ZLAQR4 failed to compute all of + the eigenvalues. Elements 1:ilo-1 and i+1:n of WR + and WI contain those eigenvalues which have been + successfully computed. (Failures are rare.) + + If INFO .GT. 0 and WANT is .FALSE., then on exit, + the remaining unconverged eigenvalues are the eigen- + values of the upper Hessenberg matrix rows and + columns ILO through INFO of the final, output + value of H. + + If INFO .GT. 0 and WANTT is .TRUE., then on exit + + (*) (initial value of H)*U = U*(final value of H) + + where U is a unitary matrix. The final + value of H is upper Hessenberg and triangular in + rows and columns INFO+1 through IHI. + + If INFO .GT. 
0 and WANTZ is .TRUE., then on exit + + (final value of Z(ILO:IHI,ILOZ:IHIZ) + = (initial value of Z(ILO:IHI,ILOZ:IHIZ)*U + + where U is the unitary matrix in (*) (regard- + less of the value of WANTT.) + + If INFO .GT. 0 and WANTZ is .FALSE., then Z is not + accessed. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + References: + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and Level 3 + Performance, SIAM Journal of Matrix Analysis, volume 23, pages + 929--947, 2002. + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part II: Aggressive Early Deflation, SIAM Journal + of Matrix Analysis, volume 23, pages 948--973, 2002. + + ================================================================ + + ==== Matrices of order NTINY or smaller must be processed by + . ZLAHQR because of insufficient subdiagonal scratch space. + . (This is a hard limit.) ==== + + ==== Exceptional deflation windows: try to cure rare + . slow convergence by varying the size of the + . deflation window after KEXNW iterations. ==== + + ==== Exceptional shifts: try to cure rare slow convergence + . with ad-hoc exceptional shifts every KEXSH iterations. + . ==== + + ==== The constant WILK1 is used to form the exceptional + . shifts. ==== +*/ + /* Parameter adjustments */ + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + --w; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + --work; + + /* Function Body */ + *info = 0; + +/* ==== Quick return for N = 0: nothing to do. ==== */ + + if (*n == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (*n <= 11) { + +/* ==== Tiny matrices must use ZLAHQR. 
==== */ + + lwkopt = 1; + if (*lwork != -1) { + zlahqr_(wantt, wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], + iloz, ihiz, &z__[z_offset], ldz, info); + } + } else { + +/* + ==== Use small bulge multi-shift QR with aggressive early + . deflation on larger-than-tiny matrices. ==== + + ==== Hope for the best. ==== +*/ + + *info = 0; + +/* ==== Set up job flags for ILAENV. ==== */ + + if (*wantt) { + *(unsigned char *)jbcmpz = 'S'; + } else { + *(unsigned char *)jbcmpz = 'E'; + } + if (*wantz) { + *(unsigned char *)&jbcmpz[1] = 'V'; + } else { + *(unsigned char *)&jbcmpz[1] = 'N'; + } + +/* + ==== NWR = recommended deflation window size. At this + . point, N .GT. NTINY = 11, so there is enough + . subdiagonal workspace for NWR.GE.2 as required. + . (In fact, there is enough subdiagonal space for + . NWR.GE.3.) ==== +*/ + + nwr = ilaenv_(&c__13, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); + nwr = max(2,nwr); +/* Computing MIN */ + i__1 = *ihi - *ilo + 1, i__2 = (*n - 1) / 3, i__1 = min(i__1,i__2); + nwr = min(i__1,nwr); + +/* + ==== NSR = recommended number of simultaneous shifts. + . At this point N .GT. NTINY = 11, so there is at + . enough subdiagonal workspace for NSR to be even + . and greater than or equal to two as required. 
==== +*/ + + nsr = ilaenv_(&c__15, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen)6, + (ftnlen)2); +/* Computing MIN */ + i__1 = nsr, i__2 = (*n + 6) / 9, i__1 = min(i__1,i__2), i__2 = *ihi - + *ilo; + nsr = min(i__1,i__2); +/* Computing MAX */ + i__1 = 2, i__2 = nsr - nsr % 2; + nsr = max(i__1,i__2); + +/* + ==== Estimate optimal workspace ==== + + ==== Workspace query call to ZLAQR2 ==== +*/ + + i__1 = nwr + 1; + zlaqr2_(wantt, wantz, n, ilo, ihi, &i__1, &h__[h_offset], ldh, iloz, + ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[h_offset], + ldh, n, &h__[h_offset], ldh, n, &h__[h_offset], ldh, &work[1], + &c_n1); + +/* + ==== Optimal workspace = MAX(ZLAQR5, ZLAQR2) ==== + + Computing MAX +*/ + i__1 = nsr * 3 / 2, i__2 = (integer) work[1].r; + lwkopt = max(i__1,i__2); + +/* ==== Quick return in case of workspace query. ==== */ + + if (*lwork == -1) { + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + return 0; + } + +/* ==== ZLAHQR/ZLAQR0 crossover point ==== */ + + nmin = ilaenv_(&c__12, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, (ftnlen) + 6, (ftnlen)2); + nmin = max(11,nmin); + +/* ==== Nibble crossover point ==== */ + + nibble = ilaenv_(&c__14, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + nibble = max(0,nibble); + +/* + ==== Accumulate reflections during ttswp? Use block + . 2-by-2 structure during matrix-matrix multiply? ==== +*/ + + kacc22 = ilaenv_(&c__16, "ZLAQR4", jbcmpz, n, ilo, ihi, lwork, ( + ftnlen)6, (ftnlen)2); + kacc22 = max(0,kacc22); + kacc22 = min(2,kacc22); + +/* + ==== NWMAX = the largest possible deflation window for + . which there is sufficient workspace. ==== + + Computing MIN +*/ + i__1 = (*n - 1) / 3, i__2 = *lwork / 2; + nwmax = min(i__1,i__2); + nw = nwmax; + +/* + ==== NSMAX = the Largest number of simultaneous shifts + . for which there is sufficient workspace. 
==== + + Computing MIN +*/ + i__1 = (*n + 6) / 9, i__2 = (*lwork << 1) / 3; + nsmax = min(i__1,i__2); + nsmax -= nsmax % 2; + +/* ==== NDFL: an iteration count restarted at deflation. ==== */ + + ndfl = 1; + +/* + ==== ITMAX = iteration limit ==== + + Computing MAX +*/ + i__1 = 10, i__2 = *ihi - *ilo + 1; + itmax = max(i__1,i__2) * 30; + +/* ==== Last row and column in the active block ==== */ + + kbot = *ihi; + +/* ==== Main Loop ==== */ + + i__1 = itmax; + for (it = 1; it <= i__1; ++it) { + +/* ==== Done when KBOT falls below ILO ==== */ + + if (kbot < *ilo) { + goto L80; + } + +/* ==== Locate active block ==== */ + + i__2 = *ilo + 1; + for (k = kbot; k >= i__2; --k) { + i__3 = k + (k - 1) * h_dim1; + if (h__[i__3].r == 0. && h__[i__3].i == 0.) { + goto L20; + } +/* L10: */ + } + k = *ilo; +L20: + ktop = k; + +/* + ==== Select deflation window size: + . Typical Case: + . If possible and advisable, nibble the entire + . active block. If not, use size MIN(NWR,NWMAX) + . or MIN(NWR+1,NWMAX) depending upon which has + . the smaller corresponding subdiagonal entry + . (a heuristic). + . + . Exceptional Case: + . If there have been no deflations in KEXNW or + . more iterations, then vary the deflation window + . size. At first, because, larger windows are, + . in general, more powerful than smaller ones, + . rapidly increase the window to the maximum possible. + . Then, gradually reduce the window size. 
==== +*/ + + nh = kbot - ktop + 1; + nwupbd = min(nh,nwmax); + if (ndfl < 5) { + nw = min(nwupbd,nwr); + } else { +/* Computing MIN */ + i__2 = nwupbd, i__3 = nw << 1; + nw = min(i__2,i__3); + } + if (nw < nwmax) { + if (nw >= nh - 1) { + nw = nh; + } else { + kwtop = kbot - nw + 1; + i__2 = kwtop + (kwtop - 1) * h_dim1; + i__3 = kwtop - 1 + (kwtop - 2) * h_dim1; + if ((d__1 = h__[i__2].r, abs(d__1)) + (d__2 = d_imag(&h__[ + kwtop + (kwtop - 1) * h_dim1]), abs(d__2)) > ( + d__3 = h__[i__3].r, abs(d__3)) + (d__4 = d_imag(& + h__[kwtop - 1 + (kwtop - 2) * h_dim1]), abs(d__4)) + ) { + ++nw; + } + } + } + if (ndfl < 5) { + ndec = -1; + } else if (ndec >= 0 || nw >= nwupbd) { + ++ndec; + if (nw - ndec < 2) { + ndec = 0; + } + nw -= ndec; + } + +/* + ==== Aggressive early deflation: + . split workspace under the subdiagonal into + . - an nw-by-nw work array V in the lower + . left-hand-corner, + . - an NW-by-at-least-NW-but-more-is-better + . (NW-by-NHO) horizontal work array along + . the bottom edge, + . - an at-least-NW-but-more-is-better (NHV-by-NW) + . vertical work array along the left-hand-edge. + . ==== +*/ + + kv = *n - nw + 1; + kt = nw + 1; + nho = *n - nw - 1 - kt + 1; + kwv = nw + 2; + nve = *n - nw - kwv + 1; + +/* ==== Aggressive early deflation ==== */ + + zlaqr2_(wantt, wantz, n, &ktop, &kbot, &nw, &h__[h_offset], ldh, + iloz, ihiz, &z__[z_offset], ldz, &ls, &ld, &w[1], &h__[kv + + h_dim1], ldh, &nho, &h__[kv + kt * h_dim1], ldh, &nve, & + h__[kwv + h_dim1], ldh, &work[1], lwork); + +/* ==== Adjust KBOT accounting for new deflations. ==== */ + + kbot -= ld; + +/* ==== KS points to the shifts. ==== */ + + ks = kbot - ls + 1; + +/* + ==== Skip an expensive QR sweep if there is a (partly + . heuristic) reason to expect that many eigenvalues + . will deflate without it. Here, the QR sweep is + . skipped if many eigenvalues have just been deflated + . or if the remaining active block is small. 
+*/ + + if (ld == 0 || ld * 100 <= nw * nibble && kbot - ktop + 1 > min( + nmin,nwmax)) { + +/* + ==== NS = nominal number of simultaneous shifts. + . This may be lowered (slightly) if ZLAQR2 + . did not provide that many shifts. ==== + + Computing MIN + Computing MAX +*/ + i__4 = 2, i__5 = kbot - ktop; + i__2 = min(nsmax,nsr), i__3 = max(i__4,i__5); + ns = min(i__2,i__3); + ns -= ns % 2; + +/* + ==== If there have been no deflations + . in a multiple of KEXSH iterations, + . then try exceptional shifts. + . Otherwise use shifts provided by + . ZLAQR2 above or from the eigenvalues + . of a trailing principal submatrix. ==== +*/ + + if (ndfl % 6 == 0) { + ks = kbot - ns + 1; + i__2 = ks + 1; + for (i__ = kbot; i__ >= i__2; i__ += -2) { + i__3 = i__; + i__4 = i__ + i__ * h_dim1; + i__5 = i__ + (i__ - 1) * h_dim1; + d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[i__ + (i__ - 1) * h_dim1]), abs( + d__2))) * .75; + z__1.r = h__[i__4].r + d__3, z__1.i = h__[i__4].i; + w[i__3].r = z__1.r, w[i__3].i = z__1.i; + i__3 = i__ - 1; + i__4 = i__; + w[i__3].r = w[i__4].r, w[i__3].i = w[i__4].i; +/* L30: */ + } + } else { + +/* + ==== Got NS/2 or fewer shifts? Use ZLAHQR + . on a trailing principal submatrix to + . get more. (Since NS.LE.NSMAX.LE.(N+6)/9, + . there is enough space below the subdiagonal + . to fit an NS-by-NS scratch array.) ==== +*/ + + if (kbot - ks + 1 <= ns / 2) { + ks = kbot - ns + 1; + kt = *n - ns + 1; + zlacpy_("A", &ns, &ns, &h__[ks + ks * h_dim1], ldh, & + h__[kt + h_dim1], ldh); + zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, &h__[kt + + h_dim1], ldh, &w[ks], &c__1, &c__1, zdum, & + c__1, &inf); + ks += inf; + +/* + ==== In case of a rare QR failure use + . eigenvalues of the trailing 2-by-2 + . principal submatrix. Scale to avoid + . overflows, underflows and subnormals. + . (The scale factor S can not be zero, + . because H(KBOT,KBOT-1) is nonzero.) 
==== +*/ + + if (ks >= kbot) { + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + i__3 = kbot + (kbot - 1) * h_dim1; + i__4 = kbot - 1 + kbot * h_dim1; + i__5 = kbot + kbot * h_dim1; + s = (d__1 = h__[i__2].r, abs(d__1)) + (d__2 = + d_imag(&h__[kbot - 1 + (kbot - 1) * + h_dim1]), abs(d__2)) + ((d__3 = h__[i__3] + .r, abs(d__3)) + (d__4 = d_imag(&h__[kbot + + (kbot - 1) * h_dim1]), abs(d__4))) + (( + d__5 = h__[i__4].r, abs(d__5)) + (d__6 = + d_imag(&h__[kbot - 1 + kbot * h_dim1]), + abs(d__6))) + ((d__7 = h__[i__5].r, abs( + d__7)) + (d__8 = d_imag(&h__[kbot + kbot * + h_dim1]), abs(d__8))); + i__2 = kbot - 1 + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + aa.r = z__1.r, aa.i = z__1.i; + i__2 = kbot + (kbot - 1) * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + cc.r = z__1.r, cc.i = z__1.i; + i__2 = kbot - 1 + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + bb.r = z__1.r, bb.i = z__1.i; + i__2 = kbot + kbot * h_dim1; + z__1.r = h__[i__2].r / s, z__1.i = h__[i__2].i / + s; + dd.r = z__1.r, dd.i = z__1.i; + z__2.r = aa.r + dd.r, z__2.i = aa.i + dd.i; + z__1.r = z__2.r / 2., z__1.i = z__2.i / 2.; + tr2.r = z__1.r, tr2.i = z__1.i; + z__3.r = aa.r - tr2.r, z__3.i = aa.i - tr2.i; + z__4.r = dd.r - tr2.r, z__4.i = dd.i - tr2.i; + z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, + z__2.i = z__3.r * z__4.i + z__3.i * + z__4.r; + z__5.r = bb.r * cc.r - bb.i * cc.i, z__5.i = bb.r + * cc.i + bb.i * cc.r; + z__1.r = z__2.r - z__5.r, z__1.i = z__2.i - + z__5.i; + det.r = z__1.r, det.i = z__1.i; + z__2.r = -det.r, z__2.i = -det.i; + z_sqrt(&z__1, &z__2); + rtdisc.r = z__1.r, rtdisc.i = z__1.i; + i__2 = kbot - 1; + z__2.r = tr2.r + rtdisc.r, z__2.i = tr2.i + + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + i__2 = kbot; + z__2.r = tr2.r - rtdisc.r, z__2.i = tr2.i - + rtdisc.i; + z__1.r = s * z__2.r, z__1.i = s * z__2.i; + w[i__2].r = z__1.r, w[i__2].i = z__1.i; + + ks = 
kbot - 1; + } + } + + if (kbot - ks + 1 > ns) { + +/* ==== Sort the shifts (Helps a little) ==== */ + + sorted = FALSE_; + i__2 = ks + 1; + for (k = kbot; k >= i__2; --k) { + if (sorted) { + goto L60; + } + sorted = TRUE_; + i__3 = k - 1; + for (i__ = ks; i__ <= i__3; ++i__) { + i__4 = i__; + i__5 = i__ + 1; + if ((d__1 = w[i__4].r, abs(d__1)) + (d__2 = + d_imag(&w[i__]), abs(d__2)) < (d__3 = + w[i__5].r, abs(d__3)) + (d__4 = + d_imag(&w[i__ + 1]), abs(d__4))) { + sorted = FALSE_; + i__4 = i__; + swap.r = w[i__4].r, swap.i = w[i__4].i; + i__4 = i__; + i__5 = i__ + 1; + w[i__4].r = w[i__5].r, w[i__4].i = w[i__5] + .i; + i__4 = i__ + 1; + w[i__4].r = swap.r, w[i__4].i = swap.i; + } +/* L40: */ + } +/* L50: */ + } +L60: + ; + } + } + +/* + ==== If there are only two shifts, then use + . only one. ==== +*/ + + if (kbot - ks + 1 == 2) { + i__2 = kbot; + i__3 = kbot + kbot * h_dim1; + z__2.r = w[i__2].r - h__[i__3].r, z__2.i = w[i__2].i - + h__[i__3].i; + z__1.r = z__2.r, z__1.i = z__2.i; + i__4 = kbot - 1; + i__5 = kbot + kbot * h_dim1; + z__4.r = w[i__4].r - h__[i__5].r, z__4.i = w[i__4].i - + h__[i__5].i; + z__3.r = z__4.r, z__3.i = z__4.i; + if ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1), + abs(d__2)) < (d__3 = z__3.r, abs(d__3)) + (d__4 = + d_imag(&z__3), abs(d__4))) { + i__2 = kbot - 1; + i__3 = kbot; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } else { + i__2 = kbot; + i__3 = kbot - 1; + w[i__2].r = w[i__3].r, w[i__2].i = w[i__3].i; + } + } + +/* + ==== Use up to NS of the the smallest magnatiude + . shifts. If there aren't NS shifts available, + . then use them all, possibly dropping one to + . make the number of shifts even. ==== + + Computing MIN +*/ + i__2 = ns, i__3 = kbot - ks + 1; + ns = min(i__2,i__3); + ns -= ns % 2; + ks = kbot - ns + 1; + +/* + ==== Small-bulge multi-shift QR sweep: + . split workspace under the subdiagonal into + . - a KDU-by-KDU work array U in the lower + . left-hand-corner, + . 
- a KDU-by-at-least-KDU-but-more-is-better + . (KDU-by-NHo) horizontal work array WH along + . the bottom edge, + . - and an at-least-KDU-but-more-is-better-by-KDU + . (NVE-by-KDU) vertical work WV arrow along + . the left-hand-edge. ==== +*/ + + kdu = ns * 3 - 3; + ku = *n - kdu + 1; + kwh = kdu + 1; + nho = *n - kdu - 3 - (kdu + 1) + 1; + kwv = kdu + 4; + nve = *n - kdu - kwv + 1; + +/* ==== Small-bulge multi-shift QR sweep ==== */ + + zlaqr5_(wantt, wantz, &kacc22, n, &ktop, &kbot, &ns, &w[ks], & + h__[h_offset], ldh, iloz, ihiz, &z__[z_offset], ldz, & + work[1], &c__3, &h__[ku + h_dim1], ldh, &nve, &h__[ + kwv + h_dim1], ldh, &nho, &h__[ku + kwh * h_dim1], + ldh); + } + +/* ==== Note progress (or the lack of it). ==== */ + + if (ld > 0) { + ndfl = 1; + } else { + ++ndfl; + } + +/* + ==== End of main loop ==== + L70: +*/ + } + +/* + ==== Iteration limit exceeded. Set INFO to show where + . the problem occurred and exit. ==== +*/ + + *info = kbot; +L80: + ; + } + +/* ==== Return the optimal value of LWORK. 
==== */ + + d__1 = (doublereal) lwkopt; + z__1.r = d__1, z__1.i = 0.; + work[1].r = z__1.r, work[1].i = z__1.i; + +/* ==== End of ZLAQR4 ==== */ + + return 0; +} /* zlaqr4_ */ + +/* Subroutine */ int zlaqr5_(logical *wantt, logical *wantz, integer *kacc22, + integer *n, integer *ktop, integer *kbot, integer *nshfts, + doublecomplex *s, doublecomplex *h__, integer *ldh, integer *iloz, + integer *ihiz, doublecomplex *z__, integer *ldz, doublecomplex *v, + integer *ldv, doublecomplex *u, integer *ldu, integer *nv, + doublecomplex *wv, integer *ldwv, integer *nh, doublecomplex *wh, + integer *ldwh) +{ + /* System generated locals */ + integer h_dim1, h_offset, u_dim1, u_offset, v_dim1, v_offset, wh_dim1, + wh_offset, wv_dim1, wv_offset, z_dim1, z_offset, i__1, i__2, i__3, + i__4, i__5, i__6, i__7, i__8, i__9, i__10, i__11; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10; + doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + double d_imag(doublecomplex *); + + /* Local variables */ + static integer j, k, m, i2, j2, i4, j4, k1; + static doublereal h11, h12, h21, h22; + static integer m22, ns, nu; + static doublecomplex vt[3]; + static doublereal scl; + static integer kdu, kms; + static doublereal ulp; + static integer knz, kzs; + static doublereal tst1, tst2; + static doublecomplex beta; + static logical blk22, bmp22; + static integer mend, jcol, jlen, jbot, mbot, jtop, jrow, mtop; + static doublecomplex alpha; + static logical accum; + static integer ndcol, incol, krcol, nbmps; + extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, + integer *, doublecomplex *, doublecomplex *, integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + integer *), ztrmm_(char *, char *, char *, char *, + integer *, integer *, doublecomplex *, doublecomplex *, integer * + , doublecomplex *, integer *), + dlabad_(doublereal *, doublereal 
*), zlaqr1_(integer *, + doublecomplex *, integer *, doublecomplex *, doublecomplex *, + doublecomplex *); + + static doublereal safmin, safmax; + extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *); + static doublecomplex refsum; + extern /* Subroutine */ int zlacpy_(char *, integer *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), + zlaset_(char *, integer *, integer *, doublecomplex *, + doublecomplex *, doublecomplex *, integer *); + static integer mstart; + static doublereal smlnum; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd.. + November 2006 + + + This auxiliary subroutine called by ZLAQR0 performs a + single small-bulge multi-shift QR sweep. + + WANTT (input) logical scalar + WANTT = .true. if the triangular Schur factor + is being computed. WANTT is set to .false. otherwise. + + WANTZ (input) logical scalar + WANTZ = .true. if the unitary Schur factor is being + computed. WANTZ is set to .false. otherwise. + + KACC22 (input) integer with value 0, 1, or 2. + Specifies the computation mode of far-from-diagonal + orthogonal updates. + = 0: ZLAQR5 does not accumulate reflections and does not + use matrix-matrix multiply to update far-from-diagonal + matrix entries. + = 1: ZLAQR5 accumulates reflections and uses matrix-matrix + multiply to update the far-from-diagonal matrix entries. + = 2: ZLAQR5 accumulates reflections, uses matrix-matrix + multiply to update the far-from-diagonal matrix entries, + and takes advantage of 2-by-2 block structure during + matrix multiplies. + + N (input) integer scalar + N is the order of the Hessenberg matrix H upon which this + subroutine operates. + + KTOP (input) integer scalar + KBOT (input) integer scalar + These are the first and last rows and columns of an + isolated diagonal block upon which the QR sweep is to be + applied. 
It is assumed without a check that + either KTOP = 1 or H(KTOP,KTOP-1) = 0 + and + either KBOT = N or H(KBOT+1,KBOT) = 0. + + NSHFTS (input) integer scalar + NSHFTS gives the number of simultaneous shifts. NSHFTS + must be positive and even. + + S (input/output) COMPLEX*16 array of size (NSHFTS) + S contains the shifts of origin that define the multi- + shift QR sweep. On output S may be reordered. + + H (input/output) COMPLEX*16 array of size (LDH,N) + On input H contains a Hessenberg matrix. On output a + multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied + to the isolated diagonal block in rows and columns KTOP + through KBOT. + + LDH (input) integer scalar + LDH is the leading dimension of H just as declared in the + calling procedure. LDH.GE.MAX(1,N). + + ILOZ (input) INTEGER + IHIZ (input) INTEGER + Specify the rows of Z to which transformations must be + applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N + + Z (input/output) COMPLEX*16 array of size (LDZ,IHI) + If WANTZ = .TRUE., then the QR Sweep unitary + similarity transformation is accumulated into + Z(ILOZ:IHIZ,ILO:IHI) from the right. + If WANTZ = .FALSE., then Z is unreferenced. + + LDZ (input) integer scalar + LDA is the leading dimension of Z just as declared in + the calling procedure. LDZ.GE.N. + + V (workspace) COMPLEX*16 array of size (LDV,NSHFTS/2) + + LDV (input) integer scalar + LDV is the leading dimension of V as declared in the + calling procedure. LDV.GE.3. + + U (workspace) COMPLEX*16 array of size + (LDU,3*NSHFTS-3) + + LDU (input) integer scalar + LDU is the leading dimension of U just as declared in the + in the calling subroutine. LDU.GE.3*NSHFTS-3. + + NH (input) integer scalar + NH is the number of columns in array WH available for + workspace. NH.GE.1. + + WH (workspace) COMPLEX*16 array of size (LDWH,NH) + + LDWH (input) integer scalar + Leading dimension of WH just as declared in the + calling procedure. LDWH.GE.3*NSHFTS-3. 
+ + NV (input) integer scalar + NV is the number of rows in WV agailable for workspace. + NV.GE.1. + + WV (workspace) COMPLEX*16 array of size + (LDWV,3*NSHFTS-3) + + LDWV (input) integer scalar + LDWV is the leading dimension of WV as declared in the + in the calling subroutine. LDWV.GE.NV. + + ================================================================ + Based on contributions by + Karen Braman and Ralph Byers, Department of Mathematics, + University of Kansas, USA + + ================================================================ + Reference: + + K. Braman, R. Byers and R. Mathias, The Multi-Shift QR + Algorithm Part I: Maintaining Well Focused Shifts, and + Level 3 Performance, SIAM Journal of Matrix Analysis, + volume 23, pages 929--947, 2002. + + ================================================================ + + + ==== If there are no shifts, then there is nothing to do. ==== +*/ + + /* Parameter adjustments */ + --s; + h_dim1 = *ldh; + h_offset = 1 + h_dim1; + h__ -= h_offset; + z_dim1 = *ldz; + z_offset = 1 + z_dim1; + z__ -= z_offset; + v_dim1 = *ldv; + v_offset = 1 + v_dim1; + v -= v_offset; + u_dim1 = *ldu; + u_offset = 1 + u_dim1; + u -= u_offset; + wv_dim1 = *ldwv; + wv_offset = 1 + wv_dim1; + wv -= wv_offset; + wh_dim1 = *ldwh; + wh_offset = 1 + wh_dim1; + wh -= wh_offset; + + /* Function Body */ + if (*nshfts < 2) { + return 0; + } + +/* + ==== If the active block is empty or 1-by-1, then there + . is nothing to do. ==== +*/ + + if (*ktop >= *kbot) { + return 0; + } + +/* + ==== NSHFTS is supposed to be even, but if it is odd, + . then simply reduce it by one. ==== +*/ + + ns = *nshfts - *nshfts % 2; + +/* ==== Machine constants for deflation ==== */ + + safmin = SAFEMINIMUM; + safmax = 1. / safmin; + dlabad_(&safmin, &safmax); + ulp = PRECISION; + smlnum = safmin * ((doublereal) (*n) / ulp); + +/* + ==== Use accumulated reflections to update far-from-diagonal + . entries ? 
==== +*/ + + accum = *kacc22 == 1 || *kacc22 == 2; + +/* ==== If so, exploit the 2-by-2 block structure? ==== */ + + blk22 = ns > 2 && *kacc22 == 2; + +/* ==== clear trash ==== */ + + if (*ktop + 2 <= *kbot) { + i__1 = *ktop + 2 + *ktop * h_dim1; + h__[i__1].r = 0., h__[i__1].i = 0.; + } + +/* ==== NBMPS = number of 2-shift bulges in the chain ==== */ + + nbmps = ns / 2; + +/* ==== KDU = width of slab ==== */ + + kdu = nbmps * 6 - 3; + +/* ==== Create and chase chains of NBMPS bulges ==== */ + + i__1 = *kbot - 2; + i__2 = nbmps * 3 - 2; + for (incol = (1 - nbmps) * 3 + *ktop - 1; i__2 < 0 ? incol >= i__1 : + incol <= i__1; incol += i__2) { + ndcol = incol + kdu; + if (accum) { + zlaset_("ALL", &kdu, &kdu, &c_b56, &c_b57, &u[u_offset], ldu); + } + +/* + ==== Near-the-diagonal bulge chase. The following loop + . performs the near-the-diagonal part of a small bulge + . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal + . chunk extends from column INCOL to column NDCOL + . (including both column INCOL and column NDCOL). The + . following loop chases a 3*NBMPS column long chain of + . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL + . may be less than KTOP and and NDCOL may be greater than + . KBOT indicating phantom columns from which to chase + . bulges before they are actually introduced or to which + . to chase bulges beyond column KBOT.) ==== + + Computing MIN +*/ + i__4 = incol + nbmps * 3 - 3, i__5 = *kbot - 2; + i__3 = min(i__4,i__5); + for (krcol = incol; krcol <= i__3; ++krcol) { + +/* + ==== Bulges number MTOP to MBOT are active double implicit + . shift bulges. There may or may not also be small + . 2-by-2 bulge, if there is room. The inactive bulges + . (if any) must wait until the active bulges have moved + . down the diagonal to make room. The phantom matrix + . paradigm described above helps keep track. 
==== + + Computing MAX +*/ + i__4 = 1, i__5 = (*ktop - 1 - krcol + 2) / 3 + 1; + mtop = max(i__4,i__5); +/* Computing MIN */ + i__4 = nbmps, i__5 = (*kbot - krcol) / 3; + mbot = min(i__4,i__5); + m22 = mbot + 1; + bmp22 = mbot < nbmps && krcol + (m22 - 1) * 3 == *kbot - 2; + +/* + ==== Generate reflections to chase the chain right + . one column. (The minimum value of K is KTOP-1.) ==== +*/ + + i__4 = mbot; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + if (k == *ktop - 1) { + zlaqr1_(&c__3, &h__[*ktop + *ktop * h_dim1], ldh, &s[(m << + 1) - 1], &s[m * 2], &v[m * v_dim1 + 1]); + i__5 = m * v_dim1 + 1; + alpha.r = v[i__5].r, alpha.i = v[i__5].i; + zlarfg_(&c__3, &alpha, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + } else { + i__5 = k + 1 + k * h_dim1; + beta.r = h__[i__5].r, beta.i = h__[i__5].i; + i__5 = m * v_dim1 + 2; + i__6 = k + 2 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + i__5 = m * v_dim1 + 3; + i__6 = k + 3 + k * h_dim1; + v[i__5].r = h__[i__6].r, v[i__5].i = h__[i__6].i; + zlarfg_(&c__3, &beta, &v[m * v_dim1 + 2], &c__1, &v[m * + v_dim1 + 1]); + +/* + ==== A Bulge may collapse because of vigilant + . deflation or destructive underflow. In the + . underflow case, try the two-small-subdiagonals + . trick to try to reinflate the bulge. ==== +*/ + + i__5 = k + 3 + k * h_dim1; + i__6 = k + 3 + (k + 1) * h_dim1; + i__7 = k + 3 + (k + 2) * h_dim1; + if (h__[i__5].r != 0. || h__[i__5].i != 0. || (h__[i__6] + .r != 0. || h__[i__6].i != 0.) || h__[i__7].r == + 0. && h__[i__7].i == 0.) { + +/* ==== Typical case: not collapsed (yet). ==== */ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + } else { + +/* + ==== Atypical case: collapsed. Attempt to + . reintroduce ignoring H(K+1,K) and H(K+2,K). + . If the fill resulting from the new + . 
reflector is too large, then abandon it. + . Otherwise, use the new one. ==== +*/ + + zlaqr1_(&c__3, &h__[k + 1 + (k + 1) * h_dim1], ldh, & + s[(m << 1) - 1], &s[m * 2], vt); + alpha.r = vt[0].r, alpha.i = vt[0].i; + zlarfg_(&c__3, &alpha, &vt[1], &c__1, vt); + d_cnjg(&z__2, vt); + i__5 = k + 1 + k * h_dim1; + d_cnjg(&z__5, &vt[1]); + i__6 = k + 2 + k * h_dim1; + z__4.r = z__5.r * h__[i__6].r - z__5.i * h__[i__6].i, + z__4.i = z__5.r * h__[i__6].i + z__5.i * h__[ + i__6].r; + z__3.r = h__[i__5].r + z__4.r, z__3.i = h__[i__5].i + + z__4.i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + refsum.r = z__1.r, refsum.i = z__1.i; + + i__5 = k + 2 + k * h_dim1; + z__3.r = refsum.r * vt[1].r - refsum.i * vt[1].i, + z__3.i = refsum.r * vt[1].i + refsum.i * vt[1] + .r; + z__2.r = h__[i__5].r - z__3.r, z__2.i = h__[i__5].i - + z__3.i; + z__1.r = z__2.r, z__1.i = z__2.i; + z__5.r = refsum.r * vt[2].r - refsum.i * vt[2].i, + z__5.i = refsum.r * vt[2].i + refsum.i * vt[2] + .r; + z__4.r = z__5.r, z__4.i = z__5.i; + i__6 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + i__8 = k + 2 + (k + 2) * h_dim1; + if ((d__1 = z__1.r, abs(d__1)) + (d__2 = d_imag(&z__1) + , abs(d__2)) + ((d__3 = z__4.r, abs(d__3)) + ( + d__4 = d_imag(&z__4), abs(d__4))) > ulp * (( + d__5 = h__[i__6].r, abs(d__5)) + (d__6 = + d_imag(&h__[k + k * h_dim1]), abs(d__6)) + (( + d__7 = h__[i__7].r, abs(d__7)) + (d__8 = + d_imag(&h__[k + 1 + (k + 1) * h_dim1]), abs( + d__8))) + ((d__9 = h__[i__8].r, abs(d__9)) + ( + d__10 = d_imag(&h__[k + 2 + (k + 2) * h_dim1]) + , abs(d__10))))) { + +/* + ==== Starting a new bulge here would + . create non-negligible fill. Use + . the old one with trepidation. 
==== +*/ + + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = beta.r, h__[i__5].i = beta.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + } else { + +/* + ==== Stating a new bulge here would + . create only negligible fill. + . Replace the old reflector with + . the new one. ==== +*/ + + i__5 = k + 1 + k * h_dim1; + i__6 = k + 1 + k * h_dim1; + z__1.r = h__[i__6].r - refsum.r, z__1.i = h__[ + i__6].i - refsum.i; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; + i__5 = k + 2 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = k + 3 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + i__5 = m * v_dim1 + 1; + v[i__5].r = vt[0].r, v[i__5].i = vt[0].i; + i__5 = m * v_dim1 + 2; + v[i__5].r = vt[1].r, v[i__5].i = vt[1].i; + i__5 = m * v_dim1 + 3; + v[i__5].r = vt[2].r, v[i__5].i = vt[2].i; + } + } + } +/* L10: */ + } + +/* ==== Generate a 2-by-2 reflection, if needed. ==== */ + + k = krcol + (m22 - 1) * 3; + if (bmp22) { + if (k == *ktop - 1) { + zlaqr1_(&c__2, &h__[k + 1 + (k + 1) * h_dim1], ldh, &s[( + m22 << 1) - 1], &s[m22 * 2], &v[m22 * v_dim1 + 1]) + ; + i__4 = m22 * v_dim1 + 1; + beta.r = v[i__4].r, beta.i = v[i__4].i; + zlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + } else { + i__4 = k + 1 + k * h_dim1; + beta.r = h__[i__4].r, beta.i = h__[i__4].i; + i__4 = m22 * v_dim1 + 2; + i__5 = k + 2 + k * h_dim1; + v[i__4].r = h__[i__5].r, v[i__4].i = h__[i__5].i; + zlarfg_(&c__2, &beta, &v[m22 * v_dim1 + 2], &c__1, &v[m22 + * v_dim1 + 1]); + i__4 = k + 1 + k * h_dim1; + h__[i__4].r = beta.r, h__[i__4].i = beta.i; + i__4 = k + 2 + k * h_dim1; + h__[i__4].r = 0., h__[i__4].i = 0.; + } + } + +/* ==== Multiply H by reflections from the left ==== */ + + if (accum) { + jbot = min(ndcol,*kbot); + } else if (*wantt) { + jbot = *n; + } else { + jbot = *kbot; + } + i__4 = jbot; + for (j = max(*ktop,krcol); j <= i__4; ++j) { +/* Computing MIN */ + i__5 = mbot, 
i__6 = (j - krcol + 2) / 3; + mend = min(i__5,i__6); + i__5 = mend; + for (m = mtop; m <= i__5; ++m) { + k = krcol + (m - 1) * 3; + d_cnjg(&z__2, &v[m * v_dim1 + 1]); + i__6 = k + 1 + j * h_dim1; + d_cnjg(&z__6, &v[m * v_dim1 + 2]); + i__7 = k + 2 + j * h_dim1; + z__5.r = z__6.r * h__[i__7].r - z__6.i * h__[i__7].i, + z__5.i = z__6.r * h__[i__7].i + z__6.i * h__[i__7] + .r; + z__4.r = h__[i__6].r + z__5.r, z__4.i = h__[i__6].i + + z__5.i; + d_cnjg(&z__8, &v[m * v_dim1 + 3]); + i__8 = k + 3 + j * h_dim1; + z__7.r = z__8.r * h__[i__8].r - z__8.i * h__[i__8].i, + z__7.i = z__8.r * h__[i__8].i + z__8.i * h__[i__8] + .r; + z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__6 = k + 1 + j * h_dim1; + i__7 = k + 1 + j * h_dim1; + z__1.r = h__[i__7].r - refsum.r, z__1.i = h__[i__7].i - + refsum.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = k + 2 + j * h_dim1; + i__7 = k + 2 + j * h_dim1; + i__8 = m * v_dim1 + 2; + z__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + z__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = k + 3 + j * h_dim1; + i__7 = k + 3 + j * h_dim1; + i__8 = m * v_dim1 + 3; + z__2.r = refsum.r * v[i__8].r - refsum.i * v[i__8].i, + z__2.i = refsum.r * v[i__8].i + refsum.i * v[i__8] + .r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; +/* L20: */ + } +/* L30: */ + } + if (bmp22) { + k = krcol + (m22 - 1) * 3; +/* Computing MAX */ + i__4 = k + 1; + i__5 = jbot; + for (j = max(i__4,*ktop); j <= i__5; ++j) { + d_cnjg(&z__2, &v[m22 * v_dim1 + 1]); + i__4 = k + 1 + j * h_dim1; + d_cnjg(&z__5, &v[m22 * v_dim1 + 2]); + i__6 = k + 2 + j * h_dim1; + z__4.r = z__5.r * h__[i__6].r - z__5.i * h__[i__6].i, + z__4.i = z__5.r * 
h__[i__6].i + z__5.i * h__[i__6] + .r; + z__3.r = h__[i__4].r + z__4.r, z__3.i = h__[i__4].i + + z__4.i; + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = + z__2.r * z__3.i + z__2.i * z__3.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__4 = k + 1 + j * h_dim1; + i__6 = k + 1 + j * h_dim1; + z__1.r = h__[i__6].r - refsum.r, z__1.i = h__[i__6].i - + refsum.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; + i__4 = k + 2 + j * h_dim1; + i__6 = k + 2 + j * h_dim1; + i__7 = m22 * v_dim1 + 2; + z__2.r = refsum.r * v[i__7].r - refsum.i * v[i__7].i, + z__2.i = refsum.r * v[i__7].i + refsum.i * v[i__7] + .r; + z__1.r = h__[i__6].r - z__2.r, z__1.i = h__[i__6].i - + z__2.i; + h__[i__4].r = z__1.r, h__[i__4].i = z__1.i; +/* L40: */ + } + } + +/* + ==== Multiply H by reflections from the right. + . Delay filling in the last row until the + . vigilant deflation check is complete. ==== +*/ + + if (accum) { + jtop = max(*ktop,incol); + } else if (*wantt) { + jtop = 1; + } else { + jtop = *ktop; + } + i__5 = mbot; + for (m = mtop; m <= i__5; ++m) { + i__4 = m * v_dim1 + 1; + if (v[i__4].r != 0. || v[i__4].i != 0.) 
{ + k = krcol + (m - 1) * 3; +/* Computing MIN */ + i__6 = *kbot, i__7 = k + 3; + i__4 = min(i__6,i__7); + for (j = jtop; j <= i__4; ++j) { + i__6 = m * v_dim1 + 1; + i__7 = j + (k + 1) * h_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * h_dim1; + z__4.r = v[i__8].r * h__[i__9].r - v[i__8].i * h__[ + i__9].i, z__4.i = v[i__8].r * h__[i__9].i + v[ + i__8].i * h__[i__9].r; + z__3.r = h__[i__7].r + z__4.r, z__3.i = h__[i__7].i + + z__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * h_dim1; + z__5.r = v[i__10].r * h__[i__11].r - v[i__10].i * h__[ + i__11].i, z__5.i = v[i__10].r * h__[i__11].i + + v[i__10].i * h__[i__11].r; + z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + z__5.i; + z__1.r = v[i__6].r * z__2.r - v[i__6].i * z__2.i, + z__1.i = v[i__6].r * z__2.i + v[i__6].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__6 = j + (k + 1) * h_dim1; + i__7 = j + (k + 1) * h_dim1; + z__1.r = h__[i__7].r - refsum.r, z__1.i = h__[i__7].i + - refsum.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = j + (k + 2) * h_dim1; + i__7 = j + (k + 2) * h_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; + i__6 = j + (k + 3) * h_dim1; + i__7 = j + (k + 3) * h_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - + z__2.i; + h__[i__6].r = z__1.r, h__[i__6].i = z__1.i; +/* L50: */ + } + + if (accum) { + +/* + ==== Accumulate U. (If necessary, update Z later + . with with an efficient matrix-matrix + . multiply.) 
==== +*/ + + kms = k - incol; +/* Computing MAX */ + i__4 = 1, i__6 = *ktop - incol; + i__7 = kdu; + for (j = max(i__4,i__6); j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (kms + 1) * u_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (kms + 2) * u_dim1; + z__4.r = v[i__8].r * u[i__9].r - v[i__8].i * u[ + i__9].i, z__4.i = v[i__8].r * u[i__9].i + + v[i__8].i * u[i__9].r; + z__3.r = u[i__6].r + z__4.r, z__3.i = u[i__6].i + + z__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (kms + 3) * u_dim1; + z__5.r = v[i__10].r * u[i__11].r - v[i__10].i * u[ + i__11].i, z__5.i = v[i__10].r * u[i__11] + .i + v[i__10].i * u[i__11].r; + z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + + z__5.i; + z__1.r = v[i__4].r * z__2.r - v[i__4].i * z__2.i, + z__1.i = v[i__4].r * z__2.i + v[i__4].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__4 = j + (kms + 1) * u_dim1; + i__6 = j + (kms + 1) * u_dim1; + z__1.r = u[i__6].r - refsum.r, z__1.i = u[i__6].i + - refsum.i; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + i__4 = j + (kms + 2) * u_dim1; + i__6 = j + (kms + 2) * u_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = u[i__6].r - z__2.r, z__1.i = u[i__6].i - + z__2.i; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; + i__4 = j + (kms + 3) * u_dim1; + i__6 = j + (kms + 3) * u_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = u[i__6].r - z__2.r, z__1.i = u[i__6].i - + z__2.i; + u[i__4].r = z__1.r, u[i__4].i = z__1.i; +/* L60: */ + } + } else if (*wantz) { + +/* + ==== U is not accumulated, so update Z + . now by multiplying by reflections + . from the right. 
==== +*/ + + i__7 = *ihiz; + for (j = *iloz; j <= i__7; ++j) { + i__4 = m * v_dim1 + 1; + i__6 = j + (k + 1) * z_dim1; + i__8 = m * v_dim1 + 2; + i__9 = j + (k + 2) * z_dim1; + z__4.r = v[i__8].r * z__[i__9].r - v[i__8].i * + z__[i__9].i, z__4.i = v[i__8].r * z__[ + i__9].i + v[i__8].i * z__[i__9].r; + z__3.r = z__[i__6].r + z__4.r, z__3.i = z__[i__6] + .i + z__4.i; + i__10 = m * v_dim1 + 3; + i__11 = j + (k + 3) * z_dim1; + z__5.r = v[i__10].r * z__[i__11].r - v[i__10].i * + z__[i__11].i, z__5.i = v[i__10].r * z__[ + i__11].i + v[i__10].i * z__[i__11].r; + z__2.r = z__3.r + z__5.r, z__2.i = z__3.i + + z__5.i; + z__1.r = v[i__4].r * z__2.r - v[i__4].i * z__2.i, + z__1.i = v[i__4].r * z__2.i + v[i__4].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__4 = j + (k + 1) * z_dim1; + i__6 = j + (k + 1) * z_dim1; + z__1.r = z__[i__6].r - refsum.r, z__1.i = z__[ + i__6].i - refsum.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; + i__4 = j + (k + 2) * z_dim1; + i__6 = j + (k + 2) * z_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = z__[i__6].r - z__2.r, z__1.i = z__[i__6] + .i - z__2.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; + i__4 = j + (k + 3) * z_dim1; + i__6 = j + (k + 3) * z_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = z__[i__6].r - z__2.r, z__1.i = z__[i__6] + .i - z__2.i; + z__[i__4].r = z__1.r, z__[i__4].i = z__1.i; +/* L70: */ + } + } + } +/* L80: */ + } + +/* ==== Special case: 2-by-2 reflection (if needed) ==== */ + + k = krcol + (m22 - 1) * 3; + i__5 = m22 * v_dim1 + 1; + if (bmp22 && (v[i__5].r != 0. 
|| v[i__5].i != 0.)) { +/* Computing MIN */ + i__7 = *kbot, i__4 = k + 3; + i__5 = min(i__7,i__4); + for (j = jtop; j <= i__5; ++j) { + i__7 = m22 * v_dim1 + 1; + i__4 = j + (k + 1) * h_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * h_dim1; + z__3.r = v[i__6].r * h__[i__8].r - v[i__6].i * h__[i__8] + .i, z__3.i = v[i__6].r * h__[i__8].i + v[i__6].i * + h__[i__8].r; + z__2.r = h__[i__4].r + z__3.r, z__2.i = h__[i__4].i + + z__3.i; + z__1.r = v[i__7].r * z__2.r - v[i__7].i * z__2.i, z__1.i = + v[i__7].r * z__2.i + v[i__7].i * z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__7 = j + (k + 1) * h_dim1; + i__4 = j + (k + 1) * h_dim1; + z__1.r = h__[i__4].r - refsum.r, z__1.i = h__[i__4].i - + refsum.i; + h__[i__7].r = z__1.r, h__[i__7].i = z__1.i; + i__7 = j + (k + 2) * h_dim1; + i__4 = j + (k + 2) * h_dim1; + d_cnjg(&z__3, &v[m22 * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, z__2.i = + refsum.r * z__3.i + refsum.i * z__3.r; + z__1.r = h__[i__4].r - z__2.r, z__1.i = h__[i__4].i - + z__2.i; + h__[i__7].r = z__1.r, h__[i__7].i = z__1.i; +/* L90: */ + } + + if (accum) { + kms = k - incol; +/* Computing MAX */ + i__5 = 1, i__7 = *ktop - incol; + i__4 = kdu; + for (j = max(i__5,i__7); j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (kms + 1) * u_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (kms + 2) * u_dim1; + z__3.r = v[i__6].r * u[i__8].r - v[i__6].i * u[i__8] + .i, z__3.i = v[i__6].r * u[i__8].i + v[i__6] + .i * u[i__8].r; + z__2.r = u[i__7].r + z__3.r, z__2.i = u[i__7].i + + z__3.i; + z__1.r = v[i__5].r * z__2.r - v[i__5].i * z__2.i, + z__1.i = v[i__5].r * z__2.i + v[i__5].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__5 = j + (kms + 1) * u_dim1; + i__7 = j + (kms + 1) * u_dim1; + z__1.r = u[i__7].r - refsum.r, z__1.i = u[i__7].i - + refsum.i; + u[i__5].r = z__1.r, u[i__5].i = z__1.i; + i__5 = j + (kms + 2) * u_dim1; + i__7 = j + (kms + 2) * u_dim1; + d_cnjg(&z__3, &v[m22 * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r 
- refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = u[i__7].r - z__2.r, z__1.i = u[i__7].i - + z__2.i; + u[i__5].r = z__1.r, u[i__5].i = z__1.i; +/* L100: */ + } + } else if (*wantz) { + i__4 = *ihiz; + for (j = *iloz; j <= i__4; ++j) { + i__5 = m22 * v_dim1 + 1; + i__7 = j + (k + 1) * z_dim1; + i__6 = m22 * v_dim1 + 2; + i__8 = j + (k + 2) * z_dim1; + z__3.r = v[i__6].r * z__[i__8].r - v[i__6].i * z__[ + i__8].i, z__3.i = v[i__6].r * z__[i__8].i + v[ + i__6].i * z__[i__8].r; + z__2.r = z__[i__7].r + z__3.r, z__2.i = z__[i__7].i + + z__3.i; + z__1.r = v[i__5].r * z__2.r - v[i__5].i * z__2.i, + z__1.i = v[i__5].r * z__2.i + v[i__5].i * + z__2.r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__5 = j + (k + 1) * z_dim1; + i__7 = j + (k + 1) * z_dim1; + z__1.r = z__[i__7].r - refsum.r, z__1.i = z__[i__7].i + - refsum.i; + z__[i__5].r = z__1.r, z__[i__5].i = z__1.i; + i__5 = j + (k + 2) * z_dim1; + i__7 = j + (k + 2) * z_dim1; + d_cnjg(&z__3, &v[m22 * v_dim1 + 2]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, + z__2.i = refsum.r * z__3.i + refsum.i * + z__3.r; + z__1.r = z__[i__7].r - z__2.r, z__1.i = z__[i__7].i - + z__2.i; + z__[i__5].r = z__1.r, z__[i__5].i = z__1.i; +/* L110: */ + } + } + } + +/* ==== Vigilant deflation check ==== */ + + mstart = mtop; + if (krcol + (mstart - 1) * 3 < *ktop) { + ++mstart; + } + mend = mbot; + if (bmp22) { + ++mend; + } + if (krcol == *kbot - 2) { + ++mend; + } + i__4 = mend; + for (m = mstart; m <= i__4; ++m) { +/* Computing MIN */ + i__5 = *kbot - 1, i__7 = krcol + (m - 1) * 3; + k = min(i__5,i__7); + +/* + ==== The following convergence test requires that + . the tradition small-compared-to-nearby-diagonals + . criterion and the Ahues & Tisseur (LAWN 122, 1997) + . criteria both be satisfied. The latter improves + . accuracy in some examples. Falling back on an + . alternate convergence criterion when TST1 or TST2 + . is zero (as done here) is traditional but probably + . unnecessary. 
==== +*/ + + i__5 = k + 1 + k * h_dim1; + if (h__[i__5].r != 0. || h__[i__5].i != 0.) { + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + tst1 = (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = d_imag(& + h__[k + k * h_dim1]), abs(d__2)) + ((d__3 = h__[ + i__7].r, abs(d__3)) + (d__4 = d_imag(&h__[k + 1 + + (k + 1) * h_dim1]), abs(d__4))); + if (tst1 == 0.) { + if (k >= *ktop + 1) { + i__5 = k + (k - 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + (k - 1) * h_dim1]), abs( + d__2)); + } + if (k >= *ktop + 2) { + i__5 = k + (k - 2) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + (k - 2) * h_dim1]), abs( + d__2)); + } + if (k >= *ktop + 3) { + i__5 = k + (k - 3) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + (k - 3) * h_dim1]), abs( + d__2)); + } + if (k <= *kbot - 2) { + i__5 = k + 2 + (k + 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 2 + (k + 1) * h_dim1]), + abs(d__2)); + } + if (k <= *kbot - 3) { + i__5 = k + 3 + (k + 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 3 + (k + 1) * h_dim1]), + abs(d__2)); + } + if (k <= *kbot - 4) { + i__5 = k + 4 + (k + 1) * h_dim1; + tst1 += (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 4 + (k + 1) * h_dim1]), + abs(d__2)); + } + } + i__5 = k + 1 + k * h_dim1; +/* Computing MAX */ + d__3 = smlnum, d__4 = ulp * tst1; + if ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = d_imag(&h__[ + k + 1 + k * h_dim1]), abs(d__2)) <= max(d__3,d__4) + ) { +/* Computing MAX */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__5].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + k * h_dim1]), abs(d__2)), + d__6 = (d__3 = h__[i__7].r, abs(d__3)) + ( + d__4 = d_imag(&h__[k + (k + 1) * h_dim1]), + abs(d__4)); + h12 = max(d__5,d__6); +/* Computing MIN */ + i__5 = k + 1 + k * h_dim1; + i__7 = k + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__5].r, 
abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + k * h_dim1]), abs(d__2)), + d__6 = (d__3 = h__[i__7].r, abs(d__3)) + ( + d__4 = d_imag(&h__[k + (k + 1) * h_dim1]), + abs(d__4)); + h21 = min(d__5,d__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + z__2.r = h__[i__5].r - h__[i__7].r, z__2.i = h__[i__5] + .i - h__[i__7].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MAX */ + i__6 = k + 1 + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__6].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + (k + 1) * h_dim1]), abs( + d__2)), d__6 = (d__3 = z__1.r, abs(d__3)) + ( + d__4 = d_imag(&z__1), abs(d__4)); + h11 = max(d__5,d__6); + i__5 = k + k * h_dim1; + i__7 = k + 1 + (k + 1) * h_dim1; + z__2.r = h__[i__5].r - h__[i__7].r, z__2.i = h__[i__5] + .i - h__[i__7].i; + z__1.r = z__2.r, z__1.i = z__2.i; +/* Computing MIN */ + i__6 = k + 1 + (k + 1) * h_dim1; + d__5 = (d__1 = h__[i__6].r, abs(d__1)) + (d__2 = + d_imag(&h__[k + 1 + (k + 1) * h_dim1]), abs( + d__2)), d__6 = (d__3 = z__1.r, abs(d__3)) + ( + d__4 = d_imag(&z__1), abs(d__4)); + h22 = min(d__5,d__6); + scl = h11 + h12; + tst2 = h22 * (h11 / scl); + +/* Computing MAX */ + d__1 = smlnum, d__2 = ulp * tst2; + if (tst2 == 0. || h21 * (h12 / scl) <= max(d__1,d__2)) + { + i__5 = k + 1 + k * h_dim1; + h__[i__5].r = 0., h__[i__5].i = 0.; + } + } + } +/* L120: */ + } + +/* + ==== Fill in the last row of each bulge. 
==== + + Computing MIN +*/ + i__4 = nbmps, i__5 = (*kbot - krcol - 1) / 3; + mend = min(i__4,i__5); + i__4 = mend; + for (m = mtop; m <= i__4; ++m) { + k = krcol + (m - 1) * 3; + i__5 = m * v_dim1 + 1; + i__7 = m * v_dim1 + 3; + z__2.r = v[i__5].r * v[i__7].r - v[i__5].i * v[i__7].i, + z__2.i = v[i__5].r * v[i__7].i + v[i__5].i * v[i__7] + .r; + i__6 = k + 4 + (k + 3) * h_dim1; + z__1.r = z__2.r * h__[i__6].r - z__2.i * h__[i__6].i, z__1.i = + z__2.r * h__[i__6].i + z__2.i * h__[i__6].r; + refsum.r = z__1.r, refsum.i = z__1.i; + i__5 = k + 4 + (k + 1) * h_dim1; + z__1.r = -refsum.r, z__1.i = -refsum.i; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; + i__5 = k + 4 + (k + 2) * h_dim1; + z__2.r = -refsum.r, z__2.i = -refsum.i; + d_cnjg(&z__3, &v[m * v_dim1 + 2]); + z__1.r = z__2.r * z__3.r - z__2.i * z__3.i, z__1.i = z__2.r * + z__3.i + z__2.i * z__3.r; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; + i__5 = k + 4 + (k + 3) * h_dim1; + i__7 = k + 4 + (k + 3) * h_dim1; + d_cnjg(&z__3, &v[m * v_dim1 + 3]); + z__2.r = refsum.r * z__3.r - refsum.i * z__3.i, z__2.i = + refsum.r * z__3.i + refsum.i * z__3.r; + z__1.r = h__[i__7].r - z__2.r, z__1.i = h__[i__7].i - z__2.i; + h__[i__5].r = z__1.r, h__[i__5].i = z__1.i; +/* L130: */ + } + +/* + ==== End of near-the-diagonal bulge chase. ==== + + L140: +*/ + } + +/* + ==== Use U (if accumulated) to update far-from-diagonal + . entries in H. If required, use U to update Z as + . well. ==== +*/ + + if (accum) { + if (*wantt) { + jtop = 1; + jbot = *n; + } else { + jtop = *ktop; + jbot = *kbot; + } + if (! blk22 || incol < *ktop || ndcol > *kbot || ns <= 2) { + +/* + ==== Updates not exploiting the 2-by-2 block + . structure of U. K1 and NU keep track of + . the location and size of U in the special + . cases of introducing bulges and chasing + . bulges off the bottom. In these special + . cases and in case the number of shifts + . is NS = 2, there is no 2-by-2 block + . structure to exploit. 
==== + + Computing MAX +*/ + i__3 = 1, i__4 = *ktop - incol; + k1 = max(i__3,i__4); +/* Computing MAX */ + i__3 = 0, i__4 = ndcol - *kbot; + nu = kdu - max(i__3,i__4) - k1 + 1; + +/* ==== Horizontal Multiply ==== */ + + i__3 = jbot; + i__4 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__4 < 0 ? jcol >= i__3 : + jcol <= i__3; jcol += i__4) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + zgemm_("C", "N", &nu, &jlen, &nu, &c_b57, &u[k1 + k1 * + u_dim1], ldu, &h__[incol + k1 + jcol * h_dim1], + ldh, &c_b56, &wh[wh_offset], ldwh); + zlacpy_("ALL", &nu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + k1 + jcol * h_dim1], ldh); +/* L150: */ + } + +/* ==== Vertical multiply ==== */ + + i__4 = max(*ktop,incol) - 1; + i__3 = *nv; + for (jrow = jtop; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(*ktop,incol) - jrow; + jlen = min(i__5,i__7); + zgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &h__[jrow + ( + incol + k1) * h_dim1], ldh, &u[k1 + k1 * u_dim1], + ldu, &c_b56, &wv[wv_offset], ldwv); + zlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + k1) * h_dim1], ldh); +/* L160: */ + } + +/* ==== Z multiply (also vertical) ==== */ + + if (*wantz) { + i__3 = *ihiz; + i__4 = *nv; + for (jrow = *iloz; i__4 < 0 ? jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + zgemm_("N", "N", &jlen, &nu, &nu, &c_b57, &z__[jrow + + (incol + k1) * z_dim1], ldz, &u[k1 + k1 * + u_dim1], ldu, &c_b56, &wv[wv_offset], ldwv); + zlacpy_("ALL", &jlen, &nu, &wv[wv_offset], ldwv, &z__[ + jrow + (incol + k1) * z_dim1], ldz) + ; +/* L170: */ + } + } + } else { + +/* + ==== Updates exploiting U's 2-by-2 block structure. + . (I2, I4, J2, J4 are the last rows and columns + . of the blocks.) ==== +*/ + + i2 = (kdu + 1) / 2; + i4 = kdu; + j2 = i4 - i2; + j4 = kdu; + +/* + ==== KZS and KNZ deal with the band of zeros + . 
along the diagonal of one of the triangular + . blocks. ==== +*/ + + kzs = j4 - j2 - (ns + 1); + knz = ns + 1; + +/* ==== Horizontal multiply ==== */ + + i__4 = jbot; + i__3 = *nh; + for (jcol = min(ndcol,*kbot) + 1; i__3 < 0 ? jcol >= i__4 : + jcol <= i__4; jcol += i__3) { +/* Computing MIN */ + i__5 = *nh, i__7 = jbot - jcol + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy bottom of H to top+KZS of scratch ==== + (The first KZS rows get multiplied by zero.) ==== +*/ + + zlacpy_("ALL", &knz, &jlen, &h__[incol + 1 + j2 + jcol * + h_dim1], ldh, &wh[kzs + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + zlaset_("ALL", &kzs, &jlen, &c_b56, &c_b56, &wh[wh_offset] + , ldwh); + ztrmm_("L", "U", "C", "N", &knz, &jlen, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wh[kzs + 1 + wh_dim1] + , ldwh); + +/* ==== Multiply top of H by U11' ==== */ + + zgemm_("C", "N", &i2, &jlen, &j2, &c_b57, &u[u_offset], + ldu, &h__[incol + 1 + jcol * h_dim1], ldh, &c_b57, + &wh[wh_offset], ldwh); + +/* ==== Copy top of H to bottom of WH ==== */ + + zlacpy_("ALL", &j2, &jlen, &h__[incol + 1 + jcol * h_dim1] + , ldh, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U21' ==== */ + + ztrmm_("L", "L", "C", "N", &j2, &jlen, &c_b57, &u[(i2 + 1) + * u_dim1 + 1], ldu, &wh[i2 + 1 + wh_dim1], ldwh); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + zgemm_("C", "N", &i__5, &jlen, &i__7, &c_b57, &u[j2 + 1 + + (i2 + 1) * u_dim1], ldu, &h__[incol + 1 + j2 + + jcol * h_dim1], ldh, &c_b57, &wh[i2 + 1 + wh_dim1] + , ldwh); + +/* ==== Copy it back ==== */ + + zlacpy_("ALL", &kdu, &jlen, &wh[wh_offset], ldwh, &h__[ + incol + 1 + jcol * h_dim1], ldh); +/* L180: */ + } + +/* ==== Vertical multiply ==== */ + + i__3 = max(incol,*ktop) - 1; + i__4 = *nv; + for (jrow = jtop; i__4 < 0 ? 
jrow >= i__3 : jrow <= i__3; + jrow += i__4) { +/* Computing MIN */ + i__5 = *nv, i__7 = max(incol,*ktop) - jrow; + jlen = min(i__5,i__7); + +/* + ==== Copy right of H to scratch (the first KZS + . columns get multiplied by zero) ==== +*/ + + zlacpy_("ALL", &jlen, &knz, &h__[jrow + (incol + 1 + j2) * + h_dim1], ldh, &wv[(kzs + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + zlaset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[wv_offset] + , ldwv); + ztrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + 1 + + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + zgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &h__[jrow + ( + incol + 1) * h_dim1], ldh, &u[u_offset], ldu, & + c_b57, &wv[wv_offset], ldwv) + ; + +/* ==== Copy left of H to right of scratch ==== */ + + zlacpy_("ALL", &jlen, &j2, &h__[jrow + (incol + 1) * + h_dim1], ldh, &wv[(i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ztrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[(i2 + + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * wv_dim1 + 1] + , ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + zgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &h__[jrow + + (incol + 1 + j2) * h_dim1], ldh, &u[j2 + 1 + (i2 + + 1) * u_dim1], ldu, &c_b57, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Copy it back ==== */ + + zlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, &h__[ + jrow + (incol + 1) * h_dim1], ldh); +/* L190: */ + } + +/* ==== Multiply Z (also vertical) ==== */ + + if (*wantz) { + i__4 = *ihiz; + i__3 = *nv; + for (jrow = *iloz; i__3 < 0 ? jrow >= i__4 : jrow <= i__4; + jrow += i__3) { +/* Computing MIN */ + i__5 = *nv, i__7 = *ihiz - jrow + 1; + jlen = min(i__5,i__7); + +/* + ==== Copy right of Z to left of scratch (first + . 
KZS columns get multiplied by zero) ==== +*/ + + zlacpy_("ALL", &jlen, &knz, &z__[jrow + (incol + 1 + + j2) * z_dim1], ldz, &wv[(kzs + 1) * wv_dim1 + + 1], ldwv); + +/* ==== Multiply by U12 ==== */ + + zlaset_("ALL", &jlen, &kzs, &c_b56, &c_b56, &wv[ + wv_offset], ldwv); + ztrmm_("R", "U", "N", "N", &jlen, &knz, &c_b57, &u[j2 + + 1 + (kzs + 1) * u_dim1], ldu, &wv[(kzs + 1) + * wv_dim1 + 1], ldwv); + +/* ==== Multiply by U11 ==== */ + + zgemm_("N", "N", &jlen, &i2, &j2, &c_b57, &z__[jrow + + (incol + 1) * z_dim1], ldz, &u[u_offset], ldu, + &c_b57, &wv[wv_offset], ldwv); + +/* ==== Copy left of Z to right of scratch ==== */ + + zlacpy_("ALL", &jlen, &j2, &z__[jrow + (incol + 1) * + z_dim1], ldz, &wv[(i2 + 1) * wv_dim1 + 1], + ldwv); + +/* ==== Multiply by U21 ==== */ + + i__5 = i4 - i2; + ztrmm_("R", "L", "N", "N", &jlen, &i__5, &c_b57, &u[( + i2 + 1) * u_dim1 + 1], ldu, &wv[(i2 + 1) * + wv_dim1 + 1], ldwv); + +/* ==== Multiply by U22 ==== */ + + i__5 = i4 - i2; + i__7 = j4 - j2; + zgemm_("N", "N", &jlen, &i__5, &i__7, &c_b57, &z__[ + jrow + (incol + 1 + j2) * z_dim1], ldz, &u[j2 + + 1 + (i2 + 1) * u_dim1], ldu, &c_b57, &wv[( + i2 + 1) * wv_dim1 + 1], ldwv); + +/* ==== Copy the result back to Z ==== */ + + zlacpy_("ALL", &jlen, &kdu, &wv[wv_offset], ldwv, & + z__[jrow + (incol + 1) * z_dim1], ldz); +/* L200: */ + } + } + } + } +/* L210: */ + } + +/* ==== End of ZLAQR5 ==== */ + + return 0; +} /* zlaqr5_ */ + +/* Subroutine */ int zlarcm_(integer *m, integer *n, doublereal *a, integer * + lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc, + doublereal *rwork) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3, i__4, i__5; + doublereal d__1; + doublecomplex z__1; + + /* Builtin functions */ + double d_imag(doublecomplex *); + + /* Local variables */ + static integer i__, j, l; + extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, + integer *, doublereal *, 
doublereal *, integer *, doublereal *, + integer *, doublereal *, doublereal *, integer *); + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZLARCM performs a very simple matrix-matrix multiplication: + C := A * B, + where A is M by M and real; B is M by N and complex; + C is M by N and complex. + + Arguments + ========= + + M (input) INTEGER + The number of rows of the matrix A and of the matrix C. + M >= 0. + + N (input) INTEGER + The number of columns and rows of the matrix B and + the number of columns of the matrix C. + N >= 0. + + A (input) DOUBLE PRECISION array, dimension (LDA, M) + A contains the M by M matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >=max(1,M). @@ -14184,8 +18345,8 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } l = *m * *n + 1; - dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, & - c_b324, &rwork[l], m); + dgemm_("N", "N", m, n, m, &c_b1034, &a[a_offset], lda, &rwork[1], m, & + c_b328, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -14207,8 +18368,8 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } /* L60: */ } - dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, & - c_b324, &rwork[l], m); + dgemm_("N", "N", m, n, m, &c_b1034, &a[a_offset], lda, &rwork[1], m, & + c_b328, &rwork[l], m); i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *m; @@ -14235,23 +18396,29 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, ldc, doublecomplex *work) { /* System generated locals */ - integer c_dim1, c_offset; + integer c_dim1, c_offset, i__1; doublecomplex z__1; /* Local variables */ + static integer i__; + static logical applyleft; extern logical lsame_(char *, char *); + static integer 
lastc; extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *); + static integer lastv; + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *), + ilazlr_(integer *, integer *, doublecomplex *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14319,39 +18486,77 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, --work; /* Function Body */ - if (lsame_(side, "L")) { + applyleft = lsame_(side, "L"); + lastv = 0; + lastc = 0; + if (tau->r != 0. || tau->i != 0.) { +/* + Set up variables for scanning V. LASTV begins pointing to the end + of V. +*/ + if (applyleft) { + lastv = *m; + } else { + lastv = *n; + } + if (*incv > 0) { + i__ = (lastv - 1) * *incv + 1; + } else { + i__ = 1; + } +/* Look for the last non-zero row in V. */ + for(;;) { /* while(complicated condition) */ + i__1 = i__; + if (!(lastv > 0 && (v[i__1].r == 0. && v[i__1].i == 0.))) + break; + --lastv; + i__ -= *incv; + } + if (applyleft) { +/* Scan for the last non-zero column in C(1:lastv,:). */ + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + } else { +/* Scan for the last non-zero row in C(:,1:lastv). */ + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + } + } +/* + Note that lastc.eq.0 renders the BLAS operations null; no special + case is needed at this level. +*/ + if (applyleft) { /* Form H * C */ - if (tau->r != 0. 
|| tau->i != 0.) { + if (lastv > 0) { -/* w := C' * v */ +/* w(1:lastc,1) := C(1:lastv,1:lastc)' * v(1:lastv,1) */ - zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, & - v[1], incv, &c_b59, &work[1], &c__1); + zgemv_("Conjugate transpose", &lastv, &lastc, &c_b57, &c__[ + c_offset], ldc, &v[1], incv, &c_b56, &work[1], &c__1); -/* C := C - v * w' */ +/* C(1:lastv,1:lastc) := C(...) - v(1:lastv,1) * w(1:lastc,1)' */ z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &v[1], incv, &work[1], &c__1, &c__[c_offset], - ldc); + zgerc_(&lastv, &lastc, &z__1, &v[1], incv, &work[1], &c__1, &c__[ + c_offset], ldc); } } else { /* Form C * H */ - if (tau->r != 0. || tau->i != 0.) { + if (lastv > 0) { -/* w := C * v */ +/* w(1:lastc,1) := C(1:lastc,1:lastv) * v(1:lastv,1) */ - zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], - incv, &c_b59, &work[1], &c__1); + zgemv_("No transpose", &lastc, &lastv, &c_b57, &c__[c_offset], + ldc, &v[1], incv, &c_b56, &work[1], &c__1); -/* C := C - w * v' */ +/* C(1:lastc,1:lastv) := C(...) 
- w(1:lastc,1) * v(1:lastv,1)' */ z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], incv, &c__[c_offset], - ldc); + zgerc_(&lastc, &lastv, &z__1, &work[1], &c__1, &v[1], incv, &c__[ + c_offset], ldc); } } return 0; @@ -14376,22 +18581,28 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Local variables */ static integer i__, j; extern logical lsame_(char *, char *); + static integer lastc; extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, - integer *), zcopy_(integer *, doublecomplex *, - integer *, doublecomplex *, integer *), ztrmm_(char *, char *, - char *, char *, integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, integer *); + static integer lastv; + extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *), ztrmm_(char *, char *, char *, char * + , integer *, integer *, doublecomplex *, doublecomplex *, integer + *, doublecomplex *, integer *); + extern integer ilazlc_(integer *, integer *, doublecomplex *, integer *); + extern /* Subroutine */ int zlacgv_(integer *, doublecomplex *, integer *) + ; + extern integer ilazlr_(integer *, integer *, doublecomplex *, integer *); static char transt[1]; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -14513,6 +18724,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C1' @@ -14520,30 +18738,31 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); + zcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L10: */ } /* W := W * V1 */ - ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2'*V2 */ - i__1 = *m - *k; - zgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + - v_dim1], ldv, &c_b60, &work[work_offset], ldwork); + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[*k + 1 + c_dim1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], + ldwork); } /* W := W * T' or W * T */ - ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ @@ -14551,24 +18770,25 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* C2 := C2 - V2 * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; - zgemm_("No 
transpose", "Conjugate transpose", &i__1, n, k, - &z__1, &v[*k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork, &c_b60, &c__[*k + 1 + + zgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &z__1, &v[*k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[*k + 1 + c_dim1], ldc); } /* W := W * V1' */ - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + i__ * c_dim1; i__4 = j + i__ * c_dim1; @@ -14586,6 +18806,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C1 @@ -14593,55 +18820,56 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + zcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L40: */ } /* W := W * V1 */ - ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2 */ - i__1 = *n - *k; - zgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k + - 1 + v_dim1], ldv, &c_b60, &work[work_offset], + i__1 = lastv - *k; + zgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[(*k + 1) * c_dim1 + 
1], ldc, &v[*k + + 1 + v_dim1], ldv, &c_b57, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2' */ - i__1 = *n - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", m, &i__1, k, - &z__1, &work[work_offset], ldwork, &v[*k + 1 + - v_dim1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1], - ldc); + zgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &z__1, &work[work_offset], ldwork, &v[*k + + 1 + v_dim1], ldv, &c_b57, &c__[(*k + 1) * + c_dim1 + 1], ldc); } /* W := W * V1' */ - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; i__4 = i__ + j * c_dim1; @@ -14669,6 +18897,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(m, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V = (C1'*V1 + C2'*V2) (stored in WORK) W := C2' @@ -14676,59 +18911,59 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); + zcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + 
zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L70: */ } /* W := W * V2 */ - ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60, - &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*m > *k) { + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1 */ - i__1 = *m - *k; - zgemm_("Conjugate transpose", "No transpose", n, k, &i__1, - &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b60, &work[work_offset], ldwork); + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "No transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); } /* W := W * T' or W * T */ - ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V * W' */ - if (*m > *k) { + if (lastv > *k) { /* C1 := C1 - V1 * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", &i__1, n, k, - &z__1, &v[v_offset], ldv, &work[work_offset], - ldwork, &c_b60, &c__[c_offset], ldc); + zgemm_("No transpose", "Conjugate transpose", &i__1, & + lastc, k, &z__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); } /* W := W * V2' */ - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[*m - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; + i__3 = lastv - *k + j + 
i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; d_cnjg(&z__2, &work[i__ + j * work_dim1]); z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - z__2.i; @@ -14743,6 +18978,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlr_(n, k, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V = (C1*V1 + C2*V2) (stored in WORK) W := C2 @@ -14750,58 +18992,58 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + zcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); /* L100: */ } /* W := W * V2 */ - ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60, - &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset], - ldwork); - if (*n > *k) { + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[lastv - *k + 1 + v_dim1], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1 */ - i__1 = *n - *k; - zgemm_("No transpose", "No transpose", m, k, &i__1, & - c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b60, &work[work_offset], ldwork); + i__1 = lastv - *k; + zgemm_("No transpose", "No transpose", &lastc, k, &i__1, & + c_b57, &c__[c_offset], ldc, &v[v_offset], ldv, & + c_b57, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V' */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1' */ - i__1 = *n - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "Conjugate transpose", m, 
&i__1, k, - &z__1, &work[work_offset], ldwork, &v[v_offset], - ldv, &c_b60, &c__[c_offset], ldc); + zgemm_("No transpose", "Conjugate transpose", &lastc, & + i__1, k, &z__1, &work[work_offset], ldwork, &v[ + v_offset], ldv, &c_b57, &c__[c_offset], ldc); } /* W := W * V2' */ - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[*n - *k + 1 + v_dim1], ldv, &work[ - work_offset], ldwork); + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[lastv - *k + 1 + v_dim1], ldv, & + work[work_offset], ldwork); /* C2 := C2 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; i__5 = i__ + j * work_dim1; z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ i__4].i - work[i__5].i; @@ -14828,6 +19070,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C1' @@ -14835,56 +19084,58 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1], - &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); + zcopy_(&lastc, &c__[j + c_dim1], ldc, &work[j * work_dim1 + + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L130: */ } /* W := W * V1' */ - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*m > *k) { + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], 
ldwork); + if (lastv > *k) { /* W := W + C2'*V2' */ - i__1 = *m - *k; - zgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[(* - k + 1) * v_dim1 + 1], ldv, &c_b60, &work[ - work_offset], ldwork); + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[*k + 1 + c_dim1], + ldc, &v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, & + work[work_offset], ldwork) + ; } /* W := W * T' or W * T */ - ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Upper", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { /* C2 := C2 - V2' * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; zgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &z__1, &v[(*k + 1) * v_dim1 + 1], ldv, - &work[work_offset], ldwork, &c_b60, &c__[*k + 1 - + c_dim1], ldc); + i__1, &lastc, k, &z__1, &v[(*k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork, &c_b57, &c__[*k + + 1 + c_dim1], ldc); } /* W := W * V1 */ - ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = j + i__ * c_dim1; i__4 = j + i__ * c_dim1; @@ -14902,6 +19153,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C1 @@ -14909,55 +19167,56 @@ 
doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j * + zcopy_(&lastc, &c__[j * c_dim1 + 1], &c__1, &work[j * work_dim1 + 1], &c__1); /* L160: */ } /* W := W * V1' */ - ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[v_offset], ldv, &work[work_offset], ldwork); - if (*n > *k) { + ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[v_offset], ldv, &work[ + work_offset], ldwork); + if (lastv > *k) { /* W := W + C2 * V2' */ - i__1 = *n - *k; - zgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k - + 1) * v_dim1 + 1], ldv, &c_b60, &work[ + i__1 = lastv - *k; + zgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[(*k + 1) * c_dim1 + 1], ldc, & + v[(*k + 1) * v_dim1 + 1], ldv, &c_b57, &work[ work_offset], ldwork); } /* W := W * T or W * T' */ - ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Upper", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C2 := C2 - W * V2 */ - i__1 = *n - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1, - &work[work_offset], ldwork, &v[(*k + 1) * v_dim1 - + 1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1], - ldc); + zgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + z__1, &work[work_offset], ldwork, &v[(*k + 1) * + v_dim1 + 1], ldv, &c_b57, &c__[(*k + 1) * c_dim1 + + 1], ldc); } /* W := W * V1 */ - ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60, - &v[v_offset], ldv, &work[work_offset], ldwork); + ztrmm_("Right", "Upper", "No transpose", "Unit", &lastc, k, & + c_b57, &v[v_offset], ldv, &work[work_offset], ldwork); /* C1 := C1 - W */ 
i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { i__3 = i__ + j * c_dim1; i__4 = i__ + j * c_dim1; @@ -14985,6 +19244,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Form H * C or H' * C where C = ( C1 ) ( C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, m, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlc_(&lastv, n, &c__[c_offset], ldc); + +/* W := C' * V' = (C1'*V1' + C2'*V2') (stored in WORK) W := C2' @@ -14992,59 +19258,60 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j * - work_dim1 + 1], &c__1); - zlacgv_(n, &work[j * work_dim1 + 1], &c__1); + zcopy_(&lastc, &c__[lastv - *k + j + c_dim1], ldc, &work[ + j * work_dim1 + 1], &c__1); + zlacgv_(&lastc, &work[j * work_dim1 + 1], &c__1); /* L190: */ } /* W := W * V2' */ - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k, - &c_b60, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*m > *k) { + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C1'*V1' */ - i__1 = *m - *k; - zgemm_("Conjugate transpose", "Conjugate transpose", n, k, - &i__1, &c_b60, &c__[c_offset], ldc, &v[v_offset], - ldv, &c_b60, &work[work_offset], ldwork); + i__1 = lastv - *k; + zgemm_("Conjugate transpose", "Conjugate transpose", & + lastc, k, &i__1, &c_b57, &c__[c_offset], ldc, &v[ + v_offset], ldv, &c_b57, &work[work_offset], + ldwork); } /* W := W * T' or W * T */ - ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Lower", transt, "Non-unit", &lastc, k, & + c_b57, &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - V' * W' */ - if (*m > *k) { + if (lastv > *k) { 
/* C1 := C1 - V1' * W' */ - i__1 = *m - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; zgemm_("Conjugate transpose", "Conjugate transpose", & - i__1, n, k, &z__1, &v[v_offset], ldv, &work[ - work_offset], ldwork, &c_b60, &c__[c_offset], ldc); + i__1, &lastc, k, &z__1, &v[v_offset], ldv, &work[ + work_offset], ldwork, &c_b57, &c__[c_offset], ldc); } /* W := W * V2 */ - ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60, - &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[ + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C2 := C2 - W' */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *n; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = *m - *k + j + i__ * c_dim1; - i__4 = *m - *k + j + i__ * c_dim1; + i__3 = lastv - *k + j + i__ * c_dim1; + i__4 = lastv - *k + j + i__ * c_dim1; d_cnjg(&z__2, &work[i__ + j * work_dim1]); z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i - z__2.i; @@ -15059,6 +19326,13 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form C * H or C * H' where C = ( C1 C2 ) + Computing MAX +*/ + i__1 = *k, i__2 = ilazlc_(k, n, &v[v_offset], ldv); + lastv = max(i__1,i__2); + lastc = ilazlr_(m, &lastv, &c__[c_offset], ldc); + +/* W := C * V' = (C1*V1' + C2*V2') (stored in WORK) W := C2 @@ -15066,58 +19340,58 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *k; for (j = 1; j <= i__1; ++j) { - zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[ - j * work_dim1 + 1], &c__1); + zcopy_(&lastc, &c__[(lastv - *k + j) * c_dim1 + 1], &c__1, + &work[j * work_dim1 + 1], &c__1); /* L220: */ } /* W := W * V2' */ - ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k, - &c_b60, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ - work_offset], ldwork); - if (*n > *k) { + ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", & + lastc, k, &c_b57, &v[(lastv - *k + 1) * 
v_dim1 + 1], + ldv, &work[work_offset], ldwork); + if (lastv > *k) { /* W := W + C1 * V1' */ - i__1 = *n - *k; - zgemm_("No transpose", "Conjugate transpose", m, k, &i__1, - &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, & - c_b60, &work[work_offset], ldwork); + i__1 = lastv - *k; + zgemm_("No transpose", "Conjugate transpose", &lastc, k, & + i__1, &c_b57, &c__[c_offset], ldc, &v[v_offset], + ldv, &c_b57, &work[work_offset], ldwork); } /* W := W * T or W * T' */ - ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[ - t_offset], ldt, &work[work_offset], ldwork); + ztrmm_("Right", "Lower", trans, "Non-unit", &lastc, k, &c_b57, + &t[t_offset], ldt, &work[work_offset], ldwork); /* C := C - W * V */ - if (*n > *k) { + if (lastv > *k) { /* C1 := C1 - W * V1 */ - i__1 = *n - *k; + i__1 = lastv - *k; z__1.r = -1., z__1.i = -0.; - zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1, - &work[work_offset], ldwork, &v[v_offset], ldv, & - c_b60, &c__[c_offset], ldc); + zgemm_("No transpose", "No transpose", &lastc, &i__1, k, & + z__1, &work[work_offset], ldwork, &v[v_offset], + ldv, &c_b57, &c__[c_offset], ldc); } /* W := W * V2 */ - ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60, - &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[ + ztrmm_("Right", "Lower", "No transpose", "Unit", &lastc, k, & + c_b57, &v[(lastv - *k + 1) * v_dim1 + 1], ldv, &work[ work_offset], ldwork); /* C1 := C1 - W */ i__1 = *k; for (j = 1; j <= i__1; ++j) { - i__2 = *m; + i__2 = lastc; for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + (*n - *k + j) * c_dim1; - i__4 = i__ + (*n - *k + j) * c_dim1; + i__3 = i__ + (lastv - *k + j) * c_dim1; + i__4 = i__ + (lastv - *k + j) * c_dim1; i__5 = i__ + j * work_dim1; z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[ i__4].i - work[i__5].i; @@ -15166,10 +19440,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15248,11 +19522,11 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, safmin = SAFEMINIMUM / EPSILON; rsafmn = 1. / safmin; + knt = 0; if (abs(beta) < safmin) { /* XNORM, BETA may be inaccurate; scale X and recompute them */ - knt = 0; L10: ++knt; i__1 = *n - 1; @@ -15272,37 +19546,25 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, alpha->r = z__1.r, alpha->i = z__1.i; d__1 = dlapy3_(&alphr, &alphi, &xnorm); beta = -d_sign(&d__1, &alphr); - d__1 = (beta - alphr) / beta; - d__2 = -alphi / beta; - z__1.r = d__1, z__1.i = d__2; - tau->r = z__1.r, tau->i = z__1.i; - z__2.r = alpha->r - beta, z__2.i = alpha->i; - zladiv_(&z__1, &c_b60, &z__2); - alpha->r = z__1.r, alpha->i = z__1.i; - i__1 = *n - 1; - zscal_(&i__1, alpha, &x[1], incx); + } + d__1 = (beta - alphr) / beta; + d__2 = -alphi / beta; + z__1.r = d__1, z__1.i = d__2; + tau->r = z__1.r, tau->i = z__1.i; + z__2.r = alpha->r - beta, z__2.i = alpha->i; + zladiv_(&z__1, &c_b57, &z__2); + alpha->r = z__1.r, alpha->i = z__1.i; + i__1 = *n - 1; + zscal_(&i__1, alpha, &x[1], incx); -/* If ALPHA is subnormal, it may lose relative accuracy */ +/* If ALPHA is subnormal, it may lose relative accuracy */ - alpha->r = beta, alpha->i = 0.; - i__1 = knt; - for (j = 1; j <= i__1; ++j) { - z__1.r = safmin * alpha->r, z__1.i = safmin * alpha->i; - alpha->r = z__1.r, alpha->i = z__1.i; + i__1 = knt; + for (j = 1; j <= i__1; ++j) { + beta *= safmin; /* L20: */ - } - } else { - d__1 = (beta - alphr) / beta; - d__2 = -alphi / beta; - z__1.r = d__1, z__1.i = d__2; - tau->r = z__1.r, tau->i = z__1.i; - z__2.r = alpha->r - beta, z__2.i = alpha->i; - 
zladiv_(&z__1, &c_b60, &z__2); - alpha->r = z__1.r, alpha->i = z__1.i; - i__1 = *n - 1; - zscal_(&i__1, alpha, &x[1], incx); - alpha->r = beta, alpha->i = 0.; } + alpha->r = beta, alpha->i = 0.; } return 0; @@ -15320,22 +19582,22 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, doublecomplex z__1; /* Local variables */ - static integer i__, j; + static integer i__, j, prevlastv; static doublecomplex vii; extern logical lsame_(char *, char *); extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *), - ztrmv_(char *, char *, char *, integer *, doublecomplex *, - integer *, doublecomplex *, integer *), - zlacgv_(integer *, doublecomplex *, integer *); + integer *, doublecomplex *, doublecomplex *, integer *); + static integer lastv; + extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *, + doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, integer *); /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -15447,8 +19709,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } if (lsame_(direct, "F")) { + prevlastv = *n; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { + prevlastv = max(prevlastv,i__); i__2 = i__; if (tau[i__2].r == 0. && tau[i__2].i == 0.) { @@ -15469,33 +19733,53 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__2 = i__ + i__ * v_dim1; v[i__2].r = 1., v[i__2].i = 0.; if (lsame_(storev, "C")) { +/* Skip any trailing zeros. 
*/ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = lastv + i__ * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + goto L15; + } + } +L15: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */ +/* T(1:i-1,i) := - tau(i) * V(i:j,1:i-1)' * V(i:j,i) */ - i__2 = *n - i__ + 1; + i__2 = j - i__ + 1; i__3 = i__ - 1; i__4 = i__; z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__ + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, & - c_b59, &t[i__ * t_dim1 + 1], &c__1); + c_b56, &t[i__ * t_dim1 + 1], &c__1); } else { +/* Skip any trailing zeros. */ + i__2 = i__ + 1; + for (lastv = *n; lastv >= i__2; --lastv) { + i__3 = i__ + lastv * v_dim1; + if (v[i__3].r != 0. || v[i__3].i != 0.) { + goto L16; + } + } +L16: + j = min(lastv,prevlastv); -/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */ +/* T(1:i-1,i) := - tau(i) * V(1:i-1,i:j) * V(i,i:j)' */ - if (i__ < *n) { - i__2 = *n - i__; + if (i__ < j) { + i__2 = j - i__; zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); } i__2 = i__ - 1; - i__3 = *n - i__ + 1; + i__3 = j - i__ + 1; i__4 = i__; z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i; zgemv_("No transpose", &i__2, &i__3, &z__1, &v[i__ * v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, & - c_b59, &t[i__ * t_dim1 + 1], &c__1); - if (i__ < *n) { - i__2 = *n - i__; + c_b56, &t[i__ * t_dim1 + 1], &c__1); + if (i__ < j) { + i__2 = j - i__; zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv); } } @@ -15510,10 +19794,16 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__2 = i__ + i__ * t_dim1; i__3 = i__; t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i; + if (i__ > 1) { + prevlastv = max(prevlastv,lastv); + } else { + prevlastv = lastv; + } } /* L20: */ } } else { + prevlastv = 1; for (i__ = *k; i__ >= 1; --i__) { i__1 = i__; if (tau[i__1].r == 0. && tau[i__1].i == 0.) 
{ @@ -15536,19 +19826,29 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, vii.r = v[i__1].r, vii.i = v[i__1].i; i__1 = *n - *k + i__ + i__ * v_dim1; v[i__1].r = 1., v[i__1].i = 0.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = lastv + i__ * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) { + goto L35; + } + } +L35: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i) + - tau(i) * V(j:n-k+i,i+1:k)' * V(j:n-k+i,i) */ - i__1 = *n - *k + i__; + i__1 = *n - *k + i__ - j + 1; i__2 = *k - i__; i__3 = i__; z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[ - (i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1 - + 1], &c__1, &c_b59, &t[i__ + 1 + i__ * + j + (i__ + 1) * v_dim1], ldv, &v[j + i__ * + v_dim1], &c__1, &c_b56, &t[i__ + 1 + i__ * t_dim1], &c__1); i__1 = *n - *k + i__ + i__ * v_dim1; v[i__1].r = vii.r, v[i__1].i = vii.i; @@ -15557,23 +19857,34 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, vii.r = v[i__1].r, vii.i = v[i__1].i; i__1 = i__ + (*n - *k + i__) * v_dim1; v[i__1].r = 1., v[i__1].i = 0.; +/* Skip any leading zeros. */ + i__1 = i__ - 1; + for (lastv = 1; lastv <= i__1; ++lastv) { + i__2 = i__ + lastv * v_dim1; + if (v[i__2].r != 0. || v[i__2].i != 0.) 
{ + goto L36; + } + } +L36: + j = max(lastv,prevlastv); /* T(i+1:k,i) := - - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)' + - tau(i) * V(i+1:k,j:n-k+i) * V(i,j:n-k+i)' */ - i__1 = *n - *k + i__ - 1; - zlacgv_(&i__1, &v[i__ + v_dim1], ldv); + i__1 = *n - *k + i__ - 1 - j + 1; + zlacgv_(&i__1, &v[i__ + j * v_dim1], ldv); i__1 = *k - i__; - i__2 = *n - *k + i__; + i__2 = *n - *k + i__ - j + 1; i__3 = i__; z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i; zgemv_("No transpose", &i__1, &i__2, &z__1, &v[i__ + - 1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, & - c_b59, &t[i__ + 1 + i__ * t_dim1], &c__1); - i__1 = *n - *k + i__ - 1; - zlacgv_(&i__1, &v[i__ + v_dim1], ldv); + 1 + j * v_dim1], ldv, &v[i__ + j * v_dim1], + ldv, &c_b56, &t[i__ + 1 + i__ * t_dim1], & + c__1); + i__1 = *n - *k + i__ - 1 - j + 1; + zlacgv_(&i__1, &v[i__ + j * v_dim1], ldv); i__1 = i__ + (*n - *k + i__) * v_dim1; v[i__1].r = vii.r, v[i__1].i = vii.i; } @@ -15585,2063 +19896,289 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ * t_dim1], &c__1) ; - } - i__1 = i__ + i__ * t_dim1; - i__2 = i__; - t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; - } -/* L40: */ - } - } - return 0; - -/* End of ZLARFT */ - -} /* zlarft_ */ - -/* Subroutine */ int zlarfx_(char *side, integer *m, integer *n, - doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer * - ldc, doublecomplex *work) -{ - /* System generated locals */ - integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8, - i__9, i__10, i__11; - doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8, z__9, z__10, - z__11, z__12, z__13, z__14, z__15, z__16, z__17, z__18, z__19; - - /* Builtin functions */ - void d_cnjg(doublecomplex *, doublecomplex *); - - /* Local variables */ - static integer j; - static doublecomplex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, - v5, v6, v7, v8, v9, t10, v10, sum; - extern logical lsame_(char *, char *); - extern /* 
Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, - doublecomplex *, integer *, doublecomplex *, integer *, - doublecomplex *, integer *), zgemv_(char *, integer *, integer *, - doublecomplex *, doublecomplex *, integer *, doublecomplex *, - integer *, doublecomplex *, doublecomplex *, integer *); - - -/* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 - - - Purpose - ======= - - ZLARFX applies a complex elementary reflector H to a complex m by n - matrix C, from either the left or the right. H is represented in the - form - - H = I - tau * v * v' - - where tau is a complex scalar and v is a complex vector. - - If tau = 0, then H is taken to be the unit matrix - - This version uses inline code if H has order < 11. - - Arguments - ========= - - SIDE (input) CHARACTER*1 - = 'L': form H * C - = 'R': form C * H - - M (input) INTEGER - The number of rows of the matrix C. - - N (input) INTEGER - The number of columns of the matrix C. - - V (input) COMPLEX*16 array, dimension (M) if SIDE = 'L' - or (N) if SIDE = 'R' - The vector v in the representation of H. - - TAU (input) COMPLEX*16 - The value tau in the representation of H. - - C (input/output) COMPLEX*16 array, dimension (LDC,N) - On entry, the m by n matrix C. - On exit, C is overwritten by the matrix H * C if SIDE = 'L', - or C * H if SIDE = 'R'. - - LDC (input) INTEGER - The leading dimension of the array C. LDA >= max(1,M). - - WORK (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L' - or (M) if SIDE = 'R' - WORK is not referenced if H has order < 11. - - ===================================================================== -*/ - - - /* Parameter adjustments */ - --v; - c_dim1 = *ldc; - c_offset = 1 + c_dim1; - c__ -= c_offset; - --work; - - /* Function Body */ - if (tau->r == 0. && tau->i == 0.) 
{ - return 0; - } - if (lsame_(side, "L")) { - -/* Form H * C, where H has order m. */ - - switch (*m) { - case 1: goto L10; - case 2: goto L30; - case 3: goto L50; - case 4: goto L70; - case 5: goto L90; - case 6: goto L110; - case 7: goto L130; - case 8: goto L150; - case 9: goto L170; - case 10: goto L190; + if (i__ > 1) { + prevlastv = min(prevlastv,lastv); + } else { + prevlastv = lastv; + } + } + i__1 = i__ + i__ * t_dim1; + i__2 = i__; + t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i; + } +/* L40: */ } + } + return 0; -/* - Code for general M +/* End of ZLARFT */ - w := C'*v -*/ +} /* zlarft_ */ - zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1] - , &c__1, &c_b59, &work[1], &c__1); +/* Subroutine */ int zlartg_(doublecomplex *f, doublecomplex *g, doublereal * + cs, doublecomplex *sn, doublecomplex *r__) +{ + /* System generated locals */ + integer i__1; + doublereal d__1, d__2, d__3, d__4, d__5, d__6, d__7, d__8, d__9, d__10; + doublecomplex z__1, z__2, z__3; -/* C := C - tau * v * w' */ + /* Builtin functions */ + double log(doublereal), pow_di(doublereal *, integer *), d_imag( + doublecomplex *), sqrt(doublereal); + void d_cnjg(doublecomplex *, doublecomplex *); - z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L10: + /* Local variables */ + static doublereal d__; + static integer i__; + static doublereal f2, g2; + static doublecomplex ff; + static doublereal di, dr; + static doublecomplex fs, gs; + static doublereal f2s, g2s, eps, scale; + static integer count; + static doublereal safmn2; + extern doublereal dlapy2_(doublereal *, doublereal *); + static doublereal safmx2; -/* Special code for 1 x 1 Householder */ + static doublereal safmin; - z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i - + tau->i * v[1].r; - d_cnjg(&z__4, &v[1]); - z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i - + z__3.i * z__4.r; - z__1.r 
= 1. - z__2.r, z__1.i = 0. - z__2.i; - t1.r = z__1.r, t1.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L20: */ - } - goto L410; -L30: -/* Special code for 2 x 2 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L40: */ - } - goto L410; -L50: +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 -/* Special code for 3 x 3 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; - i__4 = j * c_dim1 + 3; - z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* 
L60: */ - } - goto L410; -L70: -/* Special code for 4 x 4 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; - i__4 = j * c_dim1 + 3; - z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; - i__5 = j * c_dim1 + 4; - z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - 
sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L80: */ - } - goto L410; -L90: + Purpose + ======= -/* Special code for 5 x 5 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * - c__[i__3].i + 
v2.i * c__[i__3].r; - z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; - i__4 = j * c_dim1 + 3; - z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; - i__5 = j * c_dim1 + 4; - z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; - i__6 = j * c_dim1 + 5; - z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L100: */ - } - goto L410; -L110: + ZLARTG generates a plane rotation so that -/* Special code for 6 x 6 Householder */ - - d_cnjg(&z__1, 
&v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; - i__4 = j * c_dim1 + 3; - z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; - i__5 = j * c_dim1 + 4; - z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; - i__6 = j * c_dim1 + 5; - z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, 
z__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i; - i__7 = j * c_dim1 + 6; - z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L120: */ - } - goto L410; -L130: + [ CS SN ] [ F ] [ R ] + [ __ ] . [ ] = [ ] where CS**2 + |SN|**2 = 1. 
+ [ -SN CS ] [ G ] [ 0 ] -/* Special code for 7 x 7 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; - i__4 = j * c_dim1 + 3; - z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__5.r = z__6.r + z__9.r, 
z__5.i = z__6.i + z__9.i; - i__5 = j * c_dim1 + 4; - z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; - i__6 = j * c_dim1 + 5; - z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i; - i__7 = j * c_dim1 + 6; - z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i; - i__8 = j * c_dim1 + 7; - z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = 
z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L140: */ - } - goto L410; -L150: + This is a faster version of the BLAS1 routine ZROTG, except for + the following differences: + F and G are unchanged on return. + If G=0, then CS=1 and SN=0. + If F=0, then CS=0 and SN is chosen so that R is real. -/* Special code for 8 x 8 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - 
d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - d_cnjg(&z__1, &v[8]); - v8.r = z__1.r, v8.i = z__1.i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; - i__4 = j * c_dim1 + 3; - z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; - i__5 = j * c_dim1 + 4; - z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; - i__6 = j * c_dim1 + 5; - z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i; - i__7 = j * c_dim1 + 6; - z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i; - i__8 = j * c_dim1 + 7; - z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; - i__9 = j * c_dim1 + 8; - z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * 
t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L160: */ - } - goto L410; -L170: + Arguments + ========= -/* Special code for 9 x 9 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - 
d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - d_cnjg(&z__1, &v[8]); - v8.r = z__1.r, v8.i = z__1.i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - d_cnjg(&z__1, &v[9]); - v9.r = z__1.r, v9.i = z__1.i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__10.r = v2.r * c__[i__3].r - v2.i * 
c__[i__3].i, z__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; - i__4 = j * c_dim1 + 3; - z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; - i__5 = j * c_dim1 + 4; - z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; - i__6 = j * c_dim1 + 5; - z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i; - i__7 = j * c_dim1 + 6; - z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i; - i__8 = j * c_dim1 + 7; - z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; - i__9 = j * c_dim1 + 8; - z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; - i__10 = j * c_dim1 + 9; - z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; 
- z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L180: */ - } - goto L410; -L190: + F (input) COMPLEX*16 + The first component of vector to be rotated. 
-/* Special code for 10 x 10 Householder */ - - d_cnjg(&z__1, &v[1]); - v1.r = z__1.r, v1.i = z__1.i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - d_cnjg(&z__1, &v[2]); - v2.r = z__1.r, v2.i = z__1.i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - d_cnjg(&z__1, &v[3]); - v3.r = z__1.r, v3.i = z__1.i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - d_cnjg(&z__1, &v[4]); - v4.r = z__1.r, v4.i = z__1.i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - d_cnjg(&z__1, &v[5]); - v5.r = z__1.r, v5.i = z__1.i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - d_cnjg(&z__1, &v[6]); - v6.r = z__1.r, v6.i = z__1.i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - d_cnjg(&z__1, &v[7]); - v7.r = z__1.r, v7.i = z__1.i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - d_cnjg(&z__1, &v[8]); - v8.r = z__1.r, v8.i = z__1.i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - d_cnjg(&z__1, &v[9]); - v9.r = z__1.r, v9.i = z__1.i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - d_cnjg(&z__1, &v[10]); - v10.r = z__1.r, v10.i = z__1.i; - d_cnjg(&z__2, &v10); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i 
= tau->r * z__2.i - + tau->i * z__2.r; - t10.r = z__1.r, t10.i = z__1.i; - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = j * c_dim1 + 1; - z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j * c_dim1 + 2; - z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; - i__4 = j * c_dim1 + 3; - z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; - i__5 = j * c_dim1 + 4; - z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; - i__6 = j * c_dim1 + 5; - z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i; - i__7 = j * c_dim1 + 6; - z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i; - i__8 = j * c_dim1 + 7; - z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; - i__9 = j * c_dim1 + 8; - z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i; - i__10 = j * c_dim1 + 9; - z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i; - i__11 = j * c_dim1 + 10; - z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j * 
c_dim1 + 1; - i__3 = j * c_dim1 + 1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 2; - i__3 = j * c_dim1 + 2; - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 3; - i__3 = j * c_dim1 + 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 4; - i__3 = j * c_dim1 + 4; - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 5; - i__3 = j * c_dim1 + 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 6; - i__3 = j * c_dim1 + 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 7; - i__3 = j * c_dim1 + 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 8; - i__3 = j * c_dim1 + 8; - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 9; - i__3 = j * c_dim1 + 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, 
z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j * c_dim1 + 10; - i__3 = j * c_dim1 + 10; - z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i + - sum.i * t10.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L200: */ - } - goto L410; - } else { + G (input) COMPLEX*16 + The second component of vector to be rotated. -/* Form C * H, where H has order n. */ + CS (output) DOUBLE PRECISION + The cosine of the rotation. - switch (*n) { - case 1: goto L210; - case 2: goto L230; - case 3: goto L250; - case 4: goto L270; - case 5: goto L290; - case 6: goto L310; - case 7: goto L330; - case 8: goto L350; - case 9: goto L370; - case 10: goto L390; - } + SN (output) COMPLEX*16 + The sine of the rotation. -/* - Code for general N + R (output) COMPLEX*16 + The nonzero component of the rotated vector. - w := C * v -*/ + Further Details + ======= ======= - zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], & - c__1, &c_b59, &work[1], &c__1); + 3-5-96 - Modified with a new algorithm by W. Kahan and J. Demmel -/* C := C - tau * w * v' */ + This version has a few statements commented out for thread safety + (machine parameters are computed on each entry). 10 feb 03, SJH. - z__1.r = -tau->r, z__1.i = -tau->i; - zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], - ldc); - goto L410; -L210: + ===================================================================== -/* Special code for 1 x 1 Householder */ + LOGICAL FIRST + SAVE FIRST, SAFMX2, SAFMIN, SAFMN2 + DATA FIRST / .TRUE. / - z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i - + tau->i * v[1].r; - d_cnjg(&z__4, &v[1]); - z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i - + z__3.i * z__4.r; - z__1.r = 1. - z__2.r, z__1.i = 0. 
- z__2.i; - t1.r = z__1.r, t1.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r * - c__[i__3].i + t1.i * c__[i__3].r; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L220: */ - } - goto L410; -L230: - -/* Special code for 2 x 2 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L240: */ - } - goto L410; -L250: - -/* Special code for 3 x 3 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r 
= z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i; - i__4 = j + c_dim1 * 3; - z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L260: */ + IF( FIRST ) THEN +*/ + safmin = SAFEMINIMUM; + eps = EPSILON; + d__1 = BASE; + i__1 = (integer) (log(safmin / eps) / log(BASE) / 2.); + safmn2 = pow_di(&d__1, &i__1); + safmx2 = 1. / safmn2; +/* + FIRST = .FALSE. 
+ END IF + Computing MAX + Computing MAX +*/ + d__7 = (d__1 = f->r, abs(d__1)), d__8 = (d__2 = d_imag(f), abs(d__2)); +/* Computing MAX */ + d__9 = (d__3 = g->r, abs(d__3)), d__10 = (d__4 = d_imag(g), abs(d__4)); + d__5 = max(d__7,d__8), d__6 = max(d__9,d__10); + scale = max(d__5,d__6); + fs.r = f->r, fs.i = f->i; + gs.r = g->r, gs.i = g->i; + count = 0; + if (scale >= safmx2) { +L10: + ++count; + z__1.r = safmn2 * fs.r, z__1.i = safmn2 * fs.i; + fs.r = z__1.r, fs.i = z__1.i; + z__1.r = safmn2 * gs.r, z__1.i = safmn2 * gs.i; + gs.r = z__1.r, gs.i = z__1.i; + scale *= safmn2; + if (scale >= safmx2) { + goto L10; } - goto L410; -L270: - -/* Special code for 4 x 4 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i; - i__4 = j + c_dim1 * 3; - z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i; - i__5 = j + (c_dim1 << 2); - z__7.r = v4.r * c__[i__5].r - 
v4.i * c__[i__5].i, z__7.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L280: */ + } else if (scale <= safmn2) { + if (g->r == 0. && g->i == 0.) 
{ + *cs = 1.; + sn->r = 0., sn->i = 0.; + r__->r = f->r, r__->i = f->i; + return 0; } - goto L410; -L290: - -/* Special code for 5 x 5 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i; - i__4 = j + c_dim1 * 3; - z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i; - i__5 = j + (c_dim1 << 2); - z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i; - i__6 = j + c_dim1 * 5; - z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r * - c__[i__6].i + v5.i * c__[i__6].r; - z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + 
c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L300: */ +L20: + --count; + z__1.r = safmx2 * fs.r, z__1.i = safmx2 * fs.i; + fs.r = z__1.r, fs.i = z__1.i; + z__1.r = safmx2 * gs.r, z__1.i = safmx2 * gs.i; + gs.r = z__1.r, gs.i = z__1.i; + scale *= safmx2; + if (scale <= safmn2) { + goto L20; } - goto L410; -L310: - -/* Special code for 6 x 6 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + 
tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i; - i__4 = j + c_dim1 * 3; - z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i; - i__5 = j + (c_dim1 << 2); - z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r * - c__[i__5].i + v4.i * c__[i__5].r; - z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i; - i__6 = j + c_dim1 * 5; - z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i; - i__7 = j + c_dim1 * 6; - z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 
1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L320: */ + } +/* Computing 2nd power */ + d__1 = fs.r; +/* Computing 2nd power */ + d__2 = d_imag(&fs); + f2 = d__1 * d__1 + d__2 * d__2; +/* Computing 2nd power */ + d__1 = gs.r; +/* Computing 2nd power */ + d__2 = d_imag(&gs); + g2 = d__1 * d__1 + d__2 * d__2; + if (f2 <= max(g2,1.) * safmin) { + +/* This is a rare case: F is very small. */ + + if (f->r == 0. && f->i == 0.) 
{ + *cs = 0.; + d__2 = g->r; + d__3 = d_imag(g); + d__1 = dlapy2_(&d__2, &d__3); + r__->r = d__1, r__->i = 0.; +/* Do complex/real division explicitly with two real divisions */ + d__1 = gs.r; + d__2 = d_imag(&gs); + d__ = dlapy2_(&d__1, &d__2); + d__1 = gs.r / d__; + d__2 = -d_imag(&gs) / d__; + z__1.r = d__1, z__1.i = d__2; + sn->r = z__1.r, sn->i = z__1.i; + return 0; } - goto L410; -L330: + d__1 = fs.r; + d__2 = d_imag(&fs); + f2s = dlapy2_(&d__1, &d__2); +/* + G2 and G2S are accurate + G2 is at least SAFMIN, and G2S is at least SAFMN2 +*/ + g2s = sqrt(g2); +/* + Error in CS from underflow in F2S is at most + UNFL / SAFMN2 .lt. sqrt(UNFL*EPS) .lt. EPS + If MAX(G2,ONE)=G2, then F2 .lt. G2*SAFMIN, + and so CS .lt. sqrt(SAFMIN) + If MAX(G2,ONE)=ONE, then F2 .lt. SAFMIN + and so CS .lt. sqrt(SAFMIN)/SAFMN2 = sqrt(EPS) + Therefore, CS = F2S/G2S / sqrt( 1 + (F2S/G2S)**2 ) = F2S/G2S +*/ + *cs = f2s / g2s; +/* + Make sure abs(FF) = 1 + Do complex/real division explicitly with 2 real divisions + Computing MAX +*/ + d__3 = (d__1 = f->r, abs(d__1)), d__4 = (d__2 = d_imag(f), abs(d__2)); + if (max(d__3,d__4) > 1.) 
{ + d__1 = f->r; + d__2 = d_imag(f); + d__ = dlapy2_(&d__1, &d__2); + d__1 = f->r / d__; + d__2 = d_imag(f) / d__; + z__1.r = d__1, z__1.i = d__2; + ff.r = z__1.r, ff.i = z__1.i; + } else { + dr = safmx2 * f->r; + di = safmx2 * d_imag(f); + d__ = dlapy2_(&dr, &di); + d__1 = dr / d__; + d__2 = di / d__; + z__1.r = d__1, z__1.i = d__2; + ff.r = z__1.r, ff.i = z__1.i; + } + d__1 = gs.r / g2s; + d__2 = -d_imag(&gs) / g2s; + z__2.r = d__1, z__2.i = d__2; + z__1.r = ff.r * z__2.r - ff.i * z__2.i, z__1.i = ff.r * z__2.i + ff.i + * z__2.r; + sn->r = z__1.r, sn->i = z__1.i; + z__2.r = *cs * f->r, z__2.i = *cs * f->i; + z__3.r = sn->r * g->r - sn->i * g->i, z__3.i = sn->r * g->i + sn->i * + g->r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + r__->r = z__1.r, r__->i = z__1.i; + } else { -/* Special code for 7 x 7 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i 
* z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i; - i__4 = j + c_dim1 * 3; - z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r * - c__[i__4].i + v3.i * c__[i__4].r; - z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i; - i__5 = j + (c_dim1 << 2); - z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i; - i__6 = j + c_dim1 * 5; - z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i; - i__7 = j + c_dim1 * 6; - z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i; - i__8 = j + c_dim1 * 7; - z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * 
t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L340: */ - } - goto L410; -L350: - -/* Special code for 8 x 8 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, 
z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - v8.r = v[8].r, v8.i = v[8].i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r * - c__[i__3].i + v2.i * c__[i__3].r; - z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i; - i__4 = j + c_dim1 * 3; - z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i; - i__5 = j + (c_dim1 << 2); - z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i; - i__6 = j + c_dim1 * 5; - z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i; - i__7 = j + c_dim1 * 6; - z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i; - i__8 = j + c_dim1 * 7; - z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i; - i__9 = j + (c_dim1 << 3); - z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = 
v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = 
c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L360: */ - } - goto L410; -L370: - -/* Special code for 9 x 9 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - v8.r = v[8].r, v8.i = v[8].i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - v9.r = v[9].r, v9.i = v[9].i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r * - c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__10.r = v2.r * c__[i__3].r - v2.i * 
c__[i__3].i, z__10.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i; - i__4 = j + c_dim1 * 3; - z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r - * c__[i__4].i + v3.i * c__[i__4].r; - z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i; - i__5 = j + (c_dim1 << 2); - z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i; - i__6 = j + c_dim1 * 5; - z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i; - i__7 = j + c_dim1 * 6; - z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i; - i__8 = j + c_dim1 * 7; - z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i; - i__9 = j + (c_dim1 << 3); - z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i; - i__10 = j + c_dim1 * 9; - z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 
* 3; - z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L380: */ - } - goto L410; -L390: - -/* Special code for 10 x 10 Householder */ - - v1.r = v[1].r, v1.i = v[1].i; - d_cnjg(&z__2, &v1); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t1.r = z__1.r, t1.i = z__1.i; - v2.r = v[2].r, v2.i = v[2].i; - d_cnjg(&z__2, &v2); - z__1.r = tau->r * z__2.r - tau->i 
* z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t2.r = z__1.r, t2.i = z__1.i; - v3.r = v[3].r, v3.i = v[3].i; - d_cnjg(&z__2, &v3); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t3.r = z__1.r, t3.i = z__1.i; - v4.r = v[4].r, v4.i = v[4].i; - d_cnjg(&z__2, &v4); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t4.r = z__1.r, t4.i = z__1.i; - v5.r = v[5].r, v5.i = v[5].i; - d_cnjg(&z__2, &v5); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t5.r = z__1.r, t5.i = z__1.i; - v6.r = v[6].r, v6.i = v[6].i; - d_cnjg(&z__2, &v6); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t6.r = z__1.r, t6.i = z__1.i; - v7.r = v[7].r, v7.i = v[7].i; - d_cnjg(&z__2, &v7); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t7.r = z__1.r, t7.i = z__1.i; - v8.r = v[8].r, v8.i = v[8].i; - d_cnjg(&z__2, &v8); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t8.r = z__1.r, t8.i = z__1.i; - v9.r = v[9].r, v9.i = v[9].i; - d_cnjg(&z__2, &v9); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t9.r = z__1.r, t9.i = z__1.i; - v10.r = v[10].r, v10.i = v[10].i; - d_cnjg(&z__2, &v10); - z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i - + tau->i * z__2.r; - t10.r = z__1.r, t10.i = z__1.i; - i__1 = *m; - for (j = 1; j <= i__1; ++j) { - i__2 = j + c_dim1; - z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r - * c__[i__2].i + v1.i * c__[i__2].r; - i__3 = j + (c_dim1 << 1); - z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r - * c__[i__3].i + v2.i * c__[i__3].r; - z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i; - i__4 = j + c_dim1 * 3; - z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r - * c__[i__4].i + v3.i 
* c__[i__4].r; - z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i; - i__5 = j + (c_dim1 << 2); - z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r - * c__[i__5].i + v4.i * c__[i__5].r; - z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i; - i__6 = j + c_dim1 * 5; - z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r - * c__[i__6].i + v5.i * c__[i__6].r; - z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i; - i__7 = j + c_dim1 * 6; - z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r - * c__[i__7].i + v6.i * c__[i__7].r; - z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i; - i__8 = j + c_dim1 * 7; - z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r - * c__[i__8].i + v7.i * c__[i__8].r; - z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i; - i__9 = j + (c_dim1 << 3); - z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r - * c__[i__9].i + v8.i * c__[i__9].r; - z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i; - i__10 = j + c_dim1 * 9; - z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i = - v9.r * c__[i__10].i + v9.i * c__[i__10].r; - z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i; - i__11 = j + c_dim1 * 10; - z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i = - v10.r * c__[i__11].i + v10.i * c__[i__11].r; - z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i; - sum.r = z__1.r, sum.i = z__1.i; - i__2 = j + c_dim1; - i__3 = j + c_dim1; - z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i + - sum.i * t1.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 1); - i__3 = j + (c_dim1 << 1); - z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i + - sum.i * t2.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 3; - i__3 = j + c_dim1 * 3; - z__2.r = sum.r * t3.r - sum.i * 
t3.i, z__2.i = sum.r * t3.i + - sum.i * t3.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 2); - i__3 = j + (c_dim1 << 2); - z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i + - sum.i * t4.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 5; - i__3 = j + c_dim1 * 5; - z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i + - sum.i * t5.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 6; - i__3 = j + c_dim1 * 6; - z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i + - sum.i * t6.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 7; - i__3 = j + c_dim1 * 7; - z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i + - sum.i * t7.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + (c_dim1 << 3); - i__3 = j + (c_dim1 << 3); - z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i + - sum.i * t8.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 9; - i__3 = j + c_dim1 * 9; - z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i + - sum.i * t9.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; - i__2 = j + c_dim1 * 10; - i__3 = j + c_dim1 * 10; - z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i + - sum.i * t10.r; - z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i; - c__[i__2].r = z__1.r, c__[i__2].i = z__1.i; -/* L400: */ - } - goto L410; - } -L410: +/* + This is the most common case. 
+ Neither F2 nor F2/G2 are less than SAFMIN + F2S cannot overflow, and it is accurate +*/ + + f2s = sqrt(g2 / f2 + 1.); +/* Do the F2S(real)*FS(complex) multiply with two real multiplies */ + d__1 = f2s * fs.r; + d__2 = f2s * d_imag(&fs); + z__1.r = d__1, z__1.i = d__2; + r__->r = z__1.r, r__->i = z__1.i; + *cs = 1. / f2s; + d__ = f2 + g2; +/* Do complex/real division explicitly with two real divisions */ + d__1 = r__->r / d__; + d__2 = d_imag(r__) / d__; + z__1.r = d__1, z__1.i = d__2; + sn->r = z__1.r, sn->i = z__1.i; + d_cnjg(&z__2, &gs); + z__1.r = sn->r * z__2.r - sn->i * z__2.i, z__1.i = sn->r * z__2.i + + sn->i * z__2.r; + sn->r = z__1.r, sn->i = z__1.i; + if (count != 0) { + if (count > 0) { + i__1 = count; + for (i__ = 1; i__ <= i__1; ++i__) { + z__1.r = safmx2 * r__->r, z__1.i = safmx2 * r__->i; + r__->r = z__1.r, r__->i = z__1.i; +/* L30: */ + } + } else { + i__1 = -count; + for (i__ = 1; i__ <= i__1; ++i__) { + z__1.r = safmn2 * r__->r, z__1.i = safmn2 * r__->i; + r__->r = z__1.r, r__->i = z__1.i; +/* L40: */ + } + } + } + } return 0; -/* End of ZLARFX */ +/* End of ZLARTG */ -} /* zlarfx_ */ +} /* zlartg_ */ /* Subroutine */ int zlascl_(char *type__, integer *kl, integer *ku, doublereal *cfrom, doublereal *cto, integer *m, integer *n, @@ -17661,15 +20198,16 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, static doublereal cfrom1; static doublereal cfromc; + extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal bignum, smlnum; /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - February 29, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -17720,7 +20258,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, N (input) INTEGER The number of columns of the matrix A. N >= 0. - A (input/output) COMPLEX*16 array, dimension (LDA,M) + A (input/output) COMPLEX*16 array, dimension (LDA,N) The matrix to be multiplied by CTO/CFROM. See TYPE for the storage type. @@ -17765,8 +20303,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, if (itype == -1) { *info = -1; - } else if (*cfrom == 0.) { + } else if (*cfrom == 0. || disnan_(cfrom)) { *info = -4; + } else if (disnan_(cto)) { + *info = -5; } else if (*m < 0) { *info = -6; } else if (*n < 0 || itype == 4 && *n != *m || itype == 5 && *n != *m) { @@ -17813,18 +20353,36 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, L10: cfrom1 = cfromc * smlnum; - cto1 = ctoc / bignum; - if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { - mul = smlnum; - done = FALSE_; - cfromc = cfrom1; - } else if (abs(cto1) > abs(cfromc)) { - mul = bignum; - done = FALSE_; - ctoc = cto1; - } else { + if (cfrom1 == cfromc) { +/* + CFROMC is an inf. Multiply by a correctly signed zero for + finite CTOC, or a NaN if CTOC is infinite. +*/ mul = ctoc / cfromc; done = TRUE_; + cto1 = ctoc; + } else { + cto1 = ctoc / bignum; + if (cto1 == ctoc) { +/* + CTOC is either 0 or an inf. In both cases, CTOC itself + serves as the correct multiplication factor. +*/ + mul = ctoc; + done = TRUE_; + cfromc = 1.; + } else if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) { + mul = smlnum; + done = FALSE_; + cfromc = cfrom1; + } else if (abs(cto1) > abs(cfromc)) { + mul = bignum; + done = FALSE_; + ctoc = cto1; + } else { + mul = ctoc / cfromc; + done = TRUE_; + } } if (itype == 0) { @@ -17989,10 +20547,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18140,51 +20698,86 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - October 31, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose ======= - ZLASR performs the transformation + ZLASR applies a sequence of real plane rotations to a complex matrix + A, from either the left or the right. - A := P*A, when SIDE = 'L' or 'l' ( Left-hand side ) + When SIDE = 'L', the transformation takes the form - A := A*P', when SIDE = 'R' or 'r' ( Right-hand side ) + A := P*A - where A is an m by n complex matrix and P is an orthogonal matrix, - consisting of a sequence of plane rotations determined by the - parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l' - and z = n when SIDE = 'R' or 'r' ): + and when SIDE = 'R', the transformation takes the form - When DIRECT = 'F' or 'f' ( Forward sequence ) then + A := A*P**T - P = P( z - 1 )*...*P( 2 )*P( 1 ), + where P is an orthogonal matrix consisting of a sequence of z plane + rotations, with z = M when SIDE = 'L' and z = N when SIDE = 'R', + and P**T is the transpose of P. - and when DIRECT = 'B' or 'b' ( Backward sequence ) then + When DIRECT = 'F' (Forward sequence), then - P = P( 1 )*P( 2 )*...*P( z - 1 ), + P = P(z-1) * ... 
* P(2) * P(1) - where P( k ) is a plane rotation matrix for the following planes: + and when DIRECT = 'B' (Backward sequence), then - when PIVOT = 'V' or 'v' ( Variable pivot ), - the plane ( k, k + 1 ) + P = P(1) * P(2) * ... * P(z-1) - when PIVOT = 'T' or 't' ( Top pivot ), - the plane ( 1, k + 1 ) + where P(k) is a plane rotation matrix defined by the 2-by-2 rotation - when PIVOT = 'B' or 'b' ( Bottom pivot ), - the plane ( k, z ) + R(k) = ( c(k) s(k) ) + = ( -s(k) c(k) ). - c( k ) and s( k ) must contain the cosine and sine that define the - matrix P( k ). The two by two plane rotation part of the matrix - P( k ), R( k ), is assumed to be of the form + When PIVOT = 'V' (Variable pivot), the rotation is performed + for the plane (k,k+1), i.e., P(k) has the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears as a rank-2 modification to the identity matrix in + rows and columns k and k+1. + + When PIVOT = 'T' (Top pivot), the rotation is performed for the + plane (1,k+1), so P(k) has the form + + P(k) = ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + ( 1 ) + ( ... ) + ( 1 ) + + where R(k) appears in rows and columns 1 and k+1. + + Similarly, when PIVOT = 'B' (Bottom pivot), the rotation is + performed for the plane (k,z), giving P(k) the form + + P(k) = ( 1 ) + ( ... ) + ( 1 ) + ( c(k) s(k) ) + ( 1 ) + ( ... ) + ( 1 ) + ( -s(k) c(k) ) + + where R(k) appears in rows and columns k and z. The rotations are + performed without ever forming P(k) explicitly. Arguments ========= @@ -18193,13 +20786,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Specifies whether the plane rotation matrix P is applied to A on the left or the right. = 'L': Left, compute A := P*A - = 'R': Right, compute A:= A*P' - - DIRECT (input) CHARACTER*1 - Specifies whether P is a forward or backward sequence of - plane rotations. 
- = 'F': Forward, P = P( z - 1 )*...*P( 2 )*P( 1 ) - = 'B': Backward, P = P( 1 )*P( 2 )*...*P( z - 1 ) + = 'R': Right, compute A:= A*P**T PIVOT (input) CHARACTER*1 Specifies the plane for which P(k) is a plane rotation @@ -18208,6 +20795,12 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, = 'T': Top pivot, the plane (1,k+1) = 'B': Bottom pivot, the plane (k,z) + DIRECT (input) CHARACTER*1 + Specifies whether P is a forward or backward sequence of + plane rotations. + = 'F': Forward, P = P(z-1)*...*P(2)*P(1) + = 'B': Backward, P = P(1)*P(2)*...*P(z-1) + M (input) INTEGER The number of rows of the matrix A. If m <= 1, an immediate return is effected. @@ -18216,18 +20809,22 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, The number of columns of the matrix A. If n <= 1, an immediate return is effected. - C, S (input) DOUBLE PRECISION arrays, dimension + C (input) DOUBLE PRECISION array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + The cosines c(k) of the plane rotations. + + S (input) DOUBLE PRECISION array, dimension (M-1) if SIDE = 'L' (N-1) if SIDE = 'R' - c(k) and s(k) contain the cosine and sine that define the - matrix P(k). The two by two plane rotation part of the - matrix P(k), R(k), is assumed to be of the form - R( k ) = ( c( k ) s( k ) ). - ( -s( k ) c( k ) ) + The sines s(k) of the plane rotations. The 2-by-2 plane + rotation part of the matrix P(k), R(k), has the form + R(k) = ( c(k) s(k) ) + ( -s(k) c(k) ). A (input/output) COMPLEX*16 array, dimension (LDA,N) - The m by n matrix A. On exit, A is overwritten by P*A if - SIDE = 'R' or by A*P' if SIDE = 'L'. + The M-by-N matrix A. On exit, A is overwritten by P*A if + SIDE = 'R' or by A*P**T if SIDE = 'L'. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). @@ -18684,10 +21281,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. 
of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18796,10 +21393,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18830,7 +21427,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, The last element of IPIV for which a row interchange will be done. - IPIV (input) INTEGER array, dimension (M*abs(INCX)) + IPIV (input) INTEGER array, dimension (K2*abs(INCX)) The vector of pivot indices. Only the elements in positions K1 through K2 of IPIV are accessed. IPIV(K) = L implies rows K and L are to be interchanged. @@ -18962,10 +21559,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -18986,7 +21583,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Arguments ========= - UPLO (input) CHARACTER + UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the Hermitian matrix A is stored: = 'U': Upper triangular @@ -19131,7 +21728,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, & - c_b60, &a[i__ * a_dim1 + 1], &c__1); + c_b57, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw); i__2 = *n - i__; @@ -19140,7 +21737,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__, &i__2, &z__1, &w[(iw + 1) * w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, & - c_b60, &a[i__ * a_dim1 + 1], &c__1); + c_b57, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda); i__2 = i__ + i__ * a_dim1; @@ -19168,32 +21765,32 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Compute W(1:i-1,i) */ i__2 = i__ - 1; - zhemv_("Upper", &i__2, &c_b60, &a[a_offset], lda, &a[i__ * - a_dim1 + 1], &c__1, &c_b59, &w[iw * w_dim1 + 1], & + zhemv_("Upper", &i__2, &c_b57, &a[a_offset], lda, &a[i__ * + a_dim1 + 1], &c__1, &c_b56, &w[iw * w_dim1 + 1], & c__1); if (i__ < *n) { i__2 = i__ - 1; i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[( + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[( iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], - &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1); + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) * a_dim1 + 1], lda, &w[i__ 
+ 1 + iw * w_dim1], & - c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1); + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[( + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[( i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], - &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1); + &c__1, &c_b56, &w[i__ + 1 + iw * w_dim1], &c__1); i__2 = i__ - 1; i__3 = *n - i__; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], & - c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1); + c__1, &c_b57, &w[iw * w_dim1 + 1], &c__1); } i__2 = i__ - 1; zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1); @@ -19233,7 +21830,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda, - &w[i__ + w_dim1], ldw, &c_b60, &a[i__ + i__ * a_dim1], & + &w[i__ + w_dim1], ldw, &c_b57, &a[i__ + i__ * a_dim1], & c__1); i__2 = i__ - 1; zlacgv_(&i__2, &w[i__ + w_dim1], ldw); @@ -19243,7 +21840,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1], ldw, - &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], & + &a[i__ + a_dim1], lda, &c_b57, &a[i__ + i__ * a_dim1], & c__1); i__2 = i__ - 1; zlacgv_(&i__2, &a[i__ + a_dim1], lda); @@ -19273,30 +21870,30 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Compute W(i+1:n,i) */ i__2 = *n - i__; - zhemv_("Lower", &i__2, &c_b60, &a[i__ + 1 + (i__ + 1) * + zhemv_("Lower", &i__2, &c_b57, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &w[i__ + 1 + i__ * w_dim1], &c__1); + c_b56, &w[i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - zgemv_("Conjugate transpose", 
&i__2, &i__3, &c_b60, &w[i__ + + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &w[i__ + 1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &w[i__ * w_dim1 + 1], &c__1); + c_b56, &w[i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 + - a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[ + a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & - c_b59, &w[i__ * w_dim1 + 1], &c__1); + c_b56, &w[i__ * w_dim1 + 1], &c__1); i__2 = *n - i__; i__3 = i__ - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 + - w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[ + w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b57, &w[ i__ + 1 + i__ * w_dim1], &c__1); i__2 = *n - i__; zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1); @@ -19380,10 +21977,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1992 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20000,7 +22597,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Scale x by 1/2. 
*/ - zdscal_(n, &c_b2210, &x[1], &c__1); + zdscal_(n, &c_b2435, &x[1], &c__1); *scale *= .5; } @@ -20518,10 +23115,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20617,7 +23214,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__2 = i__ - 1; i__3 = *n - i__; z__1.r = aii, z__1.i = 0.; - zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) * + zgemv_("No transpose", &i__2, &i__3, &c_b57, &a[(i__ + 1) * a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, & z__1, &a[i__ * a_dim1 + 1], &c__1); i__2 = *n - i__; @@ -20648,7 +23245,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__2 = *n - i__; i__3 = i__ - 1; z__1.r = aii, z__1.i = 0.; - zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + + zgemv_("Conjugate transpose", &i__2, &i__3, &c_b57, &a[i__ + 1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, & z__1, &a[i__ + a_dim1], lda); i__2 = i__ - 1; @@ -20691,10 +23288,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK auxiliary routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20796,19 +23393,19 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, ib = min(i__3,i__4); i__3 = i__ - 1; ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", & - i__3, &ib, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ + i__3, &ib, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ i__ * a_dim1 + 1], lda); zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; zgemm_("No transpose", "Conjugate transpose", &i__3, &ib, - &i__4, &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda, & - a[i__ + (i__ + ib) * a_dim1], lda, &c_b60, &a[i__ + &i__4, &c_b57, &a[(i__ + ib) * a_dim1 + 1], lda, & + a[i__ + (i__ + ib) * a_dim1], lda, &c_b57, &a[i__ * a_dim1 + 1], lda); i__3 = *n - i__ - ib + 1; - zherk_("Upper", "No transpose", &ib, &i__3, &c_b1015, &a[ - i__ + (i__ + ib) * a_dim1], lda, &c_b1015, &a[i__ + zherk_("Upper", "No transpose", &ib, &i__3, &c_b1034, &a[ + i__ + (i__ + ib) * a_dim1], lda, &c_b1034, &a[i__ + i__ * a_dim1], lda); } /* L10: */ @@ -20825,20 +23422,20 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, ib = min(i__3,i__4); i__3 = i__ - 1; ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", & - ib, &i__3, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[ + ib, &i__3, &c_b57, &a[i__ + i__ * a_dim1], lda, &a[ i__ + a_dim1], lda); zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info); if (i__ + ib <= *n) { i__3 = i__ - 1; i__4 = *n - i__ - ib + 1; zgemm_("Conjugate transpose", "No transpose", &ib, &i__3, - &i__4, &c_b60, &a[i__ + ib + i__ * a_dim1], lda, & - a[i__ + ib + a_dim1], lda, &c_b60, &a[i__ + + &i__4, &c_b57, &a[i__ + ib + i__ * a_dim1], lda, & + a[i__ + ib + a_dim1], lda, &c_b57, &a[i__ + a_dim1], lda); i__3 = *n - i__ - ib + 1; zherk_("Lower", "Conjugate transpose", &ib, &i__3, & - c_b1015, &a[i__ + ib + i__ * a_dim1], lda, & - c_b1015, &a[i__ + i__ * a_dim1], lda); + c_b1034, &a[i__ + ib 
+ i__ * a_dim1], lda, & + c_b1034, &a[i__ + i__ * a_dim1], lda); } /* L20: */ } @@ -20872,16 +23469,17 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *); static logical upper; + extern logical disnan_(doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_( integer *, doublereal *, doublecomplex *, integer *), zlacgv_( integer *, doublecomplex *, integer *); /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -20980,7 +23578,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, , &c__1); z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; ajj = z__1.r; - if (ajj <= 0.) { + if (ajj <= 0. || disnan_(&ajj)) { i__2 = j + j * a_dim1; a[i__2].r = ajj, a[i__2].i = 0.; goto L30; @@ -20998,7 +23596,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__3 = *n - j; z__1.r = -1., z__1.i = -0.; zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1 - + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + ( + + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b57, &a[j + ( j + 1) * a_dim1], lda); i__2 = j - 1; zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1); @@ -21023,7 +23621,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda); z__1.r = d__1 - z__2.r, z__1.i = -z__2.i; ajj = z__1.r; - if (ajj <= 0.) { + if (ajj <= 0. 
|| disnan_(&ajj)) { i__2 = j + j * a_dim1; a[i__2].r = ajj, a[i__2].i = 0.; goto L30; @@ -21041,7 +23639,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__3 = j - 1; z__1.r = -1., z__1.i = -0.; zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1] - , lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j * + , lda, &a[j + a_dim1], lda, &c_b57, &a[j + 1 + j * a_dim1], &c__1); i__2 = j - 1; zlacgv_(&i__2, &a[j + a_dim1], lda); @@ -21090,10 +23688,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21204,8 +23802,8 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, & - a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1], + zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1276, & + a[j * a_dim1 + 1], lda, &c_b1034, &a[j + j * a_dim1], lda); zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { @@ -21220,11 +23818,11 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, z__1.r = -1., z__1.i = -0.; zgemm_("Conjugate transpose", "No transpose", &jb, &i__3, &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb) - * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) * + * a_dim1 + 1], lda, &c_b57, &a[j + (j + jb) * a_dim1], lda); i__3 = *n - j - jb + 1; ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", - &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[ + &jb, &i__3, &c_b57, &a[j + j * a_dim1], lda, &a[ j + (j + jb) * a_dim1], 
lda); } /* L10: */ @@ -21247,8 +23845,8 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__3 = nb, i__4 = *n - j + 1; jb = min(i__3,i__4); i__3 = j - 1; - zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j + - a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda); + zherk_("Lower", "No transpose", &jb, &i__3, &c_b1276, &a[j + + a_dim1], lda, &c_b1034, &a[j + j * a_dim1], lda); zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); if (*info != 0) { goto L30; @@ -21262,11 +23860,11 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, z__1.r = -1., z__1.i = -0.; zgemm_("No transpose", "Conjugate transpose", &i__3, &jb, &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j + - a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1], + a_dim1], lda, &c_b57, &a[j + jb + j * a_dim1], lda); i__3 = *n - j - jb + 1; ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit" - , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[ + , &i__3, &jb, &c_b57, &a[j + j * a_dim1], lda, &a[ j + jb + j * a_dim1], lda); } /* L20: */ @@ -21300,10 +23898,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - March 31, 1993 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21405,10 +24003,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21499,11 +24097,11 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, */ ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b60, &a[a_offset], lda, &b[b_offset], ldb); + c_b57, &a[a_offset], lda, &b[b_offset], ldb); /* Solve U*X = B, overwriting B with X. */ - ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, & + ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b57, & a[a_offset], lda, &b[b_offset], ldb); } else { @@ -21513,20 +24111,155 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Solve L*X = B, overwriting B with X. */ - ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b60, & + ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b57, & a[a_offset], lda, &b[b_offset], ldb); /* Solve L'*X = B, overwriting B with X. */ ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, & - c_b60, &a[a_offset], lda, &b[b_offset], ldb); + c_b57, &a[a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of ZPOTRS */ + +} /* zpotrs_ */ + +/* Subroutine */ int zrot_(integer *n, doublecomplex *cx, integer *incx, + doublecomplex *cy, integer *incy, doublereal *c__, doublecomplex *s) +{ + /* System generated locals */ + integer i__1, i__2, i__3, i__4; + doublecomplex z__1, z__2, z__3, z__4; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer i__, ix, iy; + static doublecomplex stemp; + + +/* + -- LAPACK auxiliary routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZROT applies a plane rotation, where the cos (C) is real and the + sin (S) is complex, and the vectors CX and CY are complex. 
+ + Arguments + ========= + + N (input) INTEGER + The number of elements in the vectors CX and CY. + + CX (input/output) COMPLEX*16 array, dimension (N) + On input, the vector X. + On output, CX is overwritten with C*X + S*Y. + + INCX (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + CY (input/output) COMPLEX*16 array, dimension (N) + On input, the vector Y. + On output, CY is overwritten with -CONJG(S)*X + C*Y. + + INCY (input) INTEGER + The increment between successive values of CY. INCX <> 0. + + C (input) DOUBLE PRECISION + S (input) COMPLEX*16 + C and S define a rotation + [ C S ] + [ -conjg(S) C ] + where C*C + S*CONJG(S) = 1.0. + + ===================================================================== +*/ + + + /* Parameter adjustments */ + --cy; + --cx; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* Code for unequal increments or equal increments not equal to 1 */ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + i__2 = ix; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = iy; + z__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, z__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + stemp.r = z__1.r, stemp.i = z__1.i; + i__2 = iy; + i__3 = iy; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + d_cnjg(&z__4, s); + i__4 = ix; + z__3.r = z__4.r * cx[i__4].r - z__4.i * cx[i__4].i, z__3.i = z__4.r * + cx[i__4].i + z__4.i * cx[i__4].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = ix; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; + ix += *incx; + iy += *incy; +/* L10: */ } - return 0; -/* End of ZPOTRS */ +/* Code for both increments equal to 1 */ -} /* zpotrs_ */ +L20: + i__1 = *n; + for (i__ = 1; i__ 
<= i__1; ++i__) { + i__2 = i__; + z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i; + i__3 = i__; + z__3.r = s->r * cy[i__3].r - s->i * cy[i__3].i, z__3.i = s->r * cy[ + i__3].i + s->i * cy[i__3].r; + z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i; + stemp.r = z__1.r, stemp.i = z__1.i; + i__2 = i__; + i__3 = i__; + z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i; + d_cnjg(&z__4, s); + i__4 = i__; + z__3.r = z__4.r * cx[i__4].r - z__4.i * cx[i__4].i, z__3.i = z__4.r * + cx[i__4].i + z__4.i * cx[i__4].r; + z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i; + cy[i__2].r = z__1.r, cy[i__2].i = z__1.i; + i__2 = i__; + cx[i__2].r = stemp.r, cx[i__2].i = stemp.i; +/* L30: */ + } + return 0; +} /* zrot_ */ /* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__, doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work, @@ -21545,7 +24278,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Local variables */ static integer i__, j, k, m; static doublereal p; - static integer ii, ll, end, lgn; + static integer ii, ll, lgn; static doublereal eps, tiny; extern logical lsame_(char *, char *); static integer lwmin, start; @@ -21563,6 +24296,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, doublereal *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); + static integer finish; extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *); extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *, integer *), zlacrm_(integer *, integer *, doublecomplex *, @@ -21581,10 +24315,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -21637,18 +24371,22 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, The leading dimension of the array Z. LDZ >= 1. If eigenvectors are desired, then LDZ >= max(1,N). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1. If COMPZ = 'V' and N > 1, LWORK must be at least N*N. + Note that for COMPZ = 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LWORK need + only be 1. If LWORK = -1, then a workspace query is assumed; the routine - only calculates the optimal size of the WORK array, returns - this value as the first entry of the WORK array, and no error - message related to LWORK is issued by XERBLA. + only calculates the optimal sizes of the WORK, RWORK and + IWORK arrays, returns these values as the first entries of + the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. RWORK (workspace/output) DOUBLE PRECISION array, dimension (LRWORK) @@ -21663,13 +24401,17 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, that 2**k >= N. If COMPZ = 'I' and N > 1, LRWORK must be at least 1 + 4*N + 2*N**2 . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LRWORK + need only be max(1,2*(N-1)). 
If LRWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the RWORK array, - returns this value as the first entry of the RWORK array, and - no error message related to LRWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. - IWORK (workspace/output) INTEGER array, dimension (LIWORK) + IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK. LIWORK (input) INTEGER @@ -21679,11 +24421,15 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, 6 + 6*N + 5*N*lg N. If COMPZ = 'I' or N > 1, LIWORK must be at least 3 + 5*N . + Note that for COMPZ = 'I' or 'V', then if N is less than or + equal to the minimum divide size, usually 25, then LIWORK + need only be 1. If LIWORK = -1, then a workspace query is assumed; the - routine only calculates the optimal size of the IWORK array, - returns this value as the first entry of the IWORK array, and - no error message related to LIWORK is issued by XERBLA. + routine only calculates the optimal sizes of the WORK, RWORK + and IWORK arrays, returns these values as the first entries + of the WORK, RWORK and IWORK arrays, and no error message + related to LWORK or LRWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit. 
@@ -21728,19 +24474,36 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } else { icompz = -1; } - if (*n <= 1 || icompz <= 0) { - lwmin = 1; - liwmin = 1; - lrwmin = 1; - } else { - lgn = (integer) (log((doublereal) (*n)) / log(2.)); - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (pow_ii(&c__2, &lgn) < *n) { - ++lgn; - } - if (icompz == 1) { + if (icompz < 0) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { + *info = -6; + } + + if (*info == 0) { + +/* Compute the workspace requirements */ + + smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( + ftnlen)6, (ftnlen)1); + if (*n <= 1 || icompz == 0) { + lwmin = 1; + liwmin = 1; + lrwmin = 1; + } else if (*n <= smlsiz) { + lwmin = 1; + liwmin = 1; + lrwmin = *n - 1 << 1; + } else if (icompz == 1) { + lgn = (integer) (log((doublereal) (*n)) / log(2.)); + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } + if (pow_ii(&c__2, &lgn) < *n) { + ++lgn; + } lwmin = *n * *n; /* Computing 2nd power */ i__1 = *n; @@ -21753,25 +24516,17 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, lrwmin = (*n << 2) + 1 + (i__1 * i__1 << 1); liwmin = *n * 5 + 3; } - } - if (icompz < 0) { - *info = -1; - } else if (*n < 0) { - *info = -2; - } else if (*ldz < 1 || icompz > 0 && *ldz < max(1,*n)) { - *info = -6; - } else if (*lwork < lwmin && ! lquery) { - *info = -8; - } else if (*lrwork < lrwmin && ! lquery) { - *info = -10; - } else if (*liwork < liwmin && ! lquery) { - *info = -12; - } - - if (*info == 0) { work[1].r = (doublereal) lwmin, work[1].i = 0.; rwork[1] = (doublereal) lrwmin; iwork[1] = liwmin; + + if (*lwork < lwmin && ! lquery) { + *info = -8; + } else if (*lrwork < lrwmin && ! lquery) { + *info = -10; + } else if (*liwork < liwmin && ! 
lquery) { + *info = -12; + } } if (*info != 0) { @@ -21795,9 +24550,6 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, return 0; } - smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, ( - ftnlen)6, (ftnlen)1); - /* If the following conditional clause is removed, then the routine will use the Divide and Conquer routine to compute only the @@ -21805,14 +24557,15 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, (2 + 5N + 2N lg(N)) integer workspace. Since on many architectures DSTERF is much faster than any other algorithm for finding eigenvalues only, it is used here - as the default. + as the default. If the conditional clause is removed, then + information on the size of workspace needs to be changed. If COMPZ = 'N', use DSTERF to compute the eigenvalues. */ if (icompz == 0) { dsterf_(n, &d__[1], &e[1], info); - return 0; + goto L70; } /* @@ -21821,161 +24574,156 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, */ if (*n <= smlsiz) { - if (icompz == 0) { - dsterf_(n, &d__[1], &e[1], info); - return 0; - } else if (icompz == 2) { - zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } else { - zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], - info); - return 0; - } - } -/* If COMPZ = 'I', we simply call DSTEDC instead. */ + zsteqr_(compz, n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1], + info); - if (icompz == 2) { - dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n); - ll = *n * *n + 1; - i__1 = *lrwork - ll + 1; - dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & - iwork[1], liwork, info); - i__1 = *n; - for (j = 1; j <= i__1; ++j) { - i__2 = *n; - for (i__ = 1; i__ <= i__2; ++i__) { - i__3 = i__ + j * z_dim1; - i__4 = (j - 1) * *n + i__; - z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; + } else { + +/* If COMPZ = 'I', we simply call DSTEDC instead. 
*/ + + if (icompz == 2) { + dlaset_("Full", n, n, &c_b328, &c_b1034, &rwork[1], n); + ll = *n * *n + 1; + i__1 = *lrwork - ll + 1; + dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, & + iwork[1], liwork, info); + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = 1; i__ <= i__2; ++i__) { + i__3 = i__ + j * z_dim1; + i__4 = (j - 1) * *n + i__; + z__[i__3].r = rwork[i__4], z__[i__3].i = 0.; /* L10: */ - } + } /* L20: */ + } + goto L70; } - return 0; - } /* - From now on, only option left to be handled is COMPZ = 'V', - i.e. ICOMPZ = 1. + From now on, only option left to be handled is COMPZ = 'V', + i.e. ICOMPZ = 1. - Scale. + Scale. */ - orgnrm = dlanst_("M", n, &d__[1], &e[1]); - if (orgnrm == 0.) { - return 0; - } + orgnrm = dlanst_("M", n, &d__[1], &e[1]); + if (orgnrm == 0.) { + goto L70; + } - eps = EPSILON; + eps = EPSILON; - start = 1; + start = 1; -/* while ( START <= N ) */ +/* while ( START <= N ) */ L30: - if (start <= *n) { + if (start <= *n) { /* - Let END be the position of the next subdiagonal entry such that - E( END ) <= TINY or END = N if no such subdiagonal exists. The - matrix identified by the elements between START and END - constitutes an independent sub-problem. + Let FINISH be the position of the next subdiagonal entry + such that E( FINISH ) <= TINY or FINISH = N if no such + subdiagonal exists. The matrix identified by the elements + between START and FINISH constitutes an independent + sub-problem. */ - end = start; + finish = start; L40: - if (end < *n) { - tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 = - d__[end + 1], abs(d__2))); - if ((d__1 = e[end], abs(d__1)) > tiny) { - ++end; - goto L40; + if (finish < *n) { + tiny = eps * sqrt((d__1 = d__[finish], abs(d__1))) * sqrt(( + d__2 = d__[finish + 1], abs(d__2))); + if ((d__1 = e[finish], abs(d__1)) > tiny) { + ++finish; + goto L40; + } } - } -/* (Sub) Problem determined. Compute its size and solve it. */ +/* (Sub) Problem determined. 
Compute its size and solve it. */ - m = end - start + 1; - if (m > smlsiz) { - *info = smlsiz; + m = finish - start + 1; + if (m > smlsiz) { -/* Scale. */ +/* Scale. */ - orgnrm = dlanst_("M", &m, &d__[start], &e[start]); - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[ - start], &m, info); - i__1 = m - 1; - i__2 = m - 1; - dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[ - start], &i__2, info); + orgnrm = dlanst_("M", &m, &d__[start], &e[start]); + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &m, &c__1, &d__[ + start], &m, info); + i__1 = m - 1; + i__2 = m - 1; + dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1034, &i__1, &c__1, & + e[start], &i__2, info); - zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1], - ldz, &work[1], n, &rwork[1], &iwork[1], info); - if (*info > 0) { - *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m - + 1) + start - 1; - return 0; - } + zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + + 1], ldz, &work[1], n, &rwork[1], &iwork[1], info); + if (*info > 0) { + *info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % + (m + 1) + start - 1; + goto L70; + } -/* Scale back. */ +/* Scale back. 
*/ - dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[ - start], &m, info); + dlascl_("G", &c__0, &c__0, &c_b1034, &orgnrm, &m, &c__1, &d__[ + start], &m, info); - } else { - dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m * - m + 1], info); - zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & - work[1], n, &rwork[m * m + 1]); - zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz); - if (*info > 0) { - *info = start * (*n + 1) + end; - return 0; + } else { + dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, & + rwork[m * m + 1], info); + zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, & + work[1], n, &rwork[m * m + 1]); + zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], + ldz); + if (*info > 0) { + *info = start * (*n + 1) + finish; + goto L70; + } } - } - start = end + 1; - goto L30; - } + start = finish + 1; + goto L30; + } /* - endwhile + endwhile - If the problem split any number of times, then the eigenvalues - will not be properly ordered. Here we permute the eigenvalues - (and the associated eigenvectors) into ascending order. + If the problem split any number of times, then the eigenvalues + will not be properly ordered. Here we permute the eigenvalues + (and the associated eigenvectors) into ascending order. 
*/ - if (m != *n) { + if (m != *n) { -/* Use Selection Sort to minimize swaps of eigenvectors */ +/* Use Selection Sort to minimize swaps of eigenvectors */ - i__1 = *n; - for (ii = 2; ii <= i__1; ++ii) { - i__ = ii - 1; - k = i__; - p = d__[i__]; - i__2 = *n; - for (j = ii; j <= i__2; ++j) { - if (d__[j] < p) { - k = j; - p = d__[j]; - } + i__1 = *n; + for (ii = 2; ii <= i__1; ++ii) { + i__ = ii - 1; + k = i__; + p = d__[i__]; + i__2 = *n; + for (j = ii; j <= i__2; ++j) { + if (d__[j] < p) { + k = j; + p = d__[j]; + } /* L50: */ - } - if (k != i__) { - d__[k] = d__[i__]; - d__[i__] = p; - zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1], - &c__1); - } + } + if (k != i__) { + d__[k] = d__[i__]; + d__[i__] = p; + zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + + 1], &c__1); + } /* L60: */ + } } } +L70: work[1].r = (doublereal) lwmin, work[1].i = 0.; rwork[1] = (doublereal) lrwmin; iwork[1] = liwmin; @@ -22038,10 +24786,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22174,7 +24922,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, */ if (icompz == 2) { - zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz); + zlaset_("Full", n, n, &c_b56, &c_b57, &z__[z_offset], ldz); } nmaxit = *n * 30; @@ -22324,7 +25072,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form shift. 
*/ g = (d__[l + 1] - p) / (e[l] * 2.); - r__ = dlapy2_(&g, &c_b1015); + r__ = dlapy2_(&g, &c_b1034); g = d__[m] - p + e[l] / (g + d_sign(&r__, &g)); s = 1.; @@ -22450,7 +25198,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Form shift. */ g = (d__[l - 1] - p) / (e[l - 1] * 2.); - r__ = dlapy2_(&g, &c_b1015); + r__ = dlapy2_(&g, &c_b1034); g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g)); s = 1.; @@ -22630,10 +25378,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -22641,20 +25389,23 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, ZTREVC computes some or all of the right and/or left eigenvectors of a complex upper triangular matrix T. + Matrices of this type are produced by the Schur factorization of + a complex general matrix: A = Q*T*Q**H, as computed by ZHSEQR. The right eigenvector x and the left eigenvector y of T corresponding to an eigenvalue w are defined by: - T*x = w*x, y'*T = w*y' + T*x = w*x, (y**H)*T = w*(y**H) - where y' denotes the conjugate transpose of the vector y. + where y**H denotes the conjugate transpose of the vector y. + The eigenvalues are not input to this routine, but are read directly + from the diagonal of T. - If all eigenvectors are requested, the routine may either return the - matrices X and/or Y of right or left eigenvectors of T, or the - products Q*X and/or Q*Y, where Q is an input unitary - matrix. 
If T was obtained from the Schur factorization of an - original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of - right or left eigenvectors of A. + This routine returns the matrices X and/or Y of right and left + eigenvectors of T, or the products Q*X and/or Q*Y, where Q is an + input matrix. If Q is the unitary factor that reduces a matrix A to + Schur form T, then Q*X and Q*Y are the matrices of right and left + eigenvectors of A. Arguments ========= @@ -22667,17 +25418,17 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, HOWMNY (input) CHARACTER*1 = 'A': compute all right and/or left eigenvectors; = 'B': compute all right and/or left eigenvectors, - and backtransform them using the input matrices - supplied in VR and/or VL; + backtransformed using the matrices supplied in + VR and/or VL; = 'S': compute selected right and/or left eigenvectors, - specified by the logical array SELECT. + as indicated by the logical array SELECT. SELECT (input) LOGICAL array, dimension (N) If HOWMNY = 'S', SELECT specifies the eigenvectors to be computed. - If HOWMNY = 'A' or 'B', SELECT is not referenced. - To select the eigenvector corresponding to the j-th - eigenvalue, SELECT(j) must be set to .TRUE.. + The eigenvector corresponding to the j-th eigenvalue is + computed if SELECT(j) = .TRUE.. + Not referenced if HOWMNY = 'A' or 'B'. N (input) INTEGER The order of the matrix T. N >= 0. @@ -22695,19 +25446,16 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Schur vectors returned by ZHSEQR). On exit, if SIDE = 'L' or 'B', VL contains: if HOWMNY = 'A', the matrix Y of left eigenvectors of T; - VL is lower triangular. The i-th column - VL(i) of VL is the eigenvector corresponding - to T(i,i). if HOWMNY = 'B', the matrix Q*Y; if HOWMNY = 'S', the left eigenvectors of T specified by SELECT, stored consecutively in the columns of VL, in the same order as their eigenvalues. - If SIDE = 'R', VL is not referenced. 
+ Not referenced if SIDE = 'R'. LDVL (input) INTEGER - The leading dimension of the array VL. LDVL >= max(1,N) if - SIDE = 'L' or 'B'; LDVL >= 1 otherwise. + The leading dimension of the array VL. LDVL >= 1, and if + SIDE = 'L' or 'B', LDVL >= N. VR (input/output) COMPLEX*16 array, dimension (LDVR,MM) On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must @@ -22715,19 +25463,16 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, Schur vectors returned by ZHSEQR). On exit, if SIDE = 'R' or 'B', VR contains: if HOWMNY = 'A', the matrix X of right eigenvectors of T; - VR is upper triangular. The i-th column - VR(i) of VR is the eigenvector corresponding - to T(i,i). if HOWMNY = 'B', the matrix Q*X; if HOWMNY = 'S', the right eigenvectors of T specified by SELECT, stored consecutively in the columns of VR, in the same order as their eigenvalues. - If SIDE = 'L', VR is not referenced. + Not referenced if SIDE = 'L'. LDVR (input) INTEGER - The leading dimension of the array VR. LDVR >= max(1,N) if - SIDE = 'R' or 'B'; LDVR >= 1 otherwise. + The leading dimension of the array VR. LDVR >= 1, and if + SIDE = 'R' or 'B'; LDVR >= N. MM (input) INTEGER The number of columns in the arrays VL and/or VR. MM >= M. 
@@ -22945,7 +25690,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, if (ki > 1) { i__1 = ki - 1; z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[ + zgemv_("N", n, &i__1, &c_b57, &vr[vr_offset], ldvr, &work[ 1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1); } @@ -23061,7 +25806,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, if (ki < *n) { i__2 = *n - ki; z__1.r = scale, z__1.i = 0.; - zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1], + zgemv_("N", n, &i__2, &c_b57, &vl[(ki + 1) * vl_dim1 + 1], ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki * vl_dim1 + 1], &c__1); } @@ -23095,6 +25840,194 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } /* ztrevc_ */ +/* Subroutine */ int ztrexc_(char *compq, integer *n, doublecomplex *t, + integer *ldt, doublecomplex *q, integer *ldq, integer *ifst, integer * + ilst, integer *info) +{ + /* System generated locals */ + integer q_dim1, q_offset, t_dim1, t_offset, i__1, i__2, i__3; + doublecomplex z__1; + + /* Builtin functions */ + void d_cnjg(doublecomplex *, doublecomplex *); + + /* Local variables */ + static integer k, m1, m2, m3; + static doublereal cs; + static doublecomplex t11, t22, sn, temp; + extern /* Subroutine */ int zrot_(integer *, doublecomplex *, integer *, + doublecomplex *, integer *, doublereal *, doublecomplex *); + extern logical lsame_(char *, char *); + static logical wantq; + extern /* Subroutine */ int xerbla_(char *, integer *), zlartg_( + doublecomplex *, doublecomplex *, doublereal *, doublecomplex *, + doublecomplex *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZTREXC reorders the Schur factorization of a complex matrix + A = Q*T*Q**H, so that the diagonal element of T with row index IFST + is moved to row ILST. + + The Schur form T is reordered by a unitary similarity transformation + Z**H*T*Z, and optionally the matrix Q of Schur vectors is updated by + postmultplying it with Z. + + Arguments + ========= + + COMPQ (input) CHARACTER*1 + = 'V': update the matrix Q of Schur vectors; + = 'N': do not update Q. + + N (input) INTEGER + The order of the matrix T. N >= 0. + + T (input/output) COMPLEX*16 array, dimension (LDT,N) + On entry, the upper triangular matrix T. + On exit, the reordered upper triangular matrix. + + LDT (input) INTEGER + The leading dimension of the array T. LDT >= max(1,N). + + Q (input/output) COMPLEX*16 array, dimension (LDQ,N) + On entry, if COMPQ = 'V', the matrix Q of Schur vectors. + On exit, if COMPQ = 'V', Q has been postmultiplied by the + unitary transformation matrix Z which reorders T. + If COMPQ = 'N', Q is not referenced. + + LDQ (input) INTEGER + The leading dimension of the array Q. LDQ >= max(1,N). + + IFST (input) INTEGER + ILST (input) INTEGER + Specify the reordering of the diagonal elements of T: + The element with row index IFST is moved to row ILST by a + sequence of transpositions between adjacent elements. + 1 <= IFST <= N; 1 <= ILST <= N. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Decode and test the input parameters. +*/ + + /* Parameter adjustments */ + t_dim1 = *ldt; + t_offset = 1 + t_dim1; + t -= t_offset; + q_dim1 = *ldq; + q_offset = 1 + q_dim1; + q -= q_offset; + + /* Function Body */ + *info = 0; + wantq = lsame_(compq, "V"); + if (! lsame_(compq, "N") && ! 
wantq) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*ldt < max(1,*n)) { + *info = -4; + } else if (*ldq < 1 || wantq && *ldq < max(1,*n)) { + *info = -6; + } else if (*ifst < 1 || *ifst > *n) { + *info = -7; + } else if (*ilst < 1 || *ilst > *n) { + *info = -8; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("ZTREXC", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 1 || *ifst == *ilst) { + return 0; + } + + if (*ifst < *ilst) { + +/* Move the IFST-th diagonal element forward down the diagonal. */ + + m1 = 0; + m2 = -1; + m3 = 1; + } else { + +/* Move the IFST-th diagonal element backward up the diagonal. */ + + m1 = -1; + m2 = 0; + m3 = -1; + } + + i__1 = *ilst + m2; + i__2 = m3; + for (k = *ifst + m1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { + +/* Interchange the k-th and (k+1)-th diagonal elements. */ + + i__3 = k + k * t_dim1; + t11.r = t[i__3].r, t11.i = t[i__3].i; + i__3 = k + 1 + (k + 1) * t_dim1; + t22.r = t[i__3].r, t22.i = t[i__3].i; + +/* Determine the transformation to perform the interchange. */ + + z__1.r = t22.r - t11.r, z__1.i = t22.i - t11.i; + zlartg_(&t[k + (k + 1) * t_dim1], &z__1, &cs, &sn, &temp); + +/* Apply transformation to the matrix T. */ + + if (k + 2 <= *n) { + i__3 = *n - k - 1; + zrot_(&i__3, &t[k + (k + 2) * t_dim1], ldt, &t[k + 1 + (k + 2) * + t_dim1], ldt, &cs, &sn); + } + i__3 = k - 1; + d_cnjg(&z__1, &sn); + zrot_(&i__3, &t[k * t_dim1 + 1], &c__1, &t[(k + 1) * t_dim1 + 1], & + c__1, &cs, &z__1); + + i__3 = k + k * t_dim1; + t[i__3].r = t22.r, t[i__3].i = t22.i; + i__3 = k + 1 + (k + 1) * t_dim1; + t[i__3].r = t11.r, t[i__3].i = t11.i; + + if (wantq) { + +/* Accumulate transformation in the matrix Q. 
*/ + + d_cnjg(&z__1, &sn); + zrot_(n, &q[k * q_dim1 + 1], &c__1, &q[(k + 1) * q_dim1 + 1], & + c__1, &cs, &z__1); + } + +/* L10: */ + } + + return 0; + +/* End of ZTREXC */ + +} /* ztrexc_ */ + /* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n, doublecomplex *a, integer *lda, integer *info) { @@ -23118,10 +26051,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23207,7 +26140,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, for (j = 1; j <= i__1; ++j) { if (nounit) { i__2 = j + j * a_dim1; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + z_div(&z__1, &c_b57, &a[j + j * a_dim1]); a[i__2].r = z__1.r, a[i__2].i = z__1.i; i__2 = j + j * a_dim1; z__1.r = -a[i__2].r, z__1.i = -a[i__2].i; @@ -23233,7 +26166,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, for (j = *n; j >= 1; --j) { if (nounit) { i__1 = j + j * a_dim1; - z_div(&z__1, &c_b60, &a[j + j * a_dim1]); + z_div(&z__1, &c_b57, &a[j + j * a_dim1]); a[i__1].r = z__1.r, a[i__1].i = z__1.i; i__1 = j + j * a_dim1; z__1.r = -a[i__1].r, z__1.i = -a[i__1].i; @@ -23291,10 +26224,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23425,7 +26358,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__4 = j - 1; ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, & - c_b60, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); + c_b57, &a[a_offset], lda, &a[j * a_dim1 + 1], lda); i__4 = j - 1; z__1.r = -1., z__1.i = -0.; ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, & @@ -23453,7 +26386,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, i__1 = *n - j - jb + 1; ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb, - &c_b60, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + &c_b57, &a[j + jb + (j + jb) * a_dim1], lda, &a[j + jb + j * a_dim1], lda); i__1 = *n - j - jb + 1; z__1.r = -1., z__1.i = -0.; @@ -23493,10 +26426,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23658,10 +26591,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23727,7 +26660,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, reflector H(i) or G(i), which determines Q or P**H, as returned by ZGEBRD in its array argument TAUQ or TAUP. 
- WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -23942,10 +26875,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -23982,7 +26915,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEHRD. - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -24138,10 +27071,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24314,10 +27247,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. 
of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24357,7 +27290,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGELQF. - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -24578,10 +27511,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -24622,7 +27555,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEQRF. - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -24840,10 +27773,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. 
of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25055,10 +27988,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25283,10 +28216,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25373,16 +28306,17 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If SIDE = 'L', LWORK >= max(1,N); - if SIDE = 'R', LWORK >= max(1,M). - For optimum performance LWORK >= N*NB if SIDE = 'L', and - LWORK >= M*NB if SIDE = 'R', where NB is the optimal - blocksize. + if SIDE = 'R', LWORK >= max(1,M); + if N = 0 or M = 0, LWORK >= 1. + For optimum performance LWORK >= max(1,N*NB) if SIDE = 'L', + and LWORK >= max(1,M*NB) if SIDE = 'R', where NB is the + optimal blocksize. (NB = 0 if M = 0 or N = 0.) 
If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns @@ -25425,6 +28359,9 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, nq = *n; nw = *m; } + if (*m == 0 || *n == 0) { + nw = 0; + } if (! applyq && ! lsame_(vect, "P")) { *info = -1; } else if (! left && ! lsame_(side, "R")) { @@ -25450,48 +28387,54 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } if (*info == 0) { - if (applyq) { - if (left) { -/* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } else { + if (nw > 0) { + if (applyq) { + if (left) { /* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); - } - } else { - if (left) { + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { /* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *m - 1; - i__2 = *m - 1; - nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } } else { + if (left) { /* Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - 
s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = *n - 1; - i__2 = *n - 1; - nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, ( - ftnlen)6, (ftnlen)2); + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *m - 1; + i__2 = *m - 1; + nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, &i__1, n, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = *n - 1; + i__2 = *n - 1; + nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, &i__1, &i__2, & + c_n1, (ftnlen)6, (ftnlen)2); + } } +/* Computing MAX */ + i__1 = 1, i__2 = nw * nb; + lwkopt = max(i__1,i__2); + } else { + lwkopt = 1; } - lwkopt = max(1,nw) * nb; work[1].r = (doublereal) lwkopt, work[1].i = 0.; } @@ -25500,11 +28443,11 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, xerbla_("ZUNMBR", &i__1); return 0; } else if (lquery) { + return 0; } /* Quick return if possible */ - work[1].r = 1., work[1].i = 0.; if (*m == 0 || *n == 0) { return 0; } @@ -25581,6 +28524,235 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, } /* zunmbr_ */ +/* Subroutine */ int zunmhr_(char *side, char *trans, integer *m, integer *n, + integer *ilo, integer *ihi, doublecomplex *a, integer *lda, + doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex * + work, integer *lwork, integer *info) +{ + /* System generated locals */ + address a__1[2]; + integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2; + char ch__1[2]; + + /* Builtin functions */ + /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen); + + /* Local variables */ + static integer i1, i2, nb, mi, nh, ni, nq, nw; + static logical left; + extern logical lsame_(char *, char *); + static integer iinfo; + extern /* Subroutine */ int xerbla_(char *, integer *); + extern integer ilaenv_(integer *, 
char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + static integer lwkopt; + static logical lquery; + extern /* Subroutine */ int zunmqr_(char *, char *, integer *, integer *, + integer *, doublecomplex *, integer *, doublecomplex *, + doublecomplex *, integer *, doublecomplex *, integer *, integer *); + + +/* + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 + + + Purpose + ======= + + ZUNMHR overwrites the general complex M-by-N matrix C with + + SIDE = 'L' SIDE = 'R' + TRANS = 'N': Q * C C * Q + TRANS = 'C': Q**H * C C * Q**H + + where Q is a complex unitary matrix of order nq, with nq = m if + SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of + IHI-ILO elementary reflectors, as returned by ZGEHRD: + + Q = H(ilo) H(ilo+1) . . . H(ihi-1). + + Arguments + ========= + + SIDE (input) CHARACTER*1 + = 'L': apply Q or Q**H from the Left; + = 'R': apply Q or Q**H from the Right. + + TRANS (input) CHARACTER*1 + = 'N': apply Q (No transpose) + = 'C': apply Q**H (Conjugate transpose) + + M (input) INTEGER + The number of rows of the matrix C. M >= 0. + + N (input) INTEGER + The number of columns of the matrix C. N >= 0. + + ILO (input) INTEGER + IHI (input) INTEGER + ILO and IHI must have the same values as in the previous call + of ZGEHRD. Q is equal to the unit matrix except in the + submatrix Q(ilo+1:ihi,ilo+1:ihi). + If SIDE = 'L', then 1 <= ILO <= IHI <= M, if M > 0, and + ILO = 1 and IHI = 0, if M = 0; + if SIDE = 'R', then 1 <= ILO <= IHI <= N, if N > 0, and + ILO = 1 and IHI = 0, if N = 0. + + A (input) COMPLEX*16 array, dimension + (LDA,M) if SIDE = 'L' + (LDA,N) if SIDE = 'R' + The vectors which define the elementary reflectors, as + returned by ZGEHRD. + + LDA (input) INTEGER + The leading dimension of the array A. 
+ LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'. + + TAU (input) COMPLEX*16 array, dimension + (M-1) if SIDE = 'L' + (N-1) if SIDE = 'R' + TAU(i) must contain the scalar factor of the elementary + reflector H(i), as returned by ZGEHRD. + + C (input/output) COMPLEX*16 array, dimension (LDC,N) + On entry, the M-by-N matrix C. + On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q. + + LDC (input) INTEGER + The leading dimension of the array C. LDC >= max(1,M). + + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) + On exit, if INFO = 0, WORK(1) returns the optimal LWORK. + + LWORK (input) INTEGER + The dimension of the array WORK. + If SIDE = 'L', LWORK >= max(1,N); + if SIDE = 'R', LWORK >= max(1,M). + For optimum performance LWORK >= N*NB if SIDE = 'L', and + LWORK >= M*NB if SIDE = 'R', where NB is the optimal + blocksize. + + If LWORK = -1, then a workspace query is assumed; the routine + only calculates the optimal size of the WORK array, returns + this value as the first entry of the WORK array, and no error + message related to LWORK is issued by XERBLA. + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input arguments +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --tau; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + --work; + + /* Function Body */ + *info = 0; + nh = *ihi - *ilo; + left = lsame_(side, "L"); + lquery = *lwork == -1; + +/* NQ is the order of Q and NW is the minimum dimension of WORK */ + + if (left) { + nq = *m; + nw = *n; + } else { + nq = *n; + nw = *m; + } + if (! left && ! lsame_(side, "R")) { + *info = -1; + } else if (! lsame_(trans, "N") && ! 
lsame_(trans, + "C")) { + *info = -2; + } else if (*m < 0) { + *info = -3; + } else if (*n < 0) { + *info = -4; + } else if (*ilo < 1 || *ilo > max(1,nq)) { + *info = -5; + } else if (*ihi < min(*ilo,nq) || *ihi > nq) { + *info = -6; + } else if (*lda < max(1,nq)) { + *info = -8; + } else if (*ldc < max(1,*m)) { + *info = -11; + } else if (*lwork < max(1,nw) && ! lquery) { + *info = -13; + } + + if (*info == 0) { + if (left) { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &nh, n, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } else { +/* Writing concatenation */ + i__1[0] = 1, a__1[0] = side; + i__1[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2); + nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &nh, &nh, &c_n1, (ftnlen) + 6, (ftnlen)2); + } + lwkopt = max(1,nw) * nb; + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + } + + if (*info != 0) { + i__2 = -(*info); + xerbla_("ZUNMHR", &i__2); + return 0; + } else if (lquery) { + return 0; + } + +/* Quick return if possible */ + + if (*m == 0 || *n == 0 || nh == 0) { + work[1].r = 1., work[1].i = 0.; + return 0; + } + + if (left) { + mi = nh; + ni = *n; + i1 = *ilo + 1; + i2 = 1; + } else { + mi = *m; + ni = nh; + i1 = 1; + i2 = *ilo + 1; + } + + zunmqr_(side, trans, &mi, &ni, &nh, &a[*ilo + 1 + *ilo * a_dim1], lda, & + tau[*ilo], &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo); + + work[1].r = (doublereal) lwkopt, work[1].i = 0.; + return 0; + +/* End of ZUNMHR */ + +} /* zunmhr_ */ + /* Subroutine */ int zunml2_(char *side, char *trans, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info) @@ -25605,10 +28777,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. 
of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - September 30, 1994 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25849,10 +29021,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -25917,7 +29089,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -26159,10 +29331,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26227,7 +29399,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -26273,10 +29445,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, if (left) { nq = *m; - nw = *n; + nw = max(1,*n); } else { nq = *n; - nw = *m; + nw = max(1,*m); } if (! left && ! lsame_(side, "R")) { *info = -1; @@ -26292,27 +29464,33 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; - } else if (*lwork < max(1,nw) && ! lquery) { - *info = -12; } if (*info == 0) { + if (*m == 0 || *n == 0) { + lwkopt = 1; + } else { /* - Determine the block size. NB may be at most NBMAX, where NBMAX - is used to define the local array T. + Determine the block size. NB may be at most NBMAX, where + NBMAX is used to define the local array T. Computing MIN Writing concatenation */ - i__3[0] = 1, a__1[0] = side; - i__3[1] = 1, a__1[1] = trans; - s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); - i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQL", ch__1, m, n, k, &c_n1, ( - ftnlen)6, (ftnlen)2); - nb = min(i__1,i__2); - lwkopt = max(1,nw) * nb; + i__3[0] = 1, a__1[0] = side; + i__3[1] = 1, a__1[1] = trans; + s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); + i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQL", ch__1, m, n, k, &c_n1, + (ftnlen)6, (ftnlen)2); + nb = min(i__1,i__2); + lwkopt = nw * nb; + } work[1].r = (doublereal) lwkopt, work[1].i = 0.; + + if (*lwork < nw && ! 
lquery) { + *info = -12; + } } if (*info != 0) { @@ -26325,8 +29503,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* Quick return if possible */ - if (*m == 0 || *n == 0 || *k == 0) { - work[1].r = 1., work[1].i = 0.; + if (*m == 0 || *n == 0) { return 0; } @@ -26459,10 +29636,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26527,7 +29704,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). - WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER @@ -26756,10 +29933,10 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, /* - -- LAPACK routine (version 3.0) -- - Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., - Courant Institute, Argonne National Lab, and Rice University - June 30, 1999 + -- LAPACK routine (version 3.2) -- + -- LAPACK is a software package provided by Univ. of Tennessee, -- + -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- + November 2006 Purpose @@ -26825,7 +30002,7 @@ doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda, LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). 
- WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) + WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER From 555e0e84afdd0452e6b3f83fb31ae860467b2937 Mon Sep 17 00:00:00 2001 From: Eric Wieser Date: Sun, 26 Mar 2017 14:44:54 +0100 Subject: [PATCH 4/4] DOC: Add changelog entry for new lapack_lite Also, update authors of lapack_lite [ci skip] --- doc/release/1.13.0-notes.rst | 12 ++++++++++++ numpy/linalg/lapack_lite/README.rst | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/release/1.13.0-notes.rst b/doc/release/1.13.0-notes.rst index 0c9d743f65dd..6cb8de046224 100644 --- a/doc/release/1.13.0-notes.rst +++ b/doc/release/1.13.0-notes.rst @@ -229,6 +229,18 @@ array, in the same way that ``sort`` already did. Additionally, the Note that this argument is not added at the end, so breaks any code that passed ``fill_value`` as a positional argument. +Bundled version of LAPACK is now 3.2.2 +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +NumPy comes bundled with a minimal implementation of lapack for systems without +a lapack library installed, under the name of ``lapack_lite``. This has been +upgraded from LAPACK 3.0.0 (June 30, 1999) to LAPACK 3.2.2 (June 30, 2010). See +the `LAPACK changelogs`_ for details on all the changes this entails. + +While no new features are exposed through ``numpy``, this fixes some bugs +regarding "workspace" sizes, and in some places may use faster algorithms. + +.. _`LAPACK changelogs`: http://www.netlib.org/lapack/release_notes.html#_4_history_of_lapack_releases + Changes ======= diff --git a/numpy/linalg/lapack_lite/README.rst b/numpy/linalg/lapack_lite/README.rst index b25ce1e74f24..1343d25f8a94 100644 --- a/numpy/linalg/lapack_lite/README.rst +++ b/numpy/linalg/lapack_lite/README.rst @@ -1,7 +1,8 @@ Regenerating lapack_lite source =============================== -:Author: David M. Cooke +:Authors: * David M. 
Cooke + * Eric Wieser (upgraded lapack version on 2017-03-26) The ``numpy/linalg/f2c_*.c`` files are ``f2c``'d versions of the LAPACK routines required by the ``LinearAlgebra`` module, and wrapped by the ``lapack_lite``