diff --git a/numpy/core/bento.info b/numpy/core/bento.info
index 9c8476eee408..356535a9fe0c 100644
--- a/numpy/core/bento.info
+++ b/numpy/core/bento.info
@@ -25,6 +25,9 @@ Library:
     Extension: umath_tests
         Sources:
             src/umath/umath_tests.c.src
+    Extension: umath_linalg
+        Sources:
+            src/umath/umath_linalg.c.src
     Extension: scalarmath
         Sources:
             src/scalarmathmodule.c.src
diff --git a/numpy/core/bscript b/numpy/core/bscript
index d31faa186f3c..6b4e3d6224be 100644
--- a/numpy/core/bscript
+++ b/numpy/core/bscript
@@ -503,6 +503,7 @@ def pre_build(context):
     context.tweak_extension("scalarmath", use="npymath", includes=["src/private"])
     context.tweak_extension("multiarray_tests", use="npymath", includes=["src/private"])
     context.tweak_extension("umath_tests", use="npymath", includes=["src/private"])
+    context.tweak_extension("umath_linalg", use="npymath", includes=["src/private"])
 
     def build_dotblas(extension):
         if bld.env.HAS_CBLAS:
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index a1000ae0be0a..a2cd7ec661ee 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -918,6 +918,38 @@ def get_dotblas_sources(ext, build_dir):
     config.add_extension('umath_tests',
                     sources = [join('src','umath', 'umath_tests.c.src')])
 
+    #######################################################################
+    #                        umath_linalg module                          #
+    #######################################################################
+
+    lapack_info = get_info('lapack_opt', 0)
+    def get_lapack_lite_sources(ext, build_dir):
+        if not lapack_info:
+            print("### Warning:  Using unoptimized lapack ###")
+            return ext.depends[:-1]
+        else:
+            if sys.platform=='win32':
+                print("### Warning:  python_xerbla.c is disabled ###")
+                return ext.depends[:1]
+            return ext.depends[:2]
+
+    config.add_extension('_umath_linalg',
+                         sources = [get_lapack_lite_sources,
+                                    join('src', 'umath', 'umath_linalg.c.src'),
+                                    join('src', 'umath', 'lapack_lite', 'python_xerbla.c'),
+                                    join('src', 'umath', 'lapack_lite', 'zlapack_lite.c'),
+                                    join('src', 'umath', 'lapack_lite', 'dlapack_lite.c'),
+                                    join('src', 'umath', 'lapack_lite', 'blas_lite.c'),
+                                    join('src', 'umath', 'lapack_lite', 'dlamch.c'),
+                                    join('src', 'umath', 'lapack_lite', 'f2c_lite.c')
+                                    ],
+                         depends =  [join('src', 'umath', 'gufuncs_linalg.py'),
+                                     join('src', 'umath', 'lapack_lite', 'f2c.h'),
+                                    ],
+                         extra_info = lapack_info,
+                         libraries = ['npymath'],
+                         )
+
     #######################################################################
     #                     multiarray_tests module                         #
     #######################################################################
diff --git a/numpy/core/src/umath/gufuncs_linalg.py b/numpy/core/src/umath/gufuncs_linalg.py
new file mode 100644
index 000000000000..4574f595f676
--- /dev/null
+++ b/numpy/core/src/umath/gufuncs_linalg.py
@@ -0,0 +1,1368 @@
+"""Linear Algebra functions implemented as gufuncs, so they broadcast.
+
+Notes
+-----
+
+This module contains functionality that could be found in the linalg module,
+but in a gufunc-like way. This allows the use of vectorization and broadcasting
+on the operands.
+
+This module itself provides wrappers to kernels written as numpy
+generalized-ufuncs that perform the heavy-lifting of computation. The wrappers
+exist in order to provide a sane interface, like providing keyword arguments in
+line with the ones used by linalg, or just to automatically select the
+appropriate kernel depending on the parameters. All wrappers forward the
+keyword parameters to the underlying generalized ufunc (the kernel).
+
+The functions are intended to be used on arrays of matrices. Functions that in
+numpy.LinAlg would generate a LinAlgError (for example, inv on a non-invertible
+matrix) will just generate NaNs as result. When this happens, invalid floating
+point status will be set. Error handling can be configured for this cases using
+np.seterr.
+
+Additional functions some fused arithmetic, useful for efficient operation over
+"""
+
+from __future__ import division, absolute_import
+
+
+__all__ = ['inner1d', 'dotc1d', 'innerwt', 'matrix_multiply', 'det', 'slogdet',
+           'inv', 'cholesky', 'quadratic_form', 'add3', 'multiply3', 
+           'multiply3_add', 'multiply_add', 'multiply_add2', 'multiply4', 
+           'multiply4_add', 'eig', 'eigvals', 'eigh', 'eigvalsh', 'solve', 
+           'svd', 'chosolve', 'poinv']
+
+
+import numpy.core._umath_linalg as _impl
+import numpy as np
+
+
+def inner1d(a, b, **kwargs):
+    """
+    Compute the dot product of vectors over the inner dimension, with
+    broadcasting.
+
+    Parameters
+    ----------
+    a : (..., N) array
+        Input array
+    b : (..., N) array
+        Input array
+
+    Returns
+    -------
+    inner : (...) array
+        dot product over the inner dimension.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply when matching dimensions.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    For single and double types this is equivalent to dotc1d.
+
+    Maps to Blas functions sdot, ddot, cdotu and zdotu.
+
+    See Also
+    --------
+    dotc1d : dot product conjugating first vector.
+    innerwt : weighted (i.e. triple) inner product.
+
+    Examples
+    --------
+    >>> a = np.arange(1,5).reshape(2,2)
+    >>> b = np.arange(1,8,2).reshape(2,2)
+    >>> res = inner1d(a,b)
+    >>> res.shape
+    (2,)
+    >>> print res
+    [  7.  43.]
+
+    """
+    return _impl.inner1d(a, b, **kwargs)
+
+
+def dotc1d(a, b, **kwargs):
+    """
+    Compute the dot product of vectors over the inner dimension, conjugating
+    the first vector, with broadcasting
+
+    Parameters
+    ----------
+    a : (..., N) array
+        Input array
+    b : (..., N) array
+        Input array
+
+    Returns
+    -------
+    dotc : (...) array
+        dot product conjugating the first vector over the inner
+        dimension.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply when matching dimensions.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    For single and double types this is equivalent to inner1d.
+
+    Maps to Blas functions sdot, ddot, cdotc and zdotc.
+
+    See Also
+    --------
+    inner1d : dot product
+    innerwt : weighted (i.e. triple) inner product.
+
+    Examples
+    --------
+    >>> a = np.arange(1,5).reshape(2,2)
+    >>> b = np.arange(1,8,2).reshape(2,2)
+    >>> res = inner1d(a,b)
+    >>> res.shape
+    (2,)
+    >>> print res
+    [  7.  43.]
+
+    """
+    return _impl.dotc1d(a, b, **kwargs)
+
+
+def innerwt(a, b, c, **kwargs):
+    """
+    Compute the weighted (i.e. triple) inner product, with
+    broadcasting.
+
+    Parameters
+    ----------
+    a, b, c : (..., N) array
+        Input arrays
+
+    Returns
+    -------
+    inner : (...) array
+        The weighted (i.e. triple) inner product.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    inner1d : inner product.
+    dotc1d : dot product conjugating first vector.
+
+    Examples
+    --------
+    >>> a = np.arange(1,5).reshape(2,2)
+    >>> b = np.arange(1,8,2).reshape(2,2)
+    >>> c = np.arange(0.25,1.20,0.25).reshape(2,2)
+    >>> res = innerwt(a,b,c)
+    >>> res.shape
+    (2,)
+    >>> res
+    array([  3.25,  39.25])
+
+    """
+    return _impl.innerwt(a, b, c, **kwargs)
+
+
+def matrix_multiply(a,b,**kwargs):
+    """
+    Compute matrix multiplication, with broadcasting
+
+    Parameters
+    ----------
+    a : (..., M, N) array
+        Input array.
+    b : (..., N, P) array
+        Input array.
+
+    Returns
+    -------
+    r : (..., M, P) array matrix multiplication of a and b over any number of
+        outer dimensions
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Matrix multiplication is computed using BLAS _gemm functions.
+
+    Implemented for single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Examples
+    --------
+    >>> a = np.arange(1,17).reshape(2,2,4)
+    >>> b = np.arange(1,25).reshape(2,4,3)
+    >>> res = matrix_multiply(a,b)
+    >>> res.shape
+    (2, 2, 3)
+    >>> res
+    array([[[   70.,    80.,    90.],
+            [  158.,   184.,   210.]],
+    <BLANKLINE>
+           [[  750.,   792.,   834.],
+            [ 1030.,  1088.,  1146.]]])
+
+    """
+    return _impl.matrix_multiply(a,b,**kwargs)
+
+
+def det(a, **kwargs):
+    """
+    Compute the determinant of arrays, with broadcasting.
+
+    Parameters
+    ----------
+    a : (NDIMS, M, M) array
+        Input array. Its inner dimensions must be those of a square 2-D array.
+
+    Returns
+    -------
+    det : (NDIMS) array
+        Determinants of `a`
+
+    See Also
+    --------
+    slogdet : Another representation for the determinant, more suitable
+        for large matrices where underflow/overflow may occur
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The determinants are computed via LU factorization using the LAPACK
+    routine _getrf.
+
+    Implemented for single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Examples
+    --------
+    The determinant of a 2-D array [[a, b], [c, d]] is ad - bc:
+
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> np.allclose(-2.0, det(a))
+    True
+
+    >>> a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]] ])
+    >>> np.allclose(-2.0, det(a))
+    True
+
+    """
+    return _impl.det(a, **kwargs)
+
+
+def slogdet(a, **kwargs):
+    """
+    Compute the sign and (natural) logarithm of the determinant of an array,
+    with broadcasting.
+
+    If an array has a very small or very large determinant, then a call to 
+    `det` may overflow or underflow. This routine is more robust against such
+    issues, because it computes the logarithm of the determinant rather than
+    the determinant itself
+
+    Parameters
+    ----------
+    a : (..., M, M) array
+        Input array. Its inner dimensions must be those of a square 2-D array.
+
+    Returns
+    -------
+    sign : (...) array
+        An array of numbers representing the sign of the determinants. For real
+        matrices, this is 1, 0, or -1. For complex matrices, this is a complex 
+        number with absolute value 1 (i.e., it is on the unit circle), or else
+        0.
+    logdet : (...) array
+        The natural log of the absolute value of the determinant. This is always
+        a real type.
+
+    If the determinant is zero, then `sign` will be 0 and `logdet` will be -Inf.
+    In all cases, the determinant is equal to ``sign * np.exp(logdet)``.
+
+    See Also
+    --------
+    det
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The determinants are computed via LU factorization using the LAPACK
+    routine _getrf.
+
+    Implemented for types single, double, csingle and cdouble. Numpy conversion
+    rules apply. For complex versions `logdet` will be of the associated real
+    type (single for csingle, double for cdouble).
+
+    Examples
+    --------
+    The determinant of a 2-D array [[a, b], [c, d]] is ad - bc:
+
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> (sign, logdet) = slogdet(a)
+    >>> sign.shape
+    ()
+    >>> logdet.shape
+    ()
+    >>> np.allclose(-2.0, sign * np.exp(logdet))
+    True
+
+    >>> a = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]] ])
+    >>> (sign, logdet) = slogdet(a)
+    >>> sign.shape
+    (2,)
+    >>> logdet.shape
+    (2,)
+    >>> np.allclose(-2.0, sign * np.exp(logdet))
+    True
+
+    """
+    return _impl.slogdet(a, **kwargs)
+
+
+def inv(a, **kwargs):
+    """
+    Compute the (multiplicative) inverse of matrices, with broadcasting.
+
+    Given a square matrix `a`, return the matrix `ainv` satisfying
+    ``matrix_multiply(a, ainv) = matrix_multiply(ainv, a) = Identity matrix``
+
+    Parameters
+    ----------
+    a : (..., M, M) array
+        Matrices to be inverted
+
+    Returns
+    -------
+    ainv : (..., M, M) array
+        (Multiplicative) inverse of the `a` matrices.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Singular matrices and thus, not invertible, result in an array of NaNs.
+
+    See Also
+    --------
+    poinv : compute the multiplicative inverse of hermitian/symmetric matrices,
+            using cholesky decomposition.
+
+    Examples
+    --------
+    >>> a = np.array([[1, 2], [3, 4]])
+    >>> ainv = inv(a)
+    >>> np.allclose(matrix_multiply(a, ainv), np.eye(2))
+    True
+    >>> np.allclose(matrix_multiply(ainv, a), np.eye(2))
+    True
+
+    """
+    return _impl.inv(a, **kwargs)
+
+
+def cholesky(a, UPLO='L', **kwargs):
+    """
+    Compute the cholesky decomposition of `a`, with broadcasting
+
+    The Cholesky decomposition (or Cholesky triangle) is a decomposition of a
+    Hermitian, positive-definite matrix into the product of a lower triangular
+    matrix and its conjugate transpose.
+
+    A = LL*
+
+    where L* is the positive-definite matrix.
+
+    Parameters
+    ----------
+    a : (..., M, M) array
+        Matrices for which compute the cholesky decomposition
+
+    Returns
+    -------
+    l : (..., M, M) array
+        Matrices for each element where each entry is the lower triangular
+        matrix with strictly positive diagonal entries such that a = ll* for
+        all outer dimensions
+
+    See Also
+    --------
+    chosolve : solve a system using cholesky decomposition
+    poinv : compute the inverse of a matrix using cholesky decomposition
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Decomposition is performed using LAPACK routine _potrf.
+
+    For elements where the LAPACK routine fails, the result will be set to NaNs.
+
+    If an element of the source array is not a positive-definite matrix the
+    result for that element is undefined.
+
+    Examples
+    --------
+    >>> A = np.array([[1,-2j],[2j,5]])
+    >>> A
+    array([[ 1.+0.j,  0.-2.j],
+           [ 0.+2.j,  5.+0.j]])
+    >>> L = cholesky(A)
+    >>> L
+    array([[ 1.+0.j,  0.+0.j],
+           [ 0.+2.j,  1.+0.j]])
+
+    """
+    if 'L' == UPLO:
+        gufunc = _impl.cholesky_lo
+    else:
+        gufunc = _impl.cholesky_up
+
+    return gufunc(a, **kwargs)
+
+
+def eig(a, **kwargs):
+    """
+    Compute the eigenvalues and right eigenvectors of square arrays,
+    with broadcasting
+
+    Parameters
+    ----------
+    a : (..., M, M) array
+        Matrices for which the eigenvalues and right eigenvectors will
+        be computed
+
+    Returns
+    -------
+    w : (..., M) array
+        The eigenvalues, each repeated according to its multiplicity.
+        The eigenvalues are not necessarily ordered. The resulting
+        array will be always be of complex type. When `a` is real
+        the resulting eigenvalues will be real (0 imaginary part) or
+        occur in conjugate pairs
+
+    v : (..., M, M) array
+        The normalized (unit "length") eigenvectors, such that the
+        column ``v[:,i]`` is the eigenvector corresponding to the
+        eigenvalue ``w[i]``.
+
+    See Also
+    --------
+    eigvals : eigenvalues of general arrays.
+    eigh : eigenvalues and right eigenvectors of symmetric/hermitian
+        arrays.
+    eigvalsh : eigenvalues of symmetric/hermitian arrays.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    Eigenvalues and eigenvectors for single and double versions will
+    always be typed csingle and cdouble, even if all the results are
+    real (imaginary part will be 0).
+
+    This is implemented using the _geev LAPACK routines which compute
+    the eigenvalues and eigenvectors of general square arrays.
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Examples
+    --------
+    First, a utility function to check if eigvals/eigvectors are correct.
+    This checks the definition of eigenvectors. For each eigenvector v
+    with associated eigenvalue w of a matrix M the following equality must
+    hold: Mv == wv
+
+    >>> def check_eigen(M, w, v):
+    ...     '''vectorial check of Mv==wv'''
+    ...     lhs = matrix_multiply(M, v)
+    ...     rhs = w*v
+    ...     return np.allclose(lhs, rhs)
+
+    (Almost) Trivial example with real e-values and e-vectors. Note
+    the complex types of the results
+
+    >>> M = np.diag((1,2,3)).astype(float)
+    >>> w, v = eig(M)
+    >>> check_eigen(M, w, v)
+    True
+
+    Real matrix possessing complex e-values and e-vectors; note that the
+    e-values are complex conjugates of each other.
+
+    >>> M = np.array([[1, -1], [1, 1]])
+    >>> w, v = eig(M)
+    >>> check_eigen(M, w, v)
+    True
+
+    Complex-valued matrix with real e-values (but complex-valued e-vectors);
+    note that a.conj().T = a, i.e., a is Hermitian.
+
+    >>> M = np.array([[1, 1j], [-1j, 1]])
+    >>> w, v = eig(M)
+    >>> check_eigen(M, w, v)
+    True
+
+    """
+    return _impl.eig(a, **kwargs)
+
+
+def eigvals(a, **kwargs):
+    """
+    Compute the eigenvalues of general matrices, with broadcasting.
+
+    Main difference between `eigvals` and `eig`: the eigenvectors aren't
+    returned.
+
+    Parameters
+    ----------
+    a : (..., M, M) array
+        Matrices whose eigenvalues will be computed
+
+    Returns
+    -------
+    w : (..., M) array
+        The eigenvalues, each repeated according to its multiplicity.
+        The eigenvalues are not necessarily ordered. The resulting
+        array will be always be of complex type. When `a` is real
+        the resulting eigenvalues will be real (0 imaginary part) or
+        occur in conjugate pairs
+
+    See Also
+    --------
+    eig : eigenvalues and right eigenvectors of general arrays.
+    eigh : eigenvalues and right eigenvectors of symmetric/hermitian
+        arrays.
+    eigvalsh : eigenvalues of symmetric/hermitian arrays.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    Eigenvalues for single and double versions will always be typed
+    csingle and cdouble, even if all the results are real (imaginary
+    part will be 0).
+
+    This is implemented using the _geev LAPACK routines which compute
+    the eigenvalues and eigenvectors of general square arrays.
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Examples
+    --------
+
+    Eigenvalues for a diagonal matrix are its diagonal elements
+
+    >>> D = np.diag((-1,1))
+    >>> eigvals(D)
+    array([-1.+0.j,  1.+0.j])
+
+    Multiplying on the left by an orthogonal matrix, `Q`, and on the
+    right by `Q.T` (the transpose of `Q` preserves the eigenvalues of
+    the original matrix
+
+    >>> x = np.random.random()
+    >>> Q = np.array([[np.cos(x), -np.sin(x)], [np.sin(x), np.cos(x)]])
+    >>> A = matrix_multiply(Q, D)
+    >>> A = matrix_multiply(A, Q.T)
+    >>> eigvals(A)
+    array([-1.+0.j,  1.+0.j])
+
+    """
+    return _impl.eigvals(a, **kwargs)
+
+
+def quadratic_form(u,Q,v, **kwargs):
+    """
+    Compute the quadratic form uQv, with broadcasting
+
+    Parameters
+    ----------
+    u : (..., M) array
+        The u vectors of the quadratic form uQv
+
+    Q : (..., M, N) array
+        The Q matrices of the quadratic form uQv
+
+    v : (..., N) array
+        The v vectors of the quadratic form uQv
+
+    Returns
+    -------
+    qf : (...) array
+        The result of the quadratic forms
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    This is similar to PDL inner2
+
+    Examples
+    --------
+
+    The result in absence of broadcasting is just as np.dot(np.dot(u,Q),v)
+    or np.dot(u, np.dot(Q,v))
+
+    >>> u = np.array([2., 3.])
+    >>> Q = np.array([[1.,1.], [0.,1.]])
+    >>> v = np.array([1.,2.])
+    >>> quadratic_form(u,Q,v)
+    12.0
+
+    >>> np.dot(np.dot(u,Q),v)
+    12.0
+
+    >>> np.dot(u, np.dot(Q,v))
+    12.0
+
+    """
+    return _impl.quadratic_form(u, Q, v, **kwargs)
+
+
+def add3(a, b, c, **kwargs):
+    """
+    Element-wise addition of 3 arrays: a + b + c.
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the addends
+
+    Returns
+    -------
+    add3 : (...) array
+        resulting element-wise addition.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    multiply3 : element-wise three-way multiplication.
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 30.0, 30)
+    >>> add3(a[0::3], a[1::3], a[2::3])
+    array([  6.,  15.,  24.,  33.,  42.,  51.,  60.,  69.,  78.,  87.])
+
+    """
+    return _impl.add3(a, b, c, **kwargs)
+
+
+def multiply3(a, b, c, **kwargs):
+    """
+    Element-wise multiplication of 3 arrays: a*b*c.
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the factors
+
+    Returns
+    -------
+    m3 : (...) array
+        resulting element-wise product
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    add3 : element-wise three-was addition
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 10.0, 10)
+    >>> multiply3(a, 1.01, a)
+    array([   1.01,    4.04,    9.09,   16.16,   25.25,   36.36,   49.49,
+             64.64,   81.81,  101.  ])
+
+    """
+    return _impl.multiply3(a, b, c, **kwargs)
+
+
+def multiply3_add(a, b, c, d, **kwargs):
+    """
+    Element-wise multiplication of 3 arrays adding an element
+    of the a 4th array to the result: a*b*c + d
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the factors
+
+    d : (...) array
+        array with the addend
+
+    Returns
+    -------
+    m3a : (...) array
+        resulting element-wise addition
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    add3 : element-wise three-was addition
+    multiply3 : element-wise three-way multiplication.
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 10.0, 10)
+    >>> multiply3_add(a, 1.01, a, 42e-4)
+    array([   1.0142,    4.0442,    9.0942,   16.1642,   25.2542,   36.3642,
+             49.4942,   64.6442,   81.8142,  101.0042])
+
+    """
+    return _impl.multiply3_add(a, b, c, d, **kwargs)
+
+
+def multiply_add(a, b, c, **kwargs):
+    """
+    Element-wise addition of 3 arrays
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the addends
+
+    Returns
+    -------
+    add3 : (...) array
+        resulting element-wise addition
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    add3 : element-wise three-was addition
+    multiply3 : element-wise three-way multiplication.
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 10.0, 10)
+    >>> multiply_add(a, a, 42e-4)
+    array([   1.0042,    4.0042,    9.0042,   16.0042,   25.0042,   36.0042,
+             49.0042,   64.0042,   81.0042,  100.0042])
+
+    """
+    return _impl.multiply_add(a, b, c, **kwargs)
+
+
+def multiply_add2(a, b, c, d, **kwargs):
+    """
+    Element-wise addition of 3 arrays
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the addends
+
+    Returns
+    -------
+    add3 : (...) array
+        resulting element-wise addition
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    add3 : element-wise three-was addition
+    multiply3 : element-wise three-way multiplication.
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 10.0, 10)
+    >>> multiply_add2(a, a, a, 42e-4)
+    array([   2.0042,    6.0042,   12.0042,   20.0042,   30.0042,   42.0042,
+             56.0042,   72.0042,   90.0042,  110.0042])
+
+    """
+    return _impl.multiply_add2(a, b, c, d, **kwargs)
+
+
+def multiply4(a, b, c, d, **kwargs):
+    """
+    Element-wise addition of 3 arrays
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the addends
+
+    Returns
+    -------
+    add3 : (...) array
+        resulting element-wise addition
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    add3 : element-wise three-was addition
+    multiply3 : element-wise three-way multiplication.
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 10.0, 10)
+    >>> multiply4(a, a, a[::-1], 1.0001)
+    array([  10.001 ,   36.0036,   72.0072,  112.0112,  150.015 ,  180.018 ,
+            196.0196,  192.0192,  162.0162,  100.01  ])
+
+    """
+    return _impl.multiply4(a, b, c, d, **kwargs)
+
+
+def multiply4_add(a, b, c, d, e, **kwargs):
+    """
+    Element-wise addition of 3 arrays
+
+    Parameters
+    ----------
+    a, b, c : (...) array
+        arrays with the addends
+
+    Returns
+    -------
+    add3 : (...) array
+        resulting element-wise addition
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Implemented for types single, double, csingle and cdouble. Numpy
+    conversion rules apply.
+
+    See Also
+    --------
+    add3 : element-wise three-was addition
+    multiply3 : element-wise three-way multiplication.
+    multiply3_add : element-wise three-way multiplication and addition.
+    multiply_add : element-wise multiply-add.
+    multiply_add2 : element-wise multiplication with two additions.
+    multiply4 : element-wise four-way multiplication
+    multiply4_add : element-wise four-way multiplication and addition,
+
+    Examples
+    --------
+    >>> a = np.linspace(1.0, 10.0, 10)
+    >>> multiply4_add(a, a, a[::-1], 1.01, 42e-4)
+    array([  10.1042,   36.3642,   72.7242,  113.1242,  151.5042,  181.8042,
+            197.9642,  193.9242,  163.6242,  101.0042])
+
+    """
+    return _impl.multiply4_add(a, b, c, d, e,**kwargs)
+
+
+def eigh(A, UPLO='L', **kw_args):
+    """
+    Computes the eigenvalues and eigenvectors for the square matrices
+    in the inner dimensions of A, being those matrices
+    symmetric/hermitian.
+
+    Parameters
+    ----------
+    A : (..., M, M) array
+         Hermitian/Symmetric matrices whose eigenvalues and
+         eigenvectors are to be computed.
+    UPLO : {'L', 'U'}, optional
+         Specifies whether the calculation is done with the lower
+         triangular part of the elements in `A` ('L', default) or
+         the upper triangular part ('U').
+
+    Returns
+    -------
+    w : (..., M) array
+        The eigenvalues, not necessarily ordered.
+    v : (..., M, M) array
+        The inner dimensions contain matrices with the normalized
+        eigenvectors as columns. The column-numbers are coherent with
+        the associated eigenvalue.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The eigenvalues/eigenvectors are computed using LAPACK routines _ssyevd,
+    _heevd
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Implemented for single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Unlike eig, the results for single and double will always be single
+    and doubles. It is not possible for symmetrical real matrices to result
+    in complex eigenvalues/eigenvectors
+
+    See Also
+    --------
+    eigvalsh : eigenvalues of symmetric/hermitian arrays.
+    eig : eigenvalues and right eigenvectors for general matrices.
+    eigvals : eigenvalues for general matrices.
+
+    Examples
+    --------
+    First, a utility function to check if eigvals/eigvectors are correct.
+    This checks the definition of eigenvectors. For each eigenvector v
+    with associated eigenvalue w of a matrix M the following equality must
+    hold: Mv == wv
+
+    >>> def check_eigen(M, w, v):
+    ...     '''vectorial check of Mv==wv'''
+    ...     lhs = matrix_multiply(M, v)
+    ...     rhs = w*v
+    ...     return np.allclose(lhs, rhs)
+
+    A simple example that computes eigenvectors and eigenvalues of
+    a hermitian matrix and checks that A*v = v*w for both pairs of
+    eignvalues(w) and eigenvectors(v)
+
+    >>> M = np.array([[1, -2j], [2j, 1]])
+    >>> w, v = eigh(M)
+    >>> check_eigen(M, w, v)
+    True
+
+    """
+    if 'L' == UPLO:
+        gufunc = _impl.eigh_lo
+    else:
+        gufunc = _impl.eigh_up
+
+    return gufunc(A, **kw_args)
+
+
+def eigvalsh(A, UPLO='L', **kw_args):
+    """
+    Computes the eigenvalues for the square matrices in the inner
+    dimensions of A, being those matrices symmetric/hermitian.
+
+    Parameters
+    ----------
+    A : (..., M, M) array
+         Hermitian/Symmetric matrices whose eigenvalues and
+         eigenvectors are to be computed.
+    UPLO : {'L', 'U'}, optional
+         Specifies whether the calculation is done with the lower
+         triangular part of the elements in `A` ('L', default) or
+         the upper triangular part ('U').
+
+    Returns
+    -------
+    w : (..., M) array
+        The eigenvalues, not necessarily ordered.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The eigenvalues are computed using LAPACK routines _ssyevd, _heevd
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Implemented for single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Unlike eigvals, the results for single and double will always be single
+    and doubles. It is not possible for symmetrical real matrices to result
+    in complex eigenvalues.
+
+    See Also
+    --------
+    eigh : eigenvalues of symmetric/hermitian arrays.
+    eig : eigenvalues and right eigenvectors for general matrices.
+    eigvals : eigenvalues for general matrices.
+
+    Examples
+    --------
+    eigvalsh results should be the same as the eigenvalues returned by eigh
+
+    >>> a = np.array([[1, -2j], [2j, 5]])
+    >>> w, v = eigh(a)
+    >>> np.allclose(w, eigvalsh(a))
+    True
+
+    eigvalsh on an identity matrix is all ones
+    >>> eigvalsh(np.eye(6))
+    array([ 1.,  1.,  1.,  1.,  1.,  1.])
+
+    """
+    if ('L' == UPLO):
+        gufunc = _impl.eigvalsh_lo
+    else:
+        gufunc = _impl.eigvalsh_up
+
+    return gufunc(A,**kw_args)
+
+
+def solve(A,B,**kw_args):
+    """
+    Solve the linear matrix equations on the inner dimensions.
+
+    Computes the "exact" solution, `x`. of the well-determined,
+    i.e., full rank, linear matrix equations `ax = b`.
+
+    Parameters
+    ----------
+    A : (..., M, M) array
+        Coefficient matrices.
+    B : (..., M, N) array
+        Ordinate or "dependent variable" values.
+
+    Returns
+    -------
+    X : (..., M, N) array
+        Solutions to the system A X = B for all the outer dimensions
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The solutions are computed using LAPACK routine _gesv
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Implemented for single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    See Also
+    --------
+    chosolve : solve a system using cholesky decomposition (for equations
+               having symmetric/hermitian coefficient matrices)
+
+    Examples
+    --------
+    Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``:
+
+    >>> a = np.array([[3,1], [1,2]])
+    >>> b = np.array([9,8])
+    >>> x = solve(a, b)
+    >>> x
+    array([ 2.,  3.])
+
+    Check that the solution is correct:
+
+    >>> np.allclose(np.dot(a, x), b)
+    True
+
+    """
+    if len(B.shape) == (len(A.shape) - 1):
+        gufunc = _impl.solve1
+    else:
+        gufunc = _impl.solve
+
+    return gufunc(A,B,**kw_args)
+
+
+def svd(a, full_matrices=1, compute_uv=1 ,**kw_args):
+    """
+    Singular Value Decomposition on the inner dimensions.
+
+    Factors the matrices in `a` as ``u * np.diag(s) * v``, where `u`
+    and `v` are unitary and `s` is a 1-d array of `a`'s singular
+    values.
+
+    Parameters
+    ----------
+    a : (..., M, N) array
+        The array of matrices to decompose.
+    full_matrices : bool, optional
+        If True (default), `u` and `v` have the shapes (`M`, `M`) and
+        (`N`, `N`), respectively. Otherwise, the shapes are (`M`, `K`)
+        and (`K`, `N`), respectively, where `K` = min(`M`, `N`).
+    compute_uv : bool, optional
+        Whether or not to compute `u` and `v` in addition to `s`. True
+        by default.
+
+    Returns
+    -------
+    u : { (..., M, M), (..., M, K) } array
+        Unitary matrices. The actual shape depends on the value of
+        ``full_matrices``. Only returned when ``compute_uv`` is True.
+    s : (..., K) array
+        The singular values for every matrix, sorted in descending order.
+    v : { (..., N, N), (..., K, N) } array
+        Unitary matrices. The actual shape depends on the value of
+        ``full_matrices``. Only returned when ``compute_uv`` is True.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    Singular Value Decomposition is performed using LAPACK routine _gesdd
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Implemented for types single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    Examples
+    --------
+    >>> a = np.random.randn(9, 6) + 1j*np.random.randn(9, 6)
+
+    Reconstruction based on full SVD:
+
+    >>> U, s, V = svd(a, full_matrices=True)
+    >>> U.shape, V.shape, s.shape
+    ((9, 9), (6, 6), (6,))
+    >>> S = np.zeros((9, 6), dtype=complex)
+    >>> S[:6, :6] = np.diag(s)
+    >>> np.allclose(a, np.dot(U, np.dot(S, V)))
+    True
+
+    Reconstruction based on reduced SVD:
+
+    >>> U, s, V = svd(a, full_matrices=False)
+    >>> U.shape, V.shape, s.shape
+    ((9, 6), (6, 6), (6,))
+    >>> S = np.diag(s)
+    >>> np.allclose(a, np.dot(U, np.dot(S, V)))
+    True
+
+    """
+    m = a.shape[-2]
+    n = a.shape[-1]
+    if 1 == compute_uv:
+        if 1 == full_matrices:
+            if m < n:
+                gufunc = _impl.svd_m_f
+            else:
+                gufunc = _impl.svd_n_f
+        else:
+            if m < n:
+                gufunc = _impl.svd_m_s
+            else:
+                gufunc = _impl.svd_n_s
+    else:
+        if m < n:
+            gufunc = _impl.svd_m
+        else:
+            gufunc = _impl.svd_n
+    return gufunc(a, **kw_args)
+
+
+def chosolve(A, B, UPLO='L', **kw_args):
+    """
+    Solve the linear matrix equations on the inner dimensions, using
+    cholesky decomposition.
+
+    Computes the "exact" solution, `x`. of the well-determined,
+    i.e., full rank, linear matrix equations `ax = b`, where a is
+    a symmetric/hermitian positive-definite matrix.
+
+    Parameters
+    ----------
+    A : (..., M, M) array
+        Coefficient symmetric/hermitian positive-definite matrices.
+    B : (..., M, N) array
+        Ordinate or "dependent variable" values.
+    UPLO : {'L', 'U'}, optional
+         Specifies whether the calculation is done with the lower
+         triangular part of the elements in `A` ('L', default) or
+         the upper triangular part ('U').
+
+    Returns
+    -------
+    X : (..., M, N) array
+        Solutions to the system A X = B for all elements in the outer
+        dimensions
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The solutions are computed using LAPACK routines _potrf, _potrs
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Implemented for single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    See Also
+    --------
+    solve : solve a system using cholesky decomposition (for equations
+            having symmetric/hermitian coefficient matrices)
+
+    Examples
+    --------
+    Solve the system of equations ``3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8``:
+    (note the matrix is symmetric in this system)
+
+    >>> a = np.array([[3,1], [1,2]])
+    >>> b = np.array([9,8])
+    >>> x = solve(a, b)
+    >>> x
+    array([ 2.,  3.])
+
+    Check that the solution is correct:
+
+    >>> np.allclose(np.dot(a, x), b)
+    True
+
+    """
+    if len(B.shape) == (len(A.shape) - 1):
+        if 'L' == UPLO:
+            gufunc = _impl.chosolve1_lo
+        else:
+            gufunc = _impl.chosolve1_up
+    else:
+        if 'L' == UPLO:
+            gufunc = _impl.chosolve_lo
+        else:
+            gufunc = _impl.chosolve_up
+
+    return gufunc(A, B, **kw_args)
+
+
+def poinv(A, UPLO='L', **kw_args):
+    """
+    Compute the (multiplicative) inverse of symmetric/hermitian positive 
+    definite matrices, with broadcasting.
+
+    Given a square symmetic/hermitian positive-definite matrix `a`, return 
+    the matrix `ainv` satisfying ``matrix_multiply(a, ainv) = 
+    matrix_multiply(ainv, a) = Identity matrix``.
+
+    Parameters
+    ----------
+    a : (..., M, M) array
+        Symmetric/hermitian postive definite matrices to be inverted.
+
+    Returns
+    -------
+    ainv : (..., M, M) array
+        (Multiplicative) inverse of the `a` matrices.
+
+    Notes
+    -----
+    Numpy broadcasting rules apply.
+
+    The inverse is computed using LAPACK routines _potrf, _potri
+
+    For elements where the LAPACK routine fails, the result will be set
+    to NaNs.
+
+    Implemented for types single, double, csingle and cdouble. Numpy conversion
+    rules apply.
+
+    See Also
+    --------
+    inv : compute the multiplicative inverse of general matrices.
+
+    Examples
+    --------
+    >>> a = np.array([[5, 3], [3, 5]])
+    >>> ainv = poinv(a)
+    >>> np.allclose(matrix_multiply(a, ainv), np.eye(2))
+    True
+    >>> np.allclose(matrix_multiply(ainv, a), np.eye(2))
+    True
+
+    """
+    if 'L' == UPLO:
+        gufunc = _impl.poinv_lo
+    else:
+        gufunc = _impl.poinv_up
+
+    return gufunc(A, **kw_args);
+
+
+if __name__ == "__main__":
+    import doctest
+    print "Running doctests..."
+    doctest.testmod()
diff --git a/numpy/core/src/umath/gufuncs_linalg_contents.rst b/numpy/core/src/umath/gufuncs_linalg_contents.rst
new file mode 100644
index 000000000000..424c5f214837
--- /dev/null
+++ b/numpy/core/src/umath/gufuncs_linalg_contents.rst
@@ -0,0 +1,104 @@
+=======================
+ gufuncs_linalg module
+=======================
+
+gufuncs_linalg implements a series of linear algebra functions as gufuncs.
+Most of these functions are already present in numpy.linalg, but as they
+are implemented using gufunc kernels they can be broadcasting. Some parts
+that are python in numpy.linalg are implemented inside C functions, as well
+as the iteration used when used on vectors. This can result in faster
+execution as well.
+
+In addition, there are some ufuncs thrown in that implement fused operations
+over numpy vectors that can result in faster execution on large vector 
+compared to non-fused versions (for example: multiply_add, multiply3).
+
+In fact, gufuncs_linalg is a very thin wrapper of python code that wraps
+the actual kernels (gufuncs). This wrapper was needed in order to provide
+a sane interface for some functions. Mostly working around limitations on
+what can be described in a gufunc signature. Things like having one dimension
+of a result depending on the minimum of two dimensions of the sources (like
+in svd) or passing an uniform keyword parameter to the whole operation
+(like UPLO on functions over symmetric/hermitian matrices).
+
+The gufunc kernels are in a c module named _umath_linalg, that is imported
+privately in gufuncs_linalg.
+
+==========
+ Contents
+==========
+Here is an enumeration of the functions. These are the functions exported by
+the module and should appear in its __all__ attribute. All the functions
+contain a docstring explaining them in detail.
+
+General
+=======
+- inner1d
+- innerwt
+- matrix_multiply
+- quadratic_form
+
+Lineal Algebra
+==============
+- det
+- slogdet
+- cholesky
+- eig
+- eigvals
+- eigh
+- eigvalsh
+- solve
+- svd
+- chosolve
+- inv
+- poinv
+
+Fused Operations
+================
+- add3
+- multiply3
+- multiply3_add
+- multiply_add
+- multiply_add2
+- multiply4
+- multiply4_add
+
+================
+ Error Handling
+================
+Unlike the numpy.linalg module, this module does not use exceptions to notify
+errors in the execution of the kernels. As these functions are thougth to be 
+used in a vector way it didn't seem appropriate to raise exceptions on failure
+of an element. So instead, when an error computing an element occurs its 
+associated result will be set to an invalid value (all NaNs).
+
+Exceptions can occur if the arguments fail to map properly to the underlying
+gufunc (due to signature mismatch, for example).
+
+================================
+ Notes about the implementation
+================================
+Where possible, the wrapper functions map directly into a gufunc implementing
+the computation.
+
+That's not always the case, as due to limitations of the gufunc interface some
+functions cannot be mapped straight into a kernel.
+
+Two cases come to mind:
+- An uniform parameter is needed to configure the way the computation is 
+performed (like UPLO in the functions working on symmetric/hermitian matrices)
+- svd, where it was impossible to map the function to a gufunc signature.
+
+In the case of uniform parameters like UPLO, there are two separate entry points
+in the C module that imply either 'U' or 'L'. The wrapper just selects the
+kernel to use by checking the appropriate keyword parameter. This way a
+function interface similar to numpy.linalg can be kept.
+
+In the case of SVD not only there were problems with the support of keyword
+arguments. There was the added problem of the signature system not being able
+to cope with the needs of this functions. Just for the singular values a
+a signature like (m,n)->(min(m,n)) was needed. This has been worked around by
+implementing different kernels for the cases where min(m,n) == m and where
+min(m,n) == n. The wrapper code automatically selects the appropriate one.
+
+
diff --git a/numpy/core/src/umath/lapack_lite/blas_lite.c b/numpy/core/src/umath/lapack_lite/blas_lite.c
new file mode 100644
index 000000000000..bd24768c3fe9
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/blas_lite.c
@@ -0,0 +1,21135 @@
+/*
+NOTE: This is generated code. Look in Misc/lapack_lite for information on
+      remaking this file.
+*/
+#include "f2c.h"
+
+#ifdef HAVE_CONFIG
+#include "config.h"
+#else
+extern doublereal dlamch_(char *);
+#define EPSILON dlamch_("Epsilon")
+#define SAFEMINIMUM dlamch_("Safe minimum")
+#define PRECISION dlamch_("Precision")
+#define BASE dlamch_("Base")
+#endif
+
+extern doublereal dlapy2_(doublereal *x, doublereal *y);
+
+
+
+/* Table of constant values */
+
+static complex c_b21 = {1.f,0.f};
+static integer c__1 = 1;
+static doublecomplex c_b1077 = {1.,0.};
+
+/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer *
+	incx, complex *cy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4;
+    real r__1, r__2;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, ix, iy;
+
+
+/*
+       constant times a vector plus a vector.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = iy;
+	i__3 = iy;
+	i__4 = ix;
+	q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[
+		i__4].i + ca->i * cx[i__4].r;
+	q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i;
+	cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	i__4 = i__;
+	q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[
+		i__4].i + ca->i * cx[i__4].r;
+	q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i;
+	cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
+/* L30: */
+    }
+    return 0;
+} /* caxpy_ */
+
+/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex *
+	cy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+
+
+/*
+       copies a vector, x, to a vector, y.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = iy;
+	i__3 = ix;
+	cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i;
+/* L30: */
+    }
+    return 0;
+} /* ccopy_ */
+
+/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer
+	*incx, complex *cy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static complex ctemp;
+
+
+/*
+       forms the dot product of two vectors, conjugating the first
+       vector.
+       jack dongarra, linpack,  3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    ctemp.r = 0.f, ctemp.i = 0.f;
+     ret_val->r = 0.f,  ret_val->i = 0.f;
+    if (*n <= 0) {
+	return ;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r_cnjg(&q__3, &cx[ix]);
+	i__2 = iy;
+	q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r *
+		cy[i__2].i + q__3.i * cy[i__2].r;
+	q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
+	ctemp.r = q__1.r, ctemp.i = q__1.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
+    return ;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r_cnjg(&q__3, &cx[i__]);
+	i__2 = i__;
+	q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r *
+		cy[i__2].i + q__3.i * cy[i__2].r;
+	q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
+	ctemp.r = q__1.r, ctemp.i = q__1.i;
+/* L30: */
+    }
+     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
+    return ;
+} /* cdotc_ */
+
+/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer
+	*incx, complex *cy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    complex q__1, q__2;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static complex ctemp;
+
+
+/*
+       forms the dot product of two vectors.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    ctemp.r = 0.f, ctemp.i = 0.f;
+     ret_val->r = 0.f,  ret_val->i = 0.f;
+    if (*n <= 0) {
+	return ;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	i__3 = iy;
+	q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i =
+		cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r;
+	q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
+	ctemp.r = q__1.r, ctemp.i = q__1.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
+    return ;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i =
+		cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r;
+	q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
+	ctemp.r = q__1.r, ctemp.i = q__1.i;
+/* L30: */
+    }
+     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
+    return ;
+} /* cdotu_ */
+
+/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer *
+	n, integer *k, complex *alpha, complex *a, integer *lda, complex *b,
+	integer *ldb, complex *beta, complex *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5, i__6;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static logical nota, notb;
+    static complex temp;
+    static logical conja, conjb;
+    static integer ncola;
+    extern logical lsame_(char *, char *);
+    static integer nrowa, nrowb;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CGEMM  performs one of the matrix-matrix operations
+
+       C := alpha*op( A )*op( B ) + beta*C,
+
+    where  op( X ) is one of
+
+       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+
+    alpha and beta are scalars, and A, B and C are matrices, with op( A )
+    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n',  op( A ) = A.
+
+                TRANSA = 'T' or 't',  op( A ) = A'.
+
+                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
+
+             Unchanged on exit.
+
+    TRANSB - CHARACTER*1.
+             On entry, TRANSB specifies the form of op( B ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSB = 'N' or 'n',  op( B ) = B.
+
+                TRANSB = 'T' or 't',  op( B ) = B'.
+
+                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry,  M  specifies  the number  of rows  of the  matrix
+             op( A )  and of the  matrix  C.  M  must  be at least  zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N  specifies the number  of columns of the matrix
+             op( B ) and the number of columns of the matrix C. N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry,  K  specifies  the number of columns of the matrix
+             op( A ) and the number of rows of the matrix op( B ). K must
+             be at least  zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by m  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+             LDA must be at least  max( 1, m ), otherwise  LDA must be at
+             least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  n by k  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+             LDB must be at least  max( 1, k ), otherwise  LDB must be at
+             least  max( 1, n ).
+             Unchanged on exit.
+
+    BETA   - COMPLEX         .
+             On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+             supplied as zero then C need not be set on input.
+             Unchanged on exit.
+
+    C      - COMPLEX          array of DIMENSION ( LDC, n ).
+             Before entry, the leading  m by n  part of the array  C must
+             contain the matrix  C,  except when  beta  is zero, in which
+             case C need not be set on entry.
+             On exit, the array  C  is overwritten by the  m by n  matrix
+             ( alpha*op( A )*op( B ) + beta*C ).
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+       conjugated or transposed, set  CONJA and CONJB  as true if  A  and
+       B  respectively are to be  transposed but  not conjugated  and set
+       NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
+       and the number of rows of  B  respectively.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    nota = lsame_(transa, "N");
+    notb = lsame_(transb, "N");
+    conja = lsame_(transa, "C");
+    conjb = lsame_(transb, "C");
+    if (nota) {
+	nrowa = *m;
+	ncola = *k;
+    } else {
+	nrowa = *k;
+	ncola = *m;
+    }
+    if (notb) {
+	nrowb = *k;
+    } else {
+	nrowb = *n;
+    }
+
+/*     Test the input parameters. */
+
+    info = 0;
+    if (! nota && ! conja && ! lsame_(transa, "T")) {
+	info = 1;
+    } else if (! notb && ! conjb && ! lsame_(transb, "T")) {
+	info = 2;
+    } else if (*m < 0) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*k < 0) {
+	info = 5;
+    } else if (*lda < max(1,nrowa)) {
+	info = 8;
+    } else if (*ldb < max(1,nrowb)) {
+	info = 10;
+    } else if (*ldc < max(1,*m)) {
+	info = 13;
+    }
+    if (info != 0) {
+	xerbla_("CGEMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (((alpha->r == 0.f && alpha->i == 0.f) ||
+	    (*k == 0)) && (beta->r == 1.f && beta->i == 0.f))) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0.f && alpha->i == 0.f) {
+	if (beta->r == 0.f && beta->i == 0.f) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * c_dim1;
+		    c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L10: */
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * c_dim1;
+		    i__4 = i__ + j * c_dim1;
+		    q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i,
+			    q__1.i = beta->r * c__[i__4].i + beta->i * c__[
+			    i__4].r;
+		    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (notb) {
+	if (nota) {
+
+/*           Form  C := alpha*A*B + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (beta->r == 0.f && beta->i == 0.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L50: */
+		    }
+		} else if ((beta->r != 1.f) || (beta->i != 0.f)) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__1.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L60: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = l + j * b_dim1;
+		    if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) {
+			i__3 = l + j * b_dim1;
+			q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
+				q__1.i = alpha->r * b[i__3].i + alpha->i * b[
+				i__3].r;
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    q__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
+				    .i + q__2.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+/* L90: */
+	    }
+	} else if (conja) {
+
+/*           Form  C := alpha*conjg( A' )*B + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * b_dim1;
+			q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i,
+				q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
+				.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L100: */
+		    }
+		    if (beta->r == 0.f && beta->i == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L110: */
+		}
+/* L120: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			i__4 = l + i__ * a_dim1;
+			i__5 = l + j * b_dim1;
+			q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
+				.i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
+				.i * b[i__5].r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L130: */
+		    }
+		    if (beta->r == 0.f && beta->i == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L140: */
+		}
+/* L150: */
+	    }
+	}
+    } else if (nota) {
+	if (conjb) {
+
+/*           Form  C := alpha*A*conjg( B' ) + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (beta->r == 0.f && beta->i == 0.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L160: */
+		    }
+		} else if ((beta->r != 1.f) || (beta->i != 0.f)) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__1.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L170: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * b_dim1;
+		    if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) {
+			r_cnjg(&q__2, &b[j + l * b_dim1]);
+			q__1.r = alpha->r * q__2.r - alpha->i * q__2.i,
+				q__1.i = alpha->r * q__2.i + alpha->i *
+				q__2.r;
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    q__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
+				    .i + q__2.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L180: */
+			}
+		    }
+/* L190: */
+		}
+/* L200: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A*B'          + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (beta->r == 0.f && beta->i == 0.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L210: */
+		    }
+		} else if ((beta->r != 1.f) || (beta->i != 0.f)) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__1.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L220: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * b_dim1;
+		    if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) {
+			i__3 = j + l * b_dim1;
+			q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
+				q__1.i = alpha->r * b[i__3].i + alpha->i * b[
+				i__3].r;
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    q__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
+				    .i + q__2.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L230: */
+			}
+		    }
+/* L240: */
+		}
+/* L250: */
+	    }
+	}
+    } else if (conja) {
+	if (conjb) {
+
+/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			r_cnjg(&q__4, &b[j + l * b_dim1]);
+			q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i =
+				q__3.r * q__4.i + q__3.i * q__4.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L260: */
+		    }
+		    if (beta->r == 0.f && beta->i == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L270: */
+		}
+/* L280: */
+	    }
+	} else {
+
+/*           Form  C := alpha*conjg( A' )*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			i__4 = j + l * b_dim1;
+			q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i,
+				q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
+				.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L290: */
+		    }
+		    if (beta->r == 0.f && beta->i == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L300: */
+		}
+/* L310: */
+	    }
+	}
+    } else {
+	if (conjb) {
+
+/*           Form  C := alpha*A'*conjg( B' ) + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			i__4 = l + i__ * a_dim1;
+			r_cnjg(&q__3, &b[j + l * b_dim1]);
+			q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i,
+				q__2.i = a[i__4].r * q__3.i + a[i__4].i *
+				q__3.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L320: */
+		    }
+		    if (beta->r == 0.f && beta->i == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L330: */
+		}
+/* L340: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			i__4 = l + i__ * a_dim1;
+			i__5 = j + l * b_dim1;
+			q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
+				.i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
+				.i * b[i__5].r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L350: */
+		    }
+		    if (beta->r == 0.f && beta->i == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, q__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L360: */
+		}
+/* L370: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CGEMM . */
+
+} /* cgemm_ */
+
+/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex *
+	alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
+	beta, complex *y, integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static complex temp;
+    static integer lenx, leny;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj;
+
+
+/*
+    Purpose
+    =======
+
+    CGEMV  performs one of the matrix-vector operations
+
+       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+
+       y := alpha*conjg( A' )*x + beta*y,
+
+    where alpha and beta are scalars, x and y are vectors and A is an
+    m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+
+                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+
+                TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+    X      - COMPLEX          array of DIMENSION at least
+             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+             Before entry, the incremented array X must contain the
+             vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - COMPLEX         .
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - COMPLEX          array of DIMENSION at least
+             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+             Before entry with BETA non-zero, the incremented array Y
+             must contain the vector y. On exit, Y is overwritten by the
+             updated vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
+	    ) {
+	info = 1;
+    } else if (*m < 0) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*lda < max(1,*m)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    } else if (*incy == 0) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("CGEMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (alpha->r == 0.f && alpha->i == 0.f && (
+	    beta->r == 1.f && beta->i == 0.f))) {
+	return 0;
+    }
+
+    noconj = lsame_(trans, "T");
+
+/*
+       Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+       up the start points in  X  and  Y.
+*/
+
+    if (lsame_(trans, "N")) {
+	lenx = *n;
+	leny = *m;
+    } else {
+	lenx = *m;
+	leny = *n;
+    }
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (leny - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+
+       First form  y := beta*y.
+*/
+
+    if ((beta->r != 1.f) || (beta->i != 0.f)) {
+	if (*incy == 1) {
+	    if (beta->r == 0.f && beta->i == 0.f) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    y[i__2].r = 0.f, y[i__2].i = 0.f;
+/* L10: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    i__3 = i__;
+		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (beta->r == 0.f && beta->i == 0.f) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    y[i__2].r = 0.f, y[i__2].i = 0.f;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    i__3 = iy;
+		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (alpha->r == 0.f && alpha->i == 0.f) {
+	return 0;
+    }
+    if (lsame_(trans, "N")) {
+
+/*        Form  y := alpha*A*x + y. */
+
+	jx = kx;
+	if (*incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) {
+		    i__2 = jx;
+		    q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    temp.r = q__1.r, temp.i = q__1.i;
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__;
+			i__4 = i__;
+			i__5 = i__ + j * a_dim1;
+			q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i +
+				q__2.i;
+			y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+/* L50: */
+		    }
+		}
+		jx += *incx;
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) {
+		    i__2 = jx;
+		    q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    temp.r = q__1.r, temp.i = q__1.i;
+		    iy = ky;
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = iy;
+			i__4 = iy;
+			i__5 = i__ + j * a_dim1;
+			q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i +
+				q__2.i;
+			y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+			iy += *incy;
+/* L70: */
+		    }
+		}
+		jx += *incx;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
+
+	jy = ky;
+	if (*incx == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp.r = 0.f, temp.i = 0.f;
+		if (noconj) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__;
+			q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
+				.i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
+				.i * x[i__4].r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L90: */
+		    }
+		} else {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			i__3 = i__;
+			q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i,
+				q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
+				.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L100: */
+		    }
+		}
+		i__2 = jy;
+		i__3 = jy;
+		q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i =
+			alpha->r * temp.i + alpha->i * temp.r;
+		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		jy += *incy;
+/* L110: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp.r = 0.f, temp.i = 0.f;
+		ix = kx;
+		if (noconj) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = ix;
+			q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
+				.i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
+				.i * x[i__4].r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+			ix += *incx;
+/* L120: */
+		    }
+		} else {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			i__3 = ix;
+			q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i,
+				q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
+				.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+			ix += *incx;
+/* L130: */
+		    }
+		}
+		i__2 = jy;
+		i__3 = jy;
+		q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i =
+			alpha->r * temp.i + alpha->i * temp.r;
+		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		jy += *incy;
+/* L140: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CGEMV . */
+
+} /* cgemv_ */
+
+/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex *
+	x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, ix, jy, kx, info;
+    static complex temp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CGERC  performs the rank 1 operation
+
+       A := alpha*x*conjg( y' ) + A,
+
+    where alpha is a scalar, x is an m element vector, y is an n element
+    vector and A is an m by n matrix.
+
+    Parameters
+    ==========
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - COMPLEX          array of dimension at least
+             ( 1 + ( m - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the m
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients. On exit, A is
+             overwritten by the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (*m < 0) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("CGERC ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (alpha->r == 0.f && alpha->i == 0.f)) {
+	return 0;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (*incy > 0) {
+	jy = 1;
+    } else {
+	jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) {
+		r_cnjg(&q__2, &y[jy]);
+		q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i =
+			alpha->r * q__2.i + alpha->i * q__2.r;
+		temp.r = q__1.r, temp.i = q__1.i;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = i__;
+		    q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
+		    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L10: */
+		}
+	    }
+	    jy += *incy;
+/* L20: */
+	}
+    } else {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*m - 1) * *incx;
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) {
+		r_cnjg(&q__2, &y[jy]);
+		q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i =
+			alpha->r * q__2.i + alpha->i * q__2.r;
+		temp.r = q__1.r, temp.i = q__1.i;
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = ix;
+		    q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
+		    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+		    ix += *incx;
+/* L30: */
+		}
+	    }
+	    jy += *incy;
+/* L40: */
+	}
+    }
+
+    return 0;
+
+/*     End of CGERC . */
+
+} /* cgerc_ */
+
+/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex *
+	x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2;
+
+    /* Local variables */
+    static integer i__, j, ix, jy, kx, info;
+    static complex temp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CGERU  performs the rank 1 operation
+
+       A := alpha*x*y' + A,
+
+    where alpha is a scalar, x is an m element vector, y is an n element
+    vector and A is an m by n matrix.
+
+    Parameters
+    ==========
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - COMPLEX          array of dimension at least
+             ( 1 + ( m - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the m
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients. On exit, A is
+             overwritten by the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (*m < 0) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("CGERU ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (alpha->r == 0.f && alpha->i == 0.f)) {
+	return 0;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (*incy > 0) {
+	jy = 1;
+    } else {
+	jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) {
+		i__2 = jy;
+		q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i =
+			 alpha->r * y[i__2].i + alpha->i * y[i__2].r;
+		temp.r = q__1.r, temp.i = q__1.i;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = i__;
+		    q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
+		    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L10: */
+		}
+	    }
+	    jy += *incy;
+/* L20: */
+	}
+    } else {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*m - 1) * *incx;
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.f) || (y[i__2].i != 0.f)) {
+		i__2 = jy;
+		q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i =
+			 alpha->r * y[i__2].i + alpha->i * y[i__2].r;
+		temp.r = q__1.r, temp.i = q__1.i;
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = ix;
+		    q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
+		    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+		    ix += *incx;
+/* L30: */
+		}
+	    }
+	    jy += *incy;
+/* L40: */
+	}
+    }
+
+    return 0;
+
+/*     End of CGERU . */
+
+} /* cgeru_ */
+
+/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex *
+	a, integer *lda, complex *x, integer *incx, complex *beta, complex *y,
+	 integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    real r__1;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static complex temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CHEMV  performs the matrix-vector  operation
+
+       y := alpha*A*x + beta*y,
+
+    where alpha and beta are scalars, x and y are n element vectors and
+    A is an n by n hermitian matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the hermitian matrix and the strictly
+             lower triangular part of A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the hermitian matrix and the strictly
+             upper triangular part of A is not referenced.
+             Note that the imaginary parts of the diagonal elements need
+             not be set and are assumed to be zero.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - COMPLEX         .
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y. On exit, Y is overwritten by the updated
+             vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*lda < max(1,*n)) {
+	info = 5;
+    } else if (*incx == 0) {
+	info = 7;
+    } else if (*incy == 0) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("CHEMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f &&
+	     beta->i == 0.f))) {
+	return 0;
+    }
+
+/*     Set up the start points in  X  and  Y. */
+
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (*n - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (*n - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+
+       First form  y := beta*y.
+*/
+
+    if ((beta->r != 1.f) || (beta->i != 0.f)) {
+	if (*incy == 1) {
+	    if (beta->r == 0.f && beta->i == 0.f) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    y[i__2].r = 0.f, y[i__2].i = 0.f;
+/* L10: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    i__3 = i__;
+		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (beta->r == 0.f && beta->i == 0.f) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    y[i__2].r = 0.f, y[i__2].i = 0.f;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    i__3 = iy;
+		    q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (alpha->r == 0.f && alpha->i == 0.f) {
+	return 0;
+    }
+    if (lsame_(uplo, "U")) {
+
+/*        Form  y  when A is stored in upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = q__1.r, temp1.i = q__1.i;
+		temp2.r = 0.f, temp2.i = 0.f;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__;
+		    i__4 = i__;
+		    i__5 = i__ + j * a_dim1;
+		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+		    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+		    i__3 = i__;
+		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+/* L50: */
+		}
+		i__2 = j;
+		i__3 = j;
+		i__4 = j + j * a_dim1;
+		r__1 = a[i__4].r;
+		q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+		q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
+		q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+/* L60: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = q__1.r, temp1.i = q__1.i;
+		temp2.r = 0.f, temp2.i = 0.f;
+		ix = kx;
+		iy = ky;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = iy;
+		    i__4 = iy;
+		    i__5 = i__ + j * a_dim1;
+		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+		    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+		    i__3 = ix;
+		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+		    ix += *incx;
+		    iy += *incy;
+/* L70: */
+		}
+		i__2 = jy;
+		i__3 = jy;
+		i__4 = j + j * a_dim1;
+		r__1 = a[i__4].r;
+		q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
+		q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
+		q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y  when A is stored in lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = q__1.r, temp1.i = q__1.i;
+		temp2.r = 0.f, temp2.i = 0.f;
+		i__2 = j;
+		i__3 = j;
+		i__4 = j + j * a_dim1;
+		r__1 = a[i__4].r;
+		q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    i__3 = i__;
+		    i__4 = i__;
+		    i__5 = i__ + j * a_dim1;
+		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+		    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+		    i__3 = i__;
+		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+/* L90: */
+		}
+		i__2 = j;
+		i__3 = j;
+		q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+/* L100: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = q__1.r, temp1.i = q__1.i;
+		temp2.r = 0.f, temp2.i = 0.f;
+		i__2 = jy;
+		i__3 = jy;
+		i__4 = j + j * a_dim1;
+		r__1 = a[i__4].r;
+		q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
+		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		ix = jx;
+		iy = jy;
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    ix += *incx;
+		    iy += *incy;
+		    i__3 = iy;
+		    i__4 = iy;
+		    i__5 = i__ + j * a_dim1;
+		    q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
+		    y[i__3].r = q__1.r, y[i__3].i = q__1.i;
+		    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+		    i__3 = ix;
+		    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
+			     q__3.r * x[i__3].i + q__3.i * x[i__3].r;
+		    q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+/* L110: */
+		}
+		i__2 = jy;
+		i__3 = jy;
+		q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
+		y[i__2].r = q__1.r, y[i__2].i = q__1.i;
+		jx += *incx;
+		jy += *incy;
+/* L120: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CHEMV . */
+
+} /* chemv_ */
+
+/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex *
+	x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+    real r__1;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static complex temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CHER2  performs the hermitian rank 2 operation
+
+       A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+
+    where alpha is a scalar, x and y are n element vectors and A is an n
+    by n hermitian matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the hermitian matrix and the strictly
+             lower triangular part of A is not referenced. On exit, the
+             upper triangular part of the array A is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the hermitian matrix and the strictly
+             upper triangular part of A is not referenced. On exit, the
+             lower triangular part of the array A is overwritten by the
+             lower triangular part of the updated matrix.
+             Note that the imaginary parts of the diagonal elements need
+             not be set, they are assumed to be zero, and on exit they
+             are set to zero.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*n)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("CHER2 ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (alpha->r == 0.f && alpha->i == 0.f)) {
+	return 0;
+    }
+
+/*
+       Set up the start points in X and Y if the increments are not both
+       unity.
+*/
+
+    if ((*incx != 1) || (*incy != 1)) {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*n - 1) * *incx;
+	}
+	if (*incy > 0) {
+	    ky = 1;
+	} else {
+	    ky = 1 - (*n - 1) * *incy;
+	}
+	jx = kx;
+	jy = ky;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+*/
+
+    if (lsame_(uplo, "U")) {
+
+/*        Form  A  when A is stored in the upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		i__3 = j;
+		if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3]
+			.r != 0.f) || (y[i__3].i != 0.f)))) {
+		    r_cnjg(&q__2, &y[j]);
+		    q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i =
+			    alpha->r * q__2.i + alpha->i * q__2.r;
+		    temp1.r = q__1.r, temp1.i = q__1.i;
+		    i__2 = j;
+		    q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    r_cnjg(&q__1, &q__2);
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = i__;
+			q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				q__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i +
+				q__3.i;
+			i__6 = i__;
+			q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				q__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+			a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L10: */
+		    }
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = j;
+		    q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    q__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = j;
+		    q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    q__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+		    r__1 = a[i__3].r + q__1.r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    r__1 = a[i__3].r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		i__3 = jy;
+		if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3]
+			.r != 0.f) || (y[i__3].i != 0.f)))) {
+		    r_cnjg(&q__2, &y[jy]);
+		    q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i =
+			    alpha->r * q__2.i + alpha->i * q__2.r;
+		    temp1.r = q__1.r, temp1.i = q__1.i;
+		    i__2 = jx;
+		    q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    r_cnjg(&q__1, &q__2);
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+		    ix = kx;
+		    iy = ky;
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = ix;
+			q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				q__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i +
+				q__3.i;
+			i__6 = iy;
+			q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				q__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+			a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			ix += *incx;
+			iy += *incy;
+/* L30: */
+		    }
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = jx;
+		    q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    q__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = jy;
+		    q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    q__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+		    r__1 = a[i__3].r + q__1.r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    r__1 = a[i__3].r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		}
+		jx += *incx;
+		jy += *incy;
+/* L40: */
+	    }
+	}
+    } else {
+
+/*        Form  A  when A is stored in the lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		i__3 = j;
+		if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3]
+			.r != 0.f) || (y[i__3].i != 0.f)))) {
+		    r_cnjg(&q__2, &y[j]);
+		    q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i =
+			    alpha->r * q__2.i + alpha->i * q__2.r;
+		    temp1.r = q__1.r, temp1.i = q__1.i;
+		    i__2 = j;
+		    q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    r_cnjg(&q__1, &q__2);
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = j;
+		    q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    q__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = j;
+		    q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    q__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+		    r__1 = a[i__3].r + q__1.r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = i__;
+			q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				q__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i +
+				q__3.i;
+			i__6 = i__;
+			q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				q__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+			a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L50: */
+		    }
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    r__1 = a[i__3].r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		}
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		i__3 = jy;
+		if (((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) || (((y[i__3]
+			.r != 0.f) || (y[i__3].i != 0.f)))) {
+		    r_cnjg(&q__2, &y[jy]);
+		    q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i =
+			    alpha->r * q__2.i + alpha->i * q__2.r;
+		    temp1.r = q__1.r, temp1.i = q__1.i;
+		    i__2 = jx;
+		    q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    r_cnjg(&q__1, &q__2);
+		    temp2.r = q__1.r, temp2.i = q__1.i;
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = jx;
+		    q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    q__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = jy;
+		    q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    q__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+		    r__1 = a[i__3].r + q__1.r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		    ix = jx;
+		    iy = jy;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			ix += *incx;
+			iy += *incy;
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = ix;
+			q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				q__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i +
+				q__3.i;
+			i__6 = iy;
+			q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				q__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+			a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L70: */
+		    }
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    r__1 = a[i__3].r;
+		    a[i__2].r = r__1, a[i__2].i = 0.f;
+		}
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CHER2 . */
+
+} /* cher2_ */
+
+/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k,
+	complex *alpha, complex *a, integer *lda, complex *b, integer *ldb,
+	real *beta, complex *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5, i__6, i__7;
+    real r__1;
+    complex q__1, q__2, q__3, q__4, q__5, q__6;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static complex temp1, temp2;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CHER2K  performs one of the hermitian rank 2k operations
+
+       C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C,
+
+    or
+
+       C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C,
+
+    where  alpha and beta  are scalars with  beta  real,  C is an  n by n
+    hermitian matrix and  A and B  are  n by k matrices in the first case
+    and  k by n  matrices in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          +
+                                           conjg( alpha )*B*conjg( A' ) +
+                                           beta*C.
+
+                TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          +
+                                           conjg( alpha )*conjg( B' )*A +
+                                           beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns  of the  matrices  A and B,  and on  entry  with
+             TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+             matrices  A and B.  K must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  k by n  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDB must be at least  max( 1, n ), otherwise  LDB must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - COMPLEX          array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  hermitian matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  hermitian matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+             Note that the imaginary parts of the diagonal elements need
+             not be set,  they are assumed to be zero,  and on exit they
+             are set to zero.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+    -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
+       Ed Anderson, Cray Research Inc.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldb < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldc < max(1,*n)) {
+	info = 12;
+    }
+    if (info != 0) {
+	xerbla_("CHER2K", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((alpha->r == 0.f && alpha->i == 0.f) || (*k == 0)) && *
+	    beta == 1.f)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0.f && alpha->i == 0.f) {
+	if (upper) {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L30: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*
+          Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) +
+                     C.
+*/
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L90: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L100: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    i__4 = j + l * b_dim1;
+		    if (((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) || (((b[
+			    i__4].r != 0.f) || (b[i__4].i != 0.f)))) {
+			r_cnjg(&q__2, &b[j + l * b_dim1]);
+			q__1.r = alpha->r * q__2.r - alpha->i * q__2.i,
+				q__1.i = alpha->r * q__2.i + alpha->i *
+				q__2.r;
+			temp1.r = q__1.r, temp1.i = q__1.i;
+			i__3 = j + l * a_dim1;
+			q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i,
+				q__2.i = alpha->r * a[i__3].i + alpha->i * a[
+				i__3].r;
+			r_cnjg(&q__1, &q__2);
+			temp2.r = q__1.r, temp2.i = q__1.i;
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__3.r = a[i__6].r * temp1.r - a[i__6].i *
+				    temp1.i, q__3.i = a[i__6].r * temp1.i + a[
+				    i__6].i * temp1.r;
+			    q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
+				    .i + q__3.i;
+			    i__7 = i__ + l * b_dim1;
+			    q__4.r = b[i__7].r * temp2.r - b[i__7].i *
+				    temp2.i, q__4.i = b[i__7].r * temp2.i + b[
+				    i__7].i * temp2.r;
+			    q__1.r = q__2.r + q__4.r, q__1.i = q__2.i +
+				    q__4.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L110: */
+			}
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = j + l * a_dim1;
+			q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i,
+				q__2.i = a[i__5].r * temp1.i + a[i__5].i *
+				temp1.r;
+			i__6 = j + l * b_dim1;
+			q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i,
+				q__3.i = b[i__6].r * temp2.i + b[i__6].i *
+				temp2.r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			r__1 = c__[i__4].r + q__1.r;
+			c__[i__3].r = r__1, c__[i__3].i = 0.f;
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L140: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L150: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    i__4 = j + l * b_dim1;
+		    if (((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) || (((b[
+			    i__4].r != 0.f) || (b[i__4].i != 0.f)))) {
+			r_cnjg(&q__2, &b[j + l * b_dim1]);
+			q__1.r = alpha->r * q__2.r - alpha->i * q__2.i,
+				q__1.i = alpha->r * q__2.i + alpha->i *
+				q__2.r;
+			temp1.r = q__1.r, temp1.i = q__1.i;
+			i__3 = j + l * a_dim1;
+			q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i,
+				q__2.i = alpha->r * a[i__3].i + alpha->i * a[
+				i__3].r;
+			r_cnjg(&q__1, &q__2);
+			temp2.r = q__1.r, temp2.i = q__1.i;
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__3.r = a[i__6].r * temp1.r - a[i__6].i *
+				    temp1.i, q__3.i = a[i__6].r * temp1.i + a[
+				    i__6].i * temp1.r;
+			    q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
+				    .i + q__3.i;
+			    i__7 = i__ + l * b_dim1;
+			    q__4.r = b[i__7].r * temp2.r - b[i__7].i *
+				    temp2.i, q__4.i = b[i__7].r * temp2.i + b[
+				    i__7].i * temp2.r;
+			    q__1.r = q__2.r + q__4.r, q__1.i = q__2.i +
+				    q__4.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L160: */
+			}
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = j + l * a_dim1;
+			q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i,
+				q__2.i = a[i__5].r * temp1.i + a[i__5].i *
+				temp1.r;
+			i__6 = j + l * b_dim1;
+			q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i,
+				q__3.i = b[i__6].r * temp2.i + b[i__6].i *
+				temp2.r;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			r__1 = c__[i__4].r + q__1.r;
+			c__[i__3].r = r__1, c__[i__3].i = 0.f;
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*
+          Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A +
+                     C.
+*/
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp1.r = 0.f, temp1.i = 0.f;
+		    temp2.r = 0.f, temp2.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * b_dim1;
+			q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i,
+				q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
+				.r;
+			q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
+			temp1.r = q__1.r, temp1.i = q__1.i;
+			r_cnjg(&q__3, &b[l + i__ * b_dim1]);
+			i__4 = l + j * a_dim1;
+			q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i,
+				q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
+				.r;
+			q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+			temp2.r = q__1.r, temp2.i = q__1.i;
+/* L190: */
+		    }
+		    if (i__ == j) {
+			if (*beta == 0.f) {
+			    i__3 = j + j * c_dim1;
+			    q__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    r_cnjg(&q__4, alpha);
+			    q__3.r = q__4.r * temp2.r - q__4.i * temp2.i,
+				    q__3.i = q__4.r * temp2.i + q__4.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    r__1 = q__1.r;
+			    c__[i__3].r = r__1, c__[i__3].i = 0.f;
+			} else {
+			    i__3 = j + j * c_dim1;
+			    i__4 = j + j * c_dim1;
+			    q__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    r_cnjg(&q__4, alpha);
+			    q__3.r = q__4.r * temp2.r - q__4.i * temp2.i,
+				    q__3.i = q__4.r * temp2.i + q__4.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    r__1 = *beta * c__[i__4].r + q__1.r;
+			    c__[i__3].r = r__1, c__[i__3].i = 0.f;
+			}
+		    } else {
+			if (*beta == 0.f) {
+			    i__3 = i__ + j * c_dim1;
+			    q__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    r_cnjg(&q__4, alpha);
+			    q__3.r = q__4.r * temp2.r - q__4.i * temp2.i,
+				    q__3.i = q__4.r * temp2.i + q__4.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+			} else {
+			    i__3 = i__ + j * c_dim1;
+			    i__4 = i__ + j * c_dim1;
+			    q__3.r = *beta * c__[i__4].r, q__3.i = *beta *
+				    c__[i__4].i;
+			    q__4.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__4.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    q__2.r = q__3.r + q__4.r, q__2.i = q__3.i +
+				    q__4.i;
+			    r_cnjg(&q__6, alpha);
+			    q__5.r = q__6.r * temp2.r - q__6.i * temp2.i,
+				    q__5.i = q__6.r * temp2.i + q__6.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__5.r, q__1.i = q__2.i +
+				    q__5.i;
+			    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+			}
+		    }
+/* L200: */
+		}
+/* L210: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+		    temp1.r = 0.f, temp1.i = 0.f;
+		    temp2.r = 0.f, temp2.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * b_dim1;
+			q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i,
+				q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
+				.r;
+			q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
+			temp1.r = q__1.r, temp1.i = q__1.i;
+			r_cnjg(&q__3, &b[l + i__ * b_dim1]);
+			i__4 = l + j * a_dim1;
+			q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i,
+				q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
+				.r;
+			q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
+			temp2.r = q__1.r, temp2.i = q__1.i;
+/* L220: */
+		    }
+		    if (i__ == j) {
+			if (*beta == 0.f) {
+			    i__3 = j + j * c_dim1;
+			    q__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    r_cnjg(&q__4, alpha);
+			    q__3.r = q__4.r * temp2.r - q__4.i * temp2.i,
+				    q__3.i = q__4.r * temp2.i + q__4.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    r__1 = q__1.r;
+			    c__[i__3].r = r__1, c__[i__3].i = 0.f;
+			} else {
+			    i__3 = j + j * c_dim1;
+			    i__4 = j + j * c_dim1;
+			    q__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    r_cnjg(&q__4, alpha);
+			    q__3.r = q__4.r * temp2.r - q__4.i * temp2.i,
+				    q__3.i = q__4.r * temp2.i + q__4.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    r__1 = *beta * c__[i__4].r + q__1.r;
+			    c__[i__3].r = r__1, c__[i__3].i = 0.f;
+			}
+		    } else {
+			if (*beta == 0.f) {
+			    i__3 = i__ + j * c_dim1;
+			    q__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    r_cnjg(&q__4, alpha);
+			    q__3.r = q__4.r * temp2.r - q__4.i * temp2.i,
+				    q__3.i = q__4.r * temp2.i + q__4.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+			} else {
+			    i__3 = i__ + j * c_dim1;
+			    i__4 = i__ + j * c_dim1;
+			    q__3.r = *beta * c__[i__4].r, q__3.i = *beta *
+				    c__[i__4].i;
+			    q__4.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    q__4.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    q__2.r = q__3.r + q__4.r, q__2.i = q__3.i +
+				    q__4.i;
+			    r_cnjg(&q__6, alpha);
+			    q__5.r = q__6.r * temp2.r - q__6.i * temp2.i,
+				    q__5.i = q__6.r * temp2.i + q__6.i *
+				    temp2.r;
+			    q__1.r = q__2.r + q__5.r, q__1.i = q__2.i +
+				    q__5.i;
+			    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+			}
+		    }
+/* L230: */
+		}
+/* L240: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CHER2K. */
+
+} /* cher2k_ */
+
+/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k,
+	real *alpha, complex *a, integer *lda, real *beta, complex *c__,
+	integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5,
+	    i__6;
+    real r__1;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static complex temp;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static real rtemp;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    CHERK  performs one of the hermitian rank k operations
+
+       C := alpha*A*conjg( A' ) + beta*C,
+
+    or
+
+       C := alpha*conjg( A' )*A + beta*C,
+
+    where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+    matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+    matrix in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C.
+
+                TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns   of  the   matrix   A,   and  on   entry   with
+             TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+             matrix A.  K must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - COMPLEX          array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  hermitian matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  hermitian matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+             Note that the imaginary parts of the diagonal elements need
+             not be set,  they are assumed to be zero,  and on exit they
+             are set to zero.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+    -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
+       Ed Anderson, Cray Research Inc.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldc < max(1,*n)) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("CHERK ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((*alpha == 0.f) || (*k == 0)) && *beta == 1.f)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.f) {
+	if (upper) {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L30: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  C := alpha*A*conjg( A' ) + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L90: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L100: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) {
+			r_cnjg(&q__2, &a[j + l * a_dim1]);
+			q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    q__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
+				    .i + q__2.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L110: */
+			}
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = i__ + l * a_dim1;
+			q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				q__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			r__1 = c__[i__4].r + q__1.r;
+			c__[i__3].r = r__1, c__[i__3].i = 0.f;
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0.f, c__[i__3].i = 0.f;
+/* L140: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L150: */
+		    }
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) {
+			r_cnjg(&q__2, &a[j + l * a_dim1]);
+			q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = j + l * a_dim1;
+			q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				q__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			r__1 = c__[i__4].r + q__1.r;
+			c__[i__3].r = r__1, c__[i__3].i = 0.f;
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    q__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
+				    .i + q__2.i;
+			    c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
+/* L160: */
+			}
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*        Form  C := alpha*conjg( A' )*A + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * a_dim1;
+			q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i,
+				q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
+				.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L190: */
+		    }
+		    if (*beta == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[
+				i__4].i;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L200: */
+		}
+		rtemp = 0.f;
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    r_cnjg(&q__3, &a[l + j * a_dim1]);
+		    i__3 = l + j * a_dim1;
+		    q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i =
+			     q__3.r * a[i__3].i + q__3.i * a[i__3].r;
+		    q__1.r = rtemp + q__2.r, q__1.i = q__2.i;
+		    rtemp = q__1.r;
+/* L210: */
+		}
+		if (*beta == 0.f) {
+		    i__2 = j + j * c_dim1;
+		    r__1 = *alpha * rtemp;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *alpha * rtemp + *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		}
+/* L220: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		rtemp = 0.f;
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    r_cnjg(&q__3, &a[l + j * a_dim1]);
+		    i__3 = l + j * a_dim1;
+		    q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i =
+			     q__3.r * a[i__3].i + q__3.i * a[i__3].r;
+		    q__1.r = rtemp + q__2.r, q__1.i = q__2.i;
+		    rtemp = q__1.r;
+/* L230: */
+		}
+		if (*beta == 0.f) {
+		    i__2 = j + j * c_dim1;
+		    r__1 = *alpha * rtemp;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    r__1 = *alpha * rtemp + *beta * c__[i__3].r;
+		    c__[i__2].r = r__1, c__[i__2].i = 0.f;
+		}
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    temp.r = 0.f, temp.i = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			r_cnjg(&q__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * a_dim1;
+			q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i,
+				q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
+				.r;
+			q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
+			temp.r = q__1.r, temp.i = q__1.i;
+/* L240: */
+		    }
+		    if (*beta == 0.f) {
+			i__3 = i__ + j * c_dim1;
+			q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i;
+			i__4 = i__ + j * c_dim1;
+			q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[
+				i__4].i;
+			q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+		    }
+/* L250: */
+		}
+/* L260: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CHERK . */
+
+} /* cherk_ */
+
+/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer *
+	incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, nincx;
+
+
+/*
+       scales a vector by a constant.
+       jack dongarra, linpack,  3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cx;
+
+    /* Function Body */
+    if ((*n <= 0) || (*incx <= 0)) {
+	return 0;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    nincx = *n * *incx;
+    i__1 = nincx;
+    i__2 = *incx;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	i__3 = i__;
+	i__4 = i__;
+	q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[
+		i__4].i + ca->i * cx[i__4].r;
+	cx[i__3].r = q__1.r, cx[i__3].i = q__1.i;
+/* L10: */
+    }
+    return 0;
+
+/*        code for increment equal to 1 */
+
+L20:
+    i__2 = *n;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+	i__1 = i__;
+	i__3 = i__;
+	q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[
+		i__3].i + ca->i * cx[i__3].r;
+	cx[i__1].r = q__1.r, cx[i__1].i = q__1.i;
+/* L30: */
+    }
+    return 0;
+} /* cscal_ */
+
+/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4;
+    real r__1, r__2;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, nincx;
+
+
+/*
+       scales a complex vector by a real constant.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cx;
+
+    /* Function Body */
+    if ((*n <= 0) || (*incx <= 0)) {
+	return 0;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    nincx = *n * *incx;
+    i__1 = nincx;
+    i__2 = *incx;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	i__3 = i__;
+	i__4 = i__;
+	r__1 = *sa * cx[i__4].r;
+	r__2 = *sa * r_imag(&cx[i__]);
+	q__1.r = r__1, q__1.i = r__2;
+	cx[i__3].r = q__1.r, cx[i__3].i = q__1.i;
+/* L10: */
+    }
+    return 0;
+
+/*        code for increment equal to 1 */
+
+L20:
+    i__2 = *n;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+	i__1 = i__;
+	i__3 = i__;
+	r__1 = *sa * cx[i__3].r;
+	r__2 = *sa * r_imag(&cx[i__]);
+	q__1.r = r__1, q__1.i = r__2;
+	cx[i__1].r = q__1.r, cx[i__1].i = q__1.i;
+/* L30: */
+    }
+    return 0;
+} /* csscal_ */
+
+/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex *
+	cy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static complex ctemp;
+
+
+/*
+       interchanges two vectors.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+         code for unequal increments or equal increments not equal
+           to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i;
+	i__2 = ix;
+	i__3 = iy;
+	cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i;
+	i__2 = iy;
+	cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*       code for both increments equal to 1 */
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i;
+	i__2 = i__;
+	i__3 = i__;
+	cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i;
+	i__2 = i__;
+	cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i;
+/* L30: */
+    }
+    return 0;
+} /* cswap_ */
+
+/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, complex *alpha, complex *a, integer *lda,
+	complex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5,
+	    i__6;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static complex temp;
+    extern logical lsame_(char *, char *);
+    static logical lside;
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    CTRMM  performs one of the matrix-matrix operations
+
+       B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+
+    where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry,  SIDE specifies whether  op( A ) multiplies B from
+             the left or right as follows:
+
+                SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+
+                SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - COMPLEX          array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain the matrix  B,  and  on exit  is overwritten  by the
+             transformed matrix.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    noconj = lsame_(transa, "T");
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("CTRMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0.f && alpha->i == 0.f) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		b[i__3].r = 0.f, b[i__3].i = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*A*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			i__3 = k + j * b_dim1;
+			if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) {
+			    i__3 = k + j * b_dim1;
+			    q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
+				    .i, q__1.i = alpha->r * b[i__3].i +
+				    alpha->i * b[i__3].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    i__3 = k - 1;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * a_dim1;
+				q__2.r = temp.r * a[i__6].r - temp.i * a[i__6]
+					.i, q__2.i = temp.r * a[i__6].i +
+					temp.i * a[i__6].r;
+				q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
+					.i + q__2.i;
+				b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L30: */
+			    }
+			    if (nounit) {
+				i__3 = k + k * a_dim1;
+				q__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
+					.i, q__1.i = temp.r * a[i__3].i +
+					temp.i * a[i__3].r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__3 = k + j * b_dim1;
+			    b[i__3].r = temp.r, b[i__3].i = temp.i;
+			}
+/* L40: */
+		    }
+/* L50: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (k = *m; k >= 1; --k) {
+			i__2 = k + j * b_dim1;
+			if ((b[i__2].r != 0.f) || (b[i__2].i != 0.f)) {
+			    i__2 = k + j * b_dim1;
+			    q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2]
+				    .i, q__1.i = alpha->r * b[i__2].i +
+				    alpha->i * b[i__2].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    i__2 = k + j * b_dim1;
+			    b[i__2].r = temp.r, b[i__2].i = temp.i;
+			    if (nounit) {
+				i__2 = k + j * b_dim1;
+				i__3 = k + j * b_dim1;
+				i__4 = k + k * a_dim1;
+				q__1.r = b[i__3].r * a[i__4].r - b[i__3].i *
+					a[i__4].i, q__1.i = b[i__3].r * a[
+					i__4].i + b[i__3].i * a[i__4].r;
+				b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+			    }
+			    i__2 = *m;
+			    for (i__ = k + 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * a_dim1;
+				q__2.r = temp.r * a[i__5].r - temp.i * a[i__5]
+					.i, q__2.i = temp.r * a[i__5].i +
+					temp.i * a[i__5].r;
+				q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
+					.i + q__2.i;
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L60: */
+			    }
+			}
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			i__2 = i__ + j * b_dim1;
+			temp.r = b[i__2].r, temp.i = b[i__2].i;
+			if (noconj) {
+			    if (nounit) {
+				i__2 = i__ + i__ * a_dim1;
+				q__1.r = temp.r * a[i__2].r - temp.i * a[i__2]
+					.i, q__1.i = temp.r * a[i__2].i +
+					temp.i * a[i__2].r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__2 = i__ - 1;
+			    for (k = 1; k <= i__2; ++k) {
+				i__3 = k + i__ * a_dim1;
+				i__4 = k + j * b_dim1;
+				q__2.r = a[i__3].r * b[i__4].r - a[i__3].i *
+					b[i__4].i, q__2.i = a[i__3].r * b[
+					i__4].i + a[i__3].i * b[i__4].r;
+				q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L90: */
+			    }
+			} else {
+			    if (nounit) {
+				r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
+				q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+					q__1.i = temp.r * q__2.i + temp.i *
+					q__2.r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__2 = i__ - 1;
+			    for (k = 1; k <= i__2; ++k) {
+				r_cnjg(&q__3, &a[k + i__ * a_dim1]);
+				i__3 = k + j * b_dim1;
+				q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3]
+					.i, q__2.i = q__3.r * b[i__3].i +
+					q__3.i * b[i__3].r;
+				q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L100: */
+			    }
+			}
+			i__2 = i__ + j * b_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L110: */
+		    }
+/* L120: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * b_dim1;
+			temp.r = b[i__3].r, temp.i = b[i__3].i;
+			if (noconj) {
+			    if (nounit) {
+				i__3 = i__ + i__ * a_dim1;
+				q__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
+					.i, q__1.i = temp.r * a[i__3].i +
+					temp.i * a[i__3].r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__3 = *m;
+			    for (k = i__ + 1; k <= i__3; ++k) {
+				i__4 = k + i__ * a_dim1;
+				i__5 = k + j * b_dim1;
+				q__2.r = a[i__4].r * b[i__5].r - a[i__4].i *
+					b[i__5].i, q__2.i = a[i__4].r * b[
+					i__5].i + a[i__4].i * b[i__5].r;
+				q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L130: */
+			    }
+			} else {
+			    if (nounit) {
+				r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
+				q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+					q__1.i = temp.r * q__2.i + temp.i *
+					q__2.r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__3 = *m;
+			    for (k = i__ + 1; k <= i__3; ++k) {
+				r_cnjg(&q__3, &a[k + i__ * a_dim1]);
+				i__4 = k + j * b_dim1;
+				q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4]
+					.i, q__2.i = q__3.r * b[i__4].i +
+					q__3.i * b[i__4].r;
+				q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L140: */
+			    }
+			}
+			i__3 = i__ + j * b_dim1;
+			q__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				q__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L150: */
+		    }
+/* L160: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*A. */
+
+	    if (upper) {
+		for (j = *n; j >= 1; --j) {
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			i__1 = j + j * a_dim1;
+			q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				q__1.i = temp.r * a[i__1].i + temp.i * a[i__1]
+				.r;
+			temp.r = q__1.r, temp.i = q__1.i;
+		    }
+		    i__1 = *m;
+		    for (i__ = 1; i__ <= i__1; ++i__) {
+			i__2 = i__ + j * b_dim1;
+			i__3 = i__ + j * b_dim1;
+			q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				q__1.i = temp.r * b[i__3].i + temp.i * b[i__3]
+				.r;
+			b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L170: */
+		    }
+		    i__1 = j - 1;
+		    for (k = 1; k <= i__1; ++k) {
+			i__2 = k + j * a_dim1;
+			if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) {
+			    i__2 = k + j * a_dim1;
+			    q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2]
+				    .i, q__1.i = alpha->r * a[i__2].i +
+				    alpha->i * a[i__2].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * b_dim1;
+				q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
+					.i, q__2.i = temp.r * b[i__5].i +
+					temp.i * b[i__5].r;
+				q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
+					.i + q__2.i;
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L180: */
+			    }
+			}
+/* L190: */
+		    }
+/* L200: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			i__2 = j + j * a_dim1;
+			q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				q__1.i = temp.r * a[i__2].i + temp.i * a[i__2]
+				.r;
+			temp.r = q__1.r, temp.i = q__1.i;
+		    }
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * b_dim1;
+			i__4 = i__ + j * b_dim1;
+			q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				q__1.i = temp.r * b[i__4].i + temp.i * b[i__4]
+				.r;
+			b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L210: */
+		    }
+		    i__2 = *n;
+		    for (k = j + 1; k <= i__2; ++k) {
+			i__3 = k + j * a_dim1;
+			if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) {
+			    i__3 = k + j * a_dim1;
+			    q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3]
+				    .i, q__1.i = alpha->r * a[i__3].i +
+				    alpha->i * a[i__3].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * b_dim1;
+				q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
+					.i, q__2.i = temp.r * b[i__6].i +
+					temp.i * b[i__6].r;
+				q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
+					.i + q__2.i;
+				b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L220: */
+			    }
+			}
+/* L230: */
+		    }
+/* L240: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ). */
+
+	    if (upper) {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    i__2 = k - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			i__3 = j + k * a_dim1;
+			if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) {
+			    if (noconj) {
+				i__3 = j + k * a_dim1;
+				q__1.r = alpha->r * a[i__3].r - alpha->i * a[
+					i__3].i, q__1.i = alpha->r * a[i__3]
+					.i + alpha->i * a[i__3].r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    } else {
+				r_cnjg(&q__2, &a[j + k * a_dim1]);
+				q__1.r = alpha->r * q__2.r - alpha->i *
+					q__2.i, q__1.i = alpha->r * q__2.i +
+					alpha->i * q__2.r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * b_dim1;
+				q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
+					.i, q__2.i = temp.r * b[i__6].i +
+					temp.i * b[i__6].r;
+				q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
+					.i + q__2.i;
+				b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L250: */
+			    }
+			}
+/* L260: */
+		    }
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			if (noconj) {
+			    i__2 = k + k * a_dim1;
+			    q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				    q__1.i = temp.r * a[i__2].i + temp.i * a[
+				    i__2].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			} else {
+			    r_cnjg(&q__2, &a[k + k * a_dim1]);
+			    q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+				    q__1.i = temp.r * q__2.i + temp.i *
+				    q__2.r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    }
+		    if ((temp.r != 1.f) || (temp.i != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + k * b_dim1;
+			    i__4 = i__ + k * b_dim1;
+			    q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				    q__1.i = temp.r * b[i__4].i + temp.i * b[
+				    i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L270: */
+			}
+		    }
+/* L280: */
+		}
+	    } else {
+		for (k = *n; k >= 1; --k) {
+		    i__1 = *n;
+		    for (j = k + 1; j <= i__1; ++j) {
+			i__2 = j + k * a_dim1;
+			if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) {
+			    if (noconj) {
+				i__2 = j + k * a_dim1;
+				q__1.r = alpha->r * a[i__2].r - alpha->i * a[
+					i__2].i, q__1.i = alpha->r * a[i__2]
+					.i + alpha->i * a[i__2].r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    } else {
+				r_cnjg(&q__2, &a[j + k * a_dim1]);
+				q__1.r = alpha->r * q__2.r - alpha->i *
+					q__2.i, q__1.i = alpha->r * q__2.i +
+					alpha->i * q__2.r;
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * b_dim1;
+				q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
+					.i, q__2.i = temp.r * b[i__5].i +
+					temp.i * b[i__5].r;
+				q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
+					.i + q__2.i;
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L290: */
+			    }
+			}
+/* L300: */
+		    }
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			if (noconj) {
+			    i__1 = k + k * a_dim1;
+			    q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				    q__1.i = temp.r * a[i__1].i + temp.i * a[
+				    i__1].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			} else {
+			    r_cnjg(&q__2, &a[k + k * a_dim1]);
+			    q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+				    q__1.i = temp.r * q__2.i + temp.i *
+				    q__2.r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    }
+		    if ((temp.r != 1.f) || (temp.i != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + k * b_dim1;
+			    i__3 = i__ + k * b_dim1;
+			    q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				    q__1.i = temp.r * b[i__3].i + temp.i * b[
+				    i__3].r;
+			    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L310: */
+			}
+		    }
+/* L320: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CTRMM . */
+
+} /* ctrmm_ */
+
+/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n,
+	complex *a, integer *lda, complex *x, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, ix, jx, kx, info;
+    static complex temp;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    CTRMV  performs one of the matrix-vector operations
+
+       x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+
+    where x is an n element vector and  A is an n by n unit, or non-unit,
+    upper or lower triangular matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   x := A*x.
+
+                TRANS = 'T' or 't'   x := A'*x.
+
+                TRANS = 'C' or 'c'   x := conjg( A' )*x.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit
+             triangular as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular matrix and the strictly lower triangular part of
+             A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular matrix and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of
+             A are not referenced either, but are assumed to be unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x. On exit, X is overwritten with the
+             tranformed vector x.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < max(1,*n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("CTRMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    noconj = lsame_(trans, "T");
+    nounit = lsame_(diag, "N");
+
+/*
+       Set up the start point in X if the increment is not unity. This
+       will be  ( N - 1 )*INCX  too small for descending loops.
+*/
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := A*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) {
+			i__2 = j;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__;
+			    i__4 = i__;
+			    i__5 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    q__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i +
+				    q__2.i;
+			    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+/* L10: */
+			}
+			if (nounit) {
+			    i__2 = j;
+			    i__3 = j;
+			    i__4 = j + j * a_dim1;
+			    q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
+				    i__4].i, q__1.i = x[i__3].r * a[i__4].i +
+				    x[i__3].i * a[i__4].r;
+			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = jx;
+		    if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) {
+			i__2 = jx;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			ix = kx;
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = ix;
+			    i__4 = ix;
+			    i__5 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    q__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i +
+				    q__2.i;
+			    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+			    ix += *incx;
+/* L30: */
+			}
+			if (nounit) {
+			    i__2 = jx;
+			    i__3 = jx;
+			    i__4 = j + j * a_dim1;
+			    q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
+				    i__4].i, q__1.i = x[i__3].r * a[i__4].i +
+				    x[i__3].i * a[i__4].r;
+			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+			}
+		    }
+		    jx += *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) {
+			i__1 = j;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = i__;
+			    i__3 = i__;
+			    i__4 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+				    q__2.i = temp.r * a[i__4].i + temp.i * a[
+				    i__4].r;
+			    q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i +
+				    q__2.i;
+			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+/* L50: */
+			}
+			if (nounit) {
+			    i__1 = j;
+			    i__2 = j;
+			    i__3 = j + j * a_dim1;
+			    q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
+				    i__3].i, q__1.i = x[i__2].r * a[i__3].i +
+				    x[i__2].i * a[i__3].r;
+			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    i__1 = jx;
+		    if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) {
+			i__1 = jx;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			ix = kx;
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = ix;
+			    i__3 = ix;
+			    i__4 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+				    q__2.i = temp.r * a[i__4].i + temp.i * a[
+				    i__4].r;
+			    q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i +
+				    q__2.i;
+			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+			    ix -= *incx;
+/* L70: */
+			}
+			if (nounit) {
+			    i__1 = jx;
+			    i__2 = jx;
+			    i__3 = j + j * a_dim1;
+			    q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
+				    i__3].i, q__1.i = x[i__2].r * a[i__3].i +
+				    x[i__2].i * a[i__3].r;
+			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+			}
+		    }
+		    jx -= *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := A'*x  or  x := conjg( A' )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    if (noconj) {
+			if (nounit) {
+			    i__1 = j + j * a_dim1;
+			    q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				    q__1.i = temp.r * a[i__1].i + temp.i * a[
+				    i__1].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    i__1 = i__ + j * a_dim1;
+			    i__2 = i__;
+			    q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+				    i__2].i, q__2.i = a[i__1].r * x[i__2].i +
+				    a[i__1].i * x[i__2].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L90: */
+			}
+		    } else {
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+				    q__1.i = temp.r * q__2.i + temp.i *
+				    q__2.r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__1 = i__;
+			    q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i,
+				    q__2.i = q__3.r * x[i__1].i + q__3.i * x[
+				    i__1].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L100: */
+			}
+		    }
+		    i__1 = j;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+/* L110: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    i__1 = jx;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    ix = jx;
+		    if (noconj) {
+			if (nounit) {
+			    i__1 = j + j * a_dim1;
+			    q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				    q__1.i = temp.r * a[i__1].i + temp.i * a[
+				    i__1].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    ix -= *incx;
+			    i__1 = i__ + j * a_dim1;
+			    i__2 = ix;
+			    q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+				    i__2].i, q__2.i = a[i__1].r * x[i__2].i +
+				    a[i__1].i * x[i__2].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L120: */
+			}
+		    } else {
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+				    q__1.i = temp.r * q__2.i + temp.i *
+				    q__2.r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    ix -= *incx;
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__1 = ix;
+			    q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i,
+				    q__2.i = q__3.r * x[i__1].i + q__3.i * x[
+				    i__1].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L130: */
+			}
+		    }
+		    i__1 = jx;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+		    jx -= *incx;
+/* L140: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    if (noconj) {
+			if (nounit) {
+			    i__2 = j + j * a_dim1;
+			    q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				    q__1.i = temp.r * a[i__2].i + temp.i * a[
+				    i__2].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = i__;
+			    q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, q__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L150: */
+			}
+		    } else {
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+				    q__1.i = temp.r * q__2.i + temp.i *
+				    q__2.r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__3 = i__;
+			    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i,
+				    q__2.i = q__3.r * x[i__3].i + q__3.i * x[
+				    i__3].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L160: */
+			}
+		    }
+		    i__2 = j;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+/* L170: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = jx;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    ix = jx;
+		    if (noconj) {
+			if (nounit) {
+			    i__2 = j + j * a_dim1;
+			    q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				    q__1.i = temp.r * a[i__2].i + temp.i * a[
+				    i__2].r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    ix += *incx;
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = ix;
+			    q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, q__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L180: */
+			}
+		    } else {
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    q__1.r = temp.r * q__2.r - temp.i * q__2.i,
+				    q__1.i = temp.r * q__2.i + temp.i *
+				    q__2.r;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    ix += *incx;
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__3 = ix;
+			    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i,
+				    q__2.i = q__3.r * x[i__3].i + q__3.i * x[
+				    i__3].r;
+			    q__1.r = temp.r + q__2.r, q__1.i = temp.i +
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L190: */
+			}
+		    }
+		    i__2 = jx;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+		    jx += *incx;
+/* L200: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CTRMV . */
+
+} /* ctrmv_ */
+
+/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, complex *alpha, complex *a, integer *lda,
+	complex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5,
+	    i__6, i__7;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static complex temp;
+    extern logical lsame_(char *, char *);
+    static logical lside;
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    CTRSM  solves one of the matrix equations
+
+       op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+
+    where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+
+    The matrix X is overwritten on B.
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry, SIDE specifies whether op( A ) appears on the left
+             or right of X as follows:
+
+                SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+
+                SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX         .
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - COMPLEX          array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain  the  right-hand  side  matrix  B,  and  on exit  is
+             overwritten by the solution matrix  X.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    noconj = lsame_(transa, "T");
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("CTRSM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0.f && alpha->i == 0.f) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		b[i__3].r = 0.f, b[i__3].i = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*inv( A )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if ((alpha->r != 1.f) || (alpha->i != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, q__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L30: */
+			}
+		    }
+		    for (k = *m; k >= 1; --k) {
+			i__2 = k + j * b_dim1;
+			if ((b[i__2].r != 0.f) || (b[i__2].i != 0.f)) {
+			    if (nounit) {
+				i__2 = k + j * b_dim1;
+				c_div(&q__1, &b[k + j * b_dim1], &a[k + k *
+					a_dim1]);
+				b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+			    }
+			    i__2 = k - 1;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = k + j * b_dim1;
+				i__6 = i__ + k * a_dim1;
+				q__2.r = b[i__5].r * a[i__6].r - b[i__5].i *
+					a[i__6].i, q__2.i = b[i__5].r * a[
+					i__6].i + b[i__5].i * a[i__6].r;
+				q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
+					.i - q__2.i;
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L40: */
+			    }
+			}
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if ((alpha->r != 1.f) || (alpha->i != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, q__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L70: */
+			}
+		    }
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			i__3 = k + j * b_dim1;
+			if ((b[i__3].r != 0.f) || (b[i__3].i != 0.f)) {
+			    if (nounit) {
+				i__3 = k + j * b_dim1;
+				c_div(&q__1, &b[k + j * b_dim1], &a[k + k *
+					a_dim1]);
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+			    }
+			    i__3 = *m;
+			    for (i__ = k + 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = k + j * b_dim1;
+				i__7 = i__ + k * a_dim1;
+				q__2.r = b[i__6].r * a[i__7].r - b[i__6].i *
+					a[i__7].i, q__2.i = b[i__6].r * a[
+					i__7].i + b[i__6].i * a[i__7].r;
+				q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
+					.i - q__2.i;
+				b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L80: */
+			    }
+			}
+/* L90: */
+		    }
+/* L100: */
+		}
+	    }
+	} else {
+
+/*
+             Form  B := alpha*inv( A' )*B
+             or    B := alpha*inv( conjg( A' ) )*B.
+*/
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * b_dim1;
+			q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
+				q__1.i = alpha->r * b[i__3].i + alpha->i * b[
+				i__3].r;
+			temp.r = q__1.r, temp.i = q__1.i;
+			if (noconj) {
+			    i__3 = i__ - 1;
+			    for (k = 1; k <= i__3; ++k) {
+				i__4 = k + i__ * a_dim1;
+				i__5 = k + j * b_dim1;
+				q__2.r = a[i__4].r * b[i__5].r - a[i__4].i *
+					b[i__5].i, q__2.i = a[i__4].r * b[
+					i__5].i + a[i__4].i * b[i__5].r;
+				q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L110: */
+			    }
+			    if (nounit) {
+				c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]);
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			} else {
+			    i__3 = i__ - 1;
+			    for (k = 1; k <= i__3; ++k) {
+				r_cnjg(&q__3, &a[k + i__ * a_dim1]);
+				i__4 = k + j * b_dim1;
+				q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4]
+					.i, q__2.i = q__3.r * b[i__4].i +
+					q__3.i * b[i__4].r;
+				q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L120: */
+			    }
+			    if (nounit) {
+				r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
+				c_div(&q__1, &temp, &q__2);
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			}
+			i__3 = i__ + j * b_dim1;
+			b[i__3].r = temp.r, b[i__3].i = temp.i;
+/* L130: */
+		    }
+/* L140: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			i__2 = i__ + j * b_dim1;
+			q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i,
+				q__1.i = alpha->r * b[i__2].i + alpha->i * b[
+				i__2].r;
+			temp.r = q__1.r, temp.i = q__1.i;
+			if (noconj) {
+			    i__2 = *m;
+			    for (k = i__ + 1; k <= i__2; ++k) {
+				i__3 = k + i__ * a_dim1;
+				i__4 = k + j * b_dim1;
+				q__2.r = a[i__3].r * b[i__4].r - a[i__3].i *
+					b[i__4].i, q__2.i = a[i__3].r * b[
+					i__4].i + a[i__3].i * b[i__4].r;
+				q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L150: */
+			    }
+			    if (nounit) {
+				c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]);
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			} else {
+			    i__2 = *m;
+			    for (k = i__ + 1; k <= i__2; ++k) {
+				r_cnjg(&q__3, &a[k + i__ * a_dim1]);
+				i__3 = k + j * b_dim1;
+				q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3]
+					.i, q__2.i = q__3.r * b[i__3].i +
+					q__3.i * b[i__3].r;
+				q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+					q__2.i;
+				temp.r = q__1.r, temp.i = q__1.i;
+/* L160: */
+			    }
+			    if (nounit) {
+				r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
+				c_div(&q__1, &temp, &q__2);
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			}
+			i__2 = i__ + j * b_dim1;
+			b[i__2].r = temp.r, b[i__2].i = temp.i;
+/* L170: */
+		    }
+/* L180: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*inv( A ). */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if ((alpha->r != 1.f) || (alpha->i != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, q__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L190: */
+			}
+		    }
+		    i__2 = j - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			i__3 = k + j * a_dim1;
+			if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) {
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = k + j * a_dim1;
+				i__7 = i__ + k * b_dim1;
+				q__2.r = a[i__6].r * b[i__7].r - a[i__6].i *
+					b[i__7].i, q__2.i = a[i__6].r * b[
+					i__7].i + a[i__6].i * b[i__7].r;
+				q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
+					.i - q__2.i;
+				b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L200: */
+			    }
+			}
+/* L210: */
+		    }
+		    if (nounit) {
+			c_div(&q__1, &c_b21, &a[j + j * a_dim1]);
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				    q__1.i = temp.r * b[i__4].i + temp.i * b[
+				    i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L220: */
+			}
+		    }
+/* L230: */
+		}
+	    } else {
+		for (j = *n; j >= 1; --j) {
+		    if ((alpha->r != 1.f) || (alpha->i != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * b_dim1;
+			    i__3 = i__ + j * b_dim1;
+			    q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
+				    .i, q__1.i = alpha->r * b[i__3].i +
+				    alpha->i * b[i__3].r;
+			    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L240: */
+			}
+		    }
+		    i__1 = *n;
+		    for (k = j + 1; k <= i__1; ++k) {
+			i__2 = k + j * a_dim1;
+			if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) {
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = k + j * a_dim1;
+				i__6 = i__ + k * b_dim1;
+				q__2.r = a[i__5].r * b[i__6].r - a[i__5].i *
+					b[i__6].i, q__2.i = a[i__5].r * b[
+					i__6].i + a[i__5].i * b[i__6].r;
+				q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
+					.i - q__2.i;
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L250: */
+			    }
+			}
+/* L260: */
+		    }
+		    if (nounit) {
+			c_div(&q__1, &c_b21, &a[j + j * a_dim1]);
+			temp.r = q__1.r, temp.i = q__1.i;
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * b_dim1;
+			    i__3 = i__ + j * b_dim1;
+			    q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				    q__1.i = temp.r * b[i__3].i + temp.i * b[
+				    i__3].r;
+			    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L270: */
+			}
+		    }
+/* L280: */
+		}
+	    }
+	} else {
+
+/*
+             Form  B := alpha*B*inv( A' )
+             or    B := alpha*B*inv( conjg( A' ) ).
+*/
+
+	    if (upper) {
+		for (k = *n; k >= 1; --k) {
+		    if (nounit) {
+			if (noconj) {
+			    c_div(&q__1, &c_b21, &a[k + k * a_dim1]);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			} else {
+			    r_cnjg(&q__2, &a[k + k * a_dim1]);
+			    c_div(&q__1, &c_b21, &q__2);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + k * b_dim1;
+			    i__3 = i__ + k * b_dim1;
+			    q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				    q__1.i = temp.r * b[i__3].i + temp.i * b[
+				    i__3].r;
+			    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L290: */
+			}
+		    }
+		    i__1 = k - 1;
+		    for (j = 1; j <= i__1; ++j) {
+			i__2 = j + k * a_dim1;
+			if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) {
+			    if (noconj) {
+				i__2 = j + k * a_dim1;
+				temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    } else {
+				r_cnjg(&q__1, &a[j + k * a_dim1]);
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * b_dim1;
+				q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
+					.i, q__2.i = temp.r * b[i__5].i +
+					temp.i * b[i__5].r;
+				q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
+					.i - q__2.i;
+				b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L300: */
+			    }
+			}
+/* L310: */
+		    }
+		    if ((alpha->r != 1.f) || (alpha->i != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + k * b_dim1;
+			    i__3 = i__ + k * b_dim1;
+			    q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
+				    .i, q__1.i = alpha->r * b[i__3].i +
+				    alpha->i * b[i__3].r;
+			    b[i__2].r = q__1.r, b[i__2].i = q__1.i;
+/* L320: */
+			}
+		    }
+/* L330: */
+		}
+	    } else {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    if (nounit) {
+			if (noconj) {
+			    c_div(&q__1, &c_b21, &a[k + k * a_dim1]);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			} else {
+			    r_cnjg(&q__2, &a[k + k * a_dim1]);
+			    c_div(&q__1, &c_b21, &q__2);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + k * b_dim1;
+			    i__4 = i__ + k * b_dim1;
+			    q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				    q__1.i = temp.r * b[i__4].i + temp.i * b[
+				    i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L340: */
+			}
+		    }
+		    i__2 = *n;
+		    for (j = k + 1; j <= i__2; ++j) {
+			i__3 = j + k * a_dim1;
+			if ((a[i__3].r != 0.f) || (a[i__3].i != 0.f)) {
+			    if (noconj) {
+				i__3 = j + k * a_dim1;
+				temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    } else {
+				r_cnjg(&q__1, &a[j + k * a_dim1]);
+				temp.r = q__1.r, temp.i = q__1.i;
+			    }
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * b_dim1;
+				q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
+					.i, q__2.i = temp.r * b[i__6].i +
+					temp.i * b[i__6].r;
+				q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
+					.i - q__2.i;
+				b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L350: */
+			    }
+			}
+/* L360: */
+		    }
+		    if ((alpha->r != 1.f) || (alpha->i != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + k * b_dim1;
+			    i__4 = i__ + k * b_dim1;
+			    q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, q__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L370: */
+			}
+		    }
+/* L380: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CTRSM . */
+
+} /* ctrsm_ */
+
+/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n,
+	complex *a, integer *lda, complex *x, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2, q__3;
+
+    /* Builtin functions */
+    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, ix, jx, kx, info;
+    static complex temp;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    CTRSV  solves one of the systems of equations
+
+       A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+
+    where b and x are n element vectors and A is an n by n unit, or
+    non-unit, upper or lower triangular matrix.
+
+    No test for singularity or near-singularity is included in this
+    routine. Such tests must be performed before calling this routine.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the equations to be solved as
+             follows:
+
+                TRANS = 'N' or 'n'   A*x = b.
+
+                TRANS = 'T' or 't'   A'*x = b.
+
+                TRANS = 'C' or 'c'   conjg( A' )*x = b.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit
+             triangular as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    A      - COMPLEX          array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular matrix and the strictly lower triangular part of
+             A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular matrix and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of
+             A are not referenced either, but are assumed to be unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - COMPLEX          array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element right-hand side vector b. On exit, X is overwritten
+             with the solution vector x.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < max(1,*n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("CTRSV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    noconj = lsame_(trans, "T");
+    nounit = lsame_(diag, "N");
+
+/*
+       Set up the start point in X if the increment is not unity. This
+       will be  ( N - 1 )*INCX  too small for descending loops.
+*/
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := inv( A )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) {
+			if (nounit) {
+			    i__1 = j;
+			    c_div(&q__1, &x[j], &a[j + j * a_dim1]);
+			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+			}
+			i__1 = j;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    i__1 = i__;
+			    i__2 = i__;
+			    i__3 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+				    q__2.i = temp.r * a[i__3].i + temp.i * a[
+				    i__3].r;
+			    q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i -
+				    q__2.i;
+			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    i__1 = jx;
+		    if ((x[i__1].r != 0.f) || (x[i__1].i != 0.f)) {
+			if (nounit) {
+			    i__1 = jx;
+			    c_div(&q__1, &x[jx], &a[j + j * a_dim1]);
+			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+			}
+			i__1 = jx;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			ix = jx;
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    ix -= *incx;
+			    i__1 = ix;
+			    i__2 = ix;
+			    i__3 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+				    q__2.i = temp.r * a[i__3].i + temp.i * a[
+				    i__3].r;
+			    q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i -
+				    q__2.i;
+			    x[i__1].r = q__1.r, x[i__1].i = q__1.i;
+/* L30: */
+			}
+		    }
+		    jx -= *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) {
+			if (nounit) {
+			    i__2 = j;
+			    c_div(&q__1, &x[j], &a[j + j * a_dim1]);
+			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+			}
+			i__2 = j;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    i__3 = i__;
+			    i__4 = i__;
+			    i__5 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    q__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i -
+				    q__2.i;
+			    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = jx;
+		    if ((x[i__2].r != 0.f) || (x[i__2].i != 0.f)) {
+			if (nounit) {
+			    i__2 = jx;
+			    c_div(&q__1, &x[jx], &a[j + j * a_dim1]);
+			    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+			}
+			i__2 = jx;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			ix = jx;
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    ix += *incx;
+			    i__3 = ix;
+			    i__4 = ix;
+			    i__5 = i__ + j * a_dim1;
+			    q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    q__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i -
+				    q__2.i;
+			    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+/* L70: */
+			}
+		    }
+		    jx += *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    if (noconj) {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = i__;
+			    q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, q__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L90: */
+			}
+			if (nounit) {
+			    c_div(&q__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    } else {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__3 = i__;
+			    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i,
+				    q__2.i = q__3.r * x[i__3].i + q__3.i * x[
+				    i__3].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L100: */
+			}
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    c_div(&q__1, &temp, &q__2);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    }
+		    i__2 = j;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+/* L110: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    ix = kx;
+		    i__2 = jx;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    if (noconj) {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = ix;
+			    q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, q__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    ix += *incx;
+/* L120: */
+			}
+			if (nounit) {
+			    c_div(&q__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    } else {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__3 = ix;
+			    q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i,
+				    q__2.i = q__3.r * x[i__3].i + q__3.i * x[
+				    i__3].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    ix += *incx;
+/* L130: */
+			}
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    c_div(&q__1, &temp, &q__2);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    }
+		    i__2 = jx;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+		    jx += *incx;
+/* L140: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    if (noconj) {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = i__ + j * a_dim1;
+			    i__3 = i__;
+			    q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
+				    i__3].i, q__2.i = a[i__2].r * x[i__3].i +
+				    a[i__2].i * x[i__3].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L150: */
+			}
+			if (nounit) {
+			    c_div(&q__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    } else {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__2 = i__;
+			    q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i,
+				    q__2.i = q__3.r * x[i__2].i + q__3.i * x[
+				    i__2].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+/* L160: */
+			}
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    c_div(&q__1, &temp, &q__2);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    }
+		    i__1 = j;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+/* L170: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    ix = kx;
+		    i__1 = jx;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    if (noconj) {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = i__ + j * a_dim1;
+			    i__3 = ix;
+			    q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
+				    i__3].i, q__2.i = a[i__2].r * x[i__3].i +
+				    a[i__2].i * x[i__3].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    ix -= *incx;
+/* L180: */
+			}
+			if (nounit) {
+			    c_div(&q__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    } else {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    r_cnjg(&q__3, &a[i__ + j * a_dim1]);
+			    i__2 = ix;
+			    q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i,
+				    q__2.i = q__3.r * x[i__2].i + q__3.i * x[
+				    i__2].r;
+			    q__1.r = temp.r - q__2.r, q__1.i = temp.i -
+				    q__2.i;
+			    temp.r = q__1.r, temp.i = q__1.i;
+			    ix -= *incx;
+/* L190: */
+			}
+			if (nounit) {
+			    r_cnjg(&q__2, &a[j + j * a_dim1]);
+			    c_div(&q__1, &temp, &q__2);
+			    temp.r = q__1.r, temp.i = q__1.i;
+			}
+		    }
+		    i__1 = jx;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+		    jx -= *incx;
+/* L200: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CTRSV . */
+
+} /* ctrsv_ */
+
+/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx,
+	integer *incx, doublereal *dy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+
+
+/*
+       constant times a vector plus a vector.
+       uses unrolled loops for increments equal to one.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dy;
+    --dx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*da == 0.) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dy[iy] += *da * dx[ix];
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*
+          code for both increments equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 4;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dy[i__] += *da * dx[i__];
+/* L30: */
+    }
+    if (*n < 4) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 4) {
+	dy[i__] += *da * dx[i__];
+	dy[i__ + 1] += *da * dx[i__ + 1];
+	dy[i__ + 2] += *da * dx[i__ + 2];
+	dy[i__ + 3] += *da * dx[i__ + 3];
+/* L50: */
+    }
+    return 0;
+} /* daxpy_ */
+
+doublereal dcabs1_(doublecomplex *z__)
+{
+    /* System generated locals */
+    doublereal ret_val;
+    static doublecomplex equiv_0[1];
+
+    /* Local variables */
+#define t ((doublereal *)equiv_0)
+#define zz (equiv_0)
+
+    zz->r = z__->r, zz->i = z__->i;
+    ret_val = abs(t[0]) + abs(t[1]);
+    return ret_val;
+} /* dcabs1_ */
+
+#undef zz
+#undef t
+
+
+/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx,
+	doublereal *dy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+
+
+/*
+       copies a vector, x, to a vector, y.
+       uses unrolled loops for increments equal to one.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dy;
+    --dx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dy[iy] = dx[ix];
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*
+          code for both increments equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 7;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dy[i__] = dx[i__];
+/* L30: */
+    }
+    if (*n < 7) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 7) {
+	dy[i__] = dx[i__];
+	dy[i__ + 1] = dx[i__ + 1];
+	dy[i__ + 2] = dx[i__ + 2];
+	dy[i__ + 3] = dx[i__ + 3];
+	dy[i__ + 4] = dx[i__ + 4];
+	dy[i__ + 5] = dx[i__ + 5];
+	dy[i__ + 6] = dx[i__ + 6];
+/* L50: */
+    }
+    return 0;
+} /* dcopy_ */
+
+doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy,
+	integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal ret_val;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+    static doublereal dtemp;
+
+
+/*
+       forms the dot product of two vectors.
+       uses unrolled loops for increments equal to one.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dy;
+    --dx;
+
+    /* Function Body */
+    ret_val = 0.;
+    dtemp = 0.;
+    if (*n <= 0) {
+	return ret_val;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dtemp += dx[ix] * dy[iy];
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    ret_val = dtemp;
+    return ret_val;
+
+/*
+          code for both increments equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 5;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dtemp += dx[i__] * dy[i__];
+/* L30: */
+    }
+    if (*n < 5) {
+	goto L60;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 5) {
+	dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[
+		i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ +
+		4] * dy[i__ + 4];
+/* L50: */
+    }
+L60:
+    ret_val = dtemp;
+    return ret_val;
+} /* ddot_ */
+
+/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer *
+	n, integer *k, doublereal *alpha, doublereal *a, integer *lda,
+	doublereal *b, integer *ldb, doublereal *beta, doublereal *c__,
+	integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3;
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static logical nota, notb;
+    static doublereal temp;
+    static integer ncola;
+    extern logical lsame_(char *, char *);
+    static integer nrowa, nrowb;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DGEMM  performs one of the matrix-matrix operations
+
+       C := alpha*op( A )*op( B ) + beta*C,
+
+    where  op( X ) is one of
+
+       op( X ) = X   or   op( X ) = X',
+
+    alpha and beta are scalars, and A, B and C are matrices, with op( A )
+    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n',  op( A ) = A.
+
+                TRANSA = 'T' or 't',  op( A ) = A'.
+
+                TRANSA = 'C' or 'c',  op( A ) = A'.
+
+             Unchanged on exit.
+
+    TRANSB - CHARACTER*1.
+             On entry, TRANSB specifies the form of op( B ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSB = 'N' or 'n',  op( B ) = B.
+
+                TRANSB = 'T' or 't',  op( B ) = B'.
+
+                TRANSB = 'C' or 'c',  op( B ) = B'.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry,  M  specifies  the number  of rows  of the  matrix
+             op( A )  and of the  matrix  C.  M  must  be at least  zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N  specifies the number  of columns of the matrix
+             op( B ) and the number of columns of the matrix C. N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry,  K  specifies  the number of columns of the matrix
+             op( A ) and the number of rows of the matrix op( B ). K must
+             be at least  zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by m  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+             LDA must be at least  max( 1, m ), otherwise  LDA must be at
+             least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  n by k  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+             LDB must be at least  max( 1, k ), otherwise  LDB must be at
+             least  max( 1, n ).
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION.
+             On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+             supplied as zero then C need not be set on input.
+             Unchanged on exit.
+
+    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+             Before entry, the leading  m by n  part of the array  C must
+             contain the matrix  C,  except when  beta  is zero, in which
+             case C need not be set on entry.
+             On exit, the array  C  is overwritten by the  m by n  matrix
+             ( alpha*op( A )*op( B ) + beta*C ).
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+       transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
+       and  columns of  A  and the  number of  rows  of  B  respectively.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    nota = lsame_(transa, "N");
+    notb = lsame_(transb, "N");
+    if (nota) {
+	nrowa = *m;
+	ncola = *k;
+    } else {
+	nrowa = *k;
+	ncola = *m;
+    }
+    if (notb) {
+	nrowb = *k;
+    } else {
+	nrowb = *n;
+    }
+
+/*     Test the input parameters. */
+
+    info = 0;
+    if (! nota && ! lsame_(transa, "C") && ! lsame_(
+	    transa, "T")) {
+	info = 1;
+    } else if (! notb && ! lsame_(transb, "C") && !
+	    lsame_(transb, "T")) {
+	info = 2;
+    } else if (*m < 0) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*k < 0) {
+	info = 5;
+    } else if (*lda < max(1,nrowa)) {
+	info = 8;
+    } else if (*ldb < max(1,nrowb)) {
+	info = 10;
+    } else if (*ldc < max(1,*m)) {
+	info = 13;
+    }
+    if (info != 0) {
+	xerbla_("DGEMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (((*alpha == 0.) || (*k == 0)) && *beta ==
+	     1.)) {
+	return 0;
+    }
+
+/*     And if  alpha.eq.zero. */
+
+    if (*alpha == 0.) {
+	if (*beta == 0.) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    c__[i__ + j * c_dim1] = 0.;
+/* L10: */
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (notb) {
+	if (nota) {
+
+/*           Form  C := alpha*A*B + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L50: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L60: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (b[l + j * b_dim1] != 0.) {
+			temp = *alpha * b[l + j * b_dim1];
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+/* L90: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+/* L100: */
+		    }
+		    if (*beta == 0.) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L110: */
+		}
+/* L120: */
+	    }
+	}
+    } else {
+	if (nota) {
+
+/*           Form  C := alpha*A*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L130: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L140: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (b[j + l * b_dim1] != 0.) {
+			temp = *alpha * b[j + l * b_dim1];
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L150: */
+			}
+		    }
+/* L160: */
+		}
+/* L170: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
+/* L180: */
+		    }
+		    if (*beta == 0.) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L190: */
+		}
+/* L200: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DGEMM . */
+
+} /* dgemm_ */
+
+/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
+	alpha, doublereal *a, integer *lda, doublereal *x, integer *incx,
+	doublereal *beta, doublereal *y, integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static doublereal temp;
+    static integer lenx, leny;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DGEMV  performs one of the matrix-vector operations
+
+       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+
+    where alpha and beta are scalars, x and y are vectors and A is an
+    m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+
+                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+
+                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+    X      - DOUBLE PRECISION array of DIMENSION at least
+             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+             Before entry, the incremented array X must contain the
+             vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION.
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - DOUBLE PRECISION array of DIMENSION at least
+             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+             Before entry with BETA non-zero, the incremented array Y
+             must contain the vector y. On exit, Y is overwritten by the
+             updated vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
+	    ) {
+	info = 1;
+    } else if (*m < 0) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*lda < max(1,*m)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    } else if (*incy == 0) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("DGEMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (*alpha == 0. && *beta == 1.)) {
+	return 0;
+    }
+
+/*
+       Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+       up the start points in  X  and  Y.
+*/
+
+    if (lsame_(trans, "N")) {
+	lenx = *n;
+	leny = *m;
+    } else {
+	lenx = *m;
+	leny = *n;
+    }
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (leny - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+
+       First form  y := beta*y.
+*/
+
+    if (*beta != 1.) {
+	if (*incy == 1) {
+	    if (*beta == 0.) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = 0.;
+/* L10: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = *beta * y[i__];
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (*beta == 0.) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = 0.;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = *beta * y[iy];
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (*alpha == 0.) {
+	return 0;
+    }
+    if (lsame_(trans, "N")) {
+
+/*        Form  y := alpha*A*x + y. */
+
+	jx = kx;
+	if (*incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (x[jx] != 0.) {
+		    temp = *alpha * x[jx];
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			y[i__] += temp * a[i__ + j * a_dim1];
+/* L50: */
+		    }
+		}
+		jx += *incx;
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (x[jx] != 0.) {
+		    temp = *alpha * x[jx];
+		    iy = ky;
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			y[iy] += temp * a[i__ + j * a_dim1];
+			iy += *incy;
+/* L70: */
+		    }
+		}
+		jx += *incx;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y := alpha*A'*x + y. */
+
+	jy = ky;
+	if (*incx == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = 0.;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp += a[i__ + j * a_dim1] * x[i__];
+/* L90: */
+		}
+		y[jy] += *alpha * temp;
+		jy += *incy;
+/* L100: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = 0.;
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp += a[i__ + j * a_dim1] * x[ix];
+		    ix += *incx;
+/* L110: */
+		}
+		y[jy] += *alpha * temp;
+		jy += *incy;
+/* L120: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DGEMV . */
+
+} /* dgemv_ */
+
+/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha,
+	doublereal *x, integer *incx, doublereal *y, integer *incy,
+	doublereal *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, jy, kx, info;
+    static doublereal temp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DGER   performs the rank 1 operation
+
+       A := alpha*x*y' + A,
+
+    where alpha is a scalar, x is an m element vector, y is an n element
+    vector and A is an m by n matrix.
+
+    Parameters
+    ==========
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( m - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the m
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients. On exit, A is
+             overwritten by the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (*m < 0) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("DGER  ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (*alpha == 0.)) {
+	return 0;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (*incy > 0) {
+	jy = 1;
+    } else {
+	jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (y[jy] != 0.) {
+		temp = *alpha * y[jy];
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    a[i__ + j * a_dim1] += x[i__] * temp;
+/* L10: */
+		}
+	    }
+	    jy += *incy;
+/* L20: */
+	}
+    } else {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*m - 1) * *incx;
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (y[jy] != 0.) {
+		temp = *alpha * y[jy];
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    a[i__ + j * a_dim1] += x[ix] * temp;
+		    ix += *incx;
+/* L30: */
+		}
+	    }
+	    jy += *incy;
+/* L40: */
+	}
+    }
+
+    return 0;
+
+/*     End of DGER  . */
+
+} /* dger_ */
+
+doublereal dnrm2_(integer *n, doublereal *x, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    doublereal ret_val, d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer ix;
+    static doublereal ssq, norm, scale, absxi;
+
+
+/*
+    DNRM2 returns the euclidean norm of a vector via the function
+    name, so that
+
+       DNRM2 := sqrt( x'*x )
+
+
+    -- This version written on 25-October-1982.
+       Modified on 14-October-1993 to inline the call to DLASSQ.
+       Sven Hammarling, Nag Ltd.
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if ((*n < 1) || (*incx < 1)) {
+	norm = 0.;
+    } else if (*n == 1) {
+	norm = abs(x[1]);
+    } else {
+	scale = 0.;
+	ssq = 1.;
+/*
+          The following loop is equivalent to this call to the LAPACK
+          auxiliary routine:
+          CALL DLASSQ( N, X, INCX, SCALE, SSQ )
+*/
+
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    if (x[ix] != 0.) {
+		absxi = (d__1 = x[ix], abs(d__1));
+		if (scale < absxi) {
+/* Computing 2nd power */
+		    d__1 = scale / absxi;
+		    ssq = ssq * (d__1 * d__1) + 1.;
+		    scale = absxi;
+		} else {
+/* Computing 2nd power */
+		    d__1 = absxi / scale;
+		    ssq += d__1 * d__1;
+		}
+	    }
+/* L10: */
+	}
+	norm = scale * sqrt(ssq);
+    }
+
+    ret_val = norm;
+    return ret_val;
+
+/*     End of DNRM2. */
+
+} /* dnrm2_ */
+
+/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx,
+	doublereal *dy, integer *incy, doublereal *c__, doublereal *s)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static doublereal dtemp;
+
+
+/*
+       applies a plane rotation.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dy;
+    --dx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+         code for unequal increments or equal increments not equal
+           to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dtemp = *c__ * dx[ix] + *s * dy[iy];
+	dy[iy] = *c__ * dy[iy] - *s * dx[ix];
+	dx[ix] = dtemp;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*       code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dtemp = *c__ * dx[i__] + *s * dy[i__];
+	dy[i__] = *c__ * dy[i__] - *s * dx[i__];
+	dx[i__] = dtemp;
+/* L30: */
+    }
+    return 0;
+} /* drot_ */
+
+/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx,
+	integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Local variables */
+    static integer i__, m, mp1, nincx;
+
+
+/*
+       scales a vector by a constant.
+       uses unrolled loops for increment equal to one.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dx;
+
+    /* Function Body */
+    if ((*n <= 0) || (*incx <= 0)) {
+	return 0;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    nincx = *n * *incx;
+    i__1 = nincx;
+    i__2 = *incx;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	dx[i__] = *da * dx[i__];
+/* L10: */
+    }
+    return 0;
+
+/*
+          code for increment equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 5;
+    if (m == 0) {
+	goto L40;
+    }
+    i__2 = m;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+	dx[i__] = *da * dx[i__];
+/* L30: */
+    }
+    if (*n < 5) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__2 = *n;
+    for (i__ = mp1; i__ <= i__2; i__ += 5) {
+	dx[i__] = *da * dx[i__];
+	dx[i__ + 1] = *da * dx[i__ + 1];
+	dx[i__ + 2] = *da * dx[i__ + 2];
+	dx[i__ + 3] = *da * dx[i__ + 3];
+	dx[i__ + 4] = *da * dx[i__ + 4];
+/* L50: */
+    }
+    return 0;
+} /* dscal_ */
+
+/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx,
+	doublereal *dy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+    static doublereal dtemp;
+
+
+/*
+       interchanges two vectors.
+       uses unrolled loops for increments equal one.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dy;
+    --dx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+         code for unequal increments or equal increments not equal
+           to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dtemp = dx[ix];
+	dx[ix] = dy[iy];
+	dy[iy] = dtemp;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*
+         code for both increments equal to 1
+
+
+         clean-up loop
+*/
+
+L20:
+    m = *n % 3;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dtemp = dx[i__];
+	dx[i__] = dy[i__];
+	dy[i__] = dtemp;
+/* L30: */
+    }
+    if (*n < 3) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 3) {
+	dtemp = dx[i__];
+	dx[i__] = dy[i__];
+	dy[i__] = dtemp;
+	dtemp = dx[i__ + 1];
+	dx[i__ + 1] = dy[i__ + 1];
+	dy[i__ + 1] = dtemp;
+	dtemp = dx[i__ + 2];
+	dx[i__ + 2] = dy[i__ + 2];
+	dy[i__ + 2] = dtemp;
+/* L50: */
+    }
+    return 0;
+} /* dswap_ */
+
+/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha,
+	doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal
+	*beta, doublereal *y, integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static doublereal temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DSYMV  performs the matrix-vector  operation
+
+       y := alpha*A*x + beta*y,
+
+    where alpha and beta are scalars, x and y are n element vectors and
+    A is an n by n symmetric matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the symmetric matrix and the strictly
+             lower triangular part of A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the symmetric matrix and the strictly
+             upper triangular part of A is not referenced.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION.
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y. On exit, Y is overwritten by the updated
+             vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*lda < max(1,*n)) {
+	info = 5;
+    } else if (*incx == 0) {
+	info = 7;
+    } else if (*incy == 0) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("DSYMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (*alpha == 0. && *beta == 1.)) {
+	return 0;
+    }
+
+/*     Set up the start points in  X  and  Y. */
+
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (*n - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (*n - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+
+       First form  y := beta*y.
+*/
+
+    if (*beta != 1.) {
+	if (*incy == 1) {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = 0.;
+/* L10: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = *beta * y[i__];
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = 0.;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = *beta * y[iy];
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (*alpha == 0.) {
+	return 0;
+    }
+    if (lsame_(uplo, "U")) {
+
+/*        Form  y  when A is stored in upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[j];
+		temp2 = 0.;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    y[i__] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[i__];
+/* L50: */
+		}
+		y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
+/* L60: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[jx];
+		temp2 = 0.;
+		ix = kx;
+		iy = ky;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    y[iy] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[ix];
+		    ix += *incx;
+		    iy += *incy;
+/* L70: */
+		}
+		y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y  when A is stored in lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[j];
+		temp2 = 0.;
+		y[j] += temp1 * a[j + j * a_dim1];
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    y[i__] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[i__];
+/* L90: */
+		}
+		y[j] += *alpha * temp2;
+/* L100: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[jx];
+		temp2 = 0.;
+		y[jy] += temp1 * a[j + j * a_dim1];
+		ix = jx;
+		iy = jy;
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    ix += *incx;
+		    iy += *incy;
+		    y[iy] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[ix];
+/* L110: */
+		}
+		y[jy] += *alpha * temp2;
+		jx += *incx;
+		jy += *incy;
+/* L120: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DSYMV . */
+
+} /* dsymv_ */
+
+/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha,
+	doublereal *x, integer *incx, doublereal *y, integer *incy,
+	doublereal *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static doublereal temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DSYR2  performs the symmetric rank 2 operation
+
+       A := alpha*x*y' + alpha*y*x' + A,
+
+    where alpha is a scalar, x and y are n element vectors and A is an n
+    by n symmetric matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the symmetric matrix and the strictly
+             lower triangular part of A is not referenced. On exit, the
+             upper triangular part of the array A is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the symmetric matrix and the strictly
+             upper triangular part of A is not referenced. On exit, the
+             lower triangular part of the array A is overwritten by the
+             lower triangular part of the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*n)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("DSYR2 ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (*alpha == 0.)) {
+	return 0;
+    }
+
+/*
+       Set up the start points in X and Y if the increments are not both
+       unity.
+*/
+
+    if ((*incx != 1) || (*incy != 1)) {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*n - 1) * *incx;
+	}
+	if (*incy > 0) {
+	    ky = 1;
+	} else {
+	    ky = 1 - (*n - 1) * *incy;
+	}
+	jx = kx;
+	jy = ky;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+*/
+
+    if (lsame_(uplo, "U")) {
+
+/*        Form  A  when A is stored in the upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[j] != 0.) || (y[j] != 0.)) {
+		    temp1 = *alpha * y[j];
+		    temp2 = *alpha * x[j];
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] *
+				temp1 + y[i__] * temp2;
+/* L10: */
+		    }
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[jx] != 0.) || (y[jy] != 0.)) {
+		    temp1 = *alpha * y[jy];
+		    temp2 = *alpha * x[jx];
+		    ix = kx;
+		    iy = ky;
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] *
+				temp1 + y[iy] * temp2;
+			ix += *incx;
+			iy += *incy;
+/* L30: */
+		    }
+		}
+		jx += *incx;
+		jy += *incy;
+/* L40: */
+	    }
+	}
+    } else {
+
+/*        Form  A  when A is stored in the lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[j] != 0.) || (y[j] != 0.)) {
+		    temp1 = *alpha * y[j];
+		    temp2 = *alpha * x[j];
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] *
+				temp1 + y[i__] * temp2;
+/* L50: */
+		    }
+		}
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[jx] != 0.) || (y[jy] != 0.)) {
+		    temp1 = *alpha * y[jy];
+		    temp2 = *alpha * x[jx];
+		    ix = jx;
+		    iy = jy;
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] *
+				temp1 + y[iy] * temp2;
+			ix += *incx;
+			iy += *incy;
+/* L70: */
+		    }
+		}
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DSYR2 . */
+
+} /* dsyr2_ */
+
+/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k,
+	doublereal *alpha, doublereal *a, integer *lda, doublereal *b,
+	integer *ldb, doublereal *beta, doublereal *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3;
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static doublereal temp1, temp2;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DSYR2K  performs one of the symmetric rank 2k operations
+
+       C := alpha*A*B' + alpha*B*A' + beta*C,
+
+    or
+
+       C := alpha*A'*B + alpha*B'*A + beta*C,
+
+    where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+    and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+    matrices in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +
+                                          beta*C.
+
+                TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +
+                                          beta*C.
+
+                TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +
+                                          beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns  of the  matrices  A and B,  and on  entry  with
+             TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+             of rows of the matrices  A and B.  K must be at least  zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  k by n  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDB must be at least  max( 1, n ), otherwise  LDB must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION.
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  symmetric matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  symmetric matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldb < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldc < max(1,*n)) {
+	info = 12;
+    }
+    if (info != 0) {
+	xerbla_("DSYR2K", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((*alpha == 0.) || (*k == 0)) && *beta == 1.)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.) {
+	if (upper) {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L30: */
+		    }
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L90: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L100: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if ((a[j + l * a_dim1] != 0.) || (b[j + l * b_dim1] != 0.)
+			    ) {
+			temp1 = *alpha * b[j + l * b_dim1];
+			temp2 = *alpha * a[j + l * a_dim1];
+			i__3 = j;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
+				    i__ + l * a_dim1] * temp1 + b[i__ + l *
+				    b_dim1] * temp2;
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L140: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L150: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if ((a[j + l * a_dim1] != 0.) || (b[j + l * b_dim1] != 0.)
+			    ) {
+			temp1 = *alpha * b[j + l * b_dim1];
+			temp2 = *alpha * a[j + l * a_dim1];
+			i__3 = *n;
+			for (i__ = j; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
+				    i__ + l * a_dim1] * temp1 + b[i__ + l *
+				    b_dim1] * temp2;
+/* L160: */
+			}
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp1 = 0.;
+		    temp2 = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+			temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
+/* L190: */
+		    }
+		    if (*beta == 0.) {
+			c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha *
+				temp2;
+		    } else {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]
+				+ *alpha * temp1 + *alpha * temp2;
+		    }
+/* L200: */
+		}
+/* L210: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+		    temp1 = 0.;
+		    temp2 = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+			temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
+/* L220: */
+		    }
+		    if (*beta == 0.) {
+			c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha *
+				temp2;
+		    } else {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]
+				+ *alpha * temp1 + *alpha * temp2;
+		    }
+/* L230: */
+		}
+/* L240: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DSYR2K. */
+
+} /* dsyr2k_ */
+
+/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k,
+	doublereal *alpha, doublereal *a, integer *lda, doublereal *beta,
+	doublereal *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static doublereal temp;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    DSYRK  performs one of the symmetric rank k operations
+
+       C := alpha*A*A' + beta*C,
+
+    or
+
+       C := alpha*A'*A + beta*C,
+
+    where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+    and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+    in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+
+                TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+
+                TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns   of  the   matrix   A,   and  on   entry   with
+             TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+             of rows of the matrix  A.  K must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION.
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  symmetric matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  symmetric matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldc < max(1,*n)) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("DSYRK ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((*alpha == 0.) || (*k == 0)) && *beta == 1.)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.) {
+	if (upper) {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L30: */
+		    }
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  C := alpha*A*A' + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L90: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L100: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (a[j + l * a_dim1] != 0.) {
+			temp = *alpha * a[j + l * a_dim1];
+			i__3 = j;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.;
+/* L140: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L150: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (a[j + l * a_dim1] != 0.) {
+			temp = *alpha * a[j + l * a_dim1];
+			i__3 = *n;
+			for (i__ = j; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L160: */
+			}
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*        Form  C := alpha*A'*A + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
+/* L190: */
+		    }
+		    if (*beta == 0.) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L200: */
+		}
+/* L210: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+		    temp = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
+/* L220: */
+		    }
+		    if (*beta == 0.) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L230: */
+		}
+/* L240: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DSYRK . */
+
+} /* dsyrk_ */
+
+/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
+	lda, doublereal *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static doublereal temp;
+    static logical lside;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    Purpose
+    =======
+
+    DTRMM  performs one of the matrix-matrix operations
+
+       B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
+
+    where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'.
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry,  SIDE specifies whether  op( A ) multiplies B from
+             the left or right as follows:
+
+                SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+
+                SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = A'.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain the matrix  B,  and  on exit  is overwritten  by the
+             transformed matrix.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("DTRMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*A*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			if (b[k + j * b_dim1] != 0.) {
+			    temp = *alpha * b[k + j * b_dim1];
+			    i__3 = k - 1;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] += temp * a[i__ + k *
+					a_dim1];
+/* L30: */
+			    }
+			    if (nounit) {
+				temp *= a[k + k * a_dim1];
+			    }
+			    b[k + j * b_dim1] = temp;
+			}
+/* L40: */
+		    }
+/* L50: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (k = *m; k >= 1; --k) {
+			if (b[k + j * b_dim1] != 0.) {
+			    temp = *alpha * b[k + j * b_dim1];
+			    b[k + j * b_dim1] = temp;
+			    if (nounit) {
+				b[k + j * b_dim1] *= a[k + k * a_dim1];
+			    }
+			    i__2 = *m;
+			    for (i__ = k + 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] += temp * a[i__ + k *
+					a_dim1];
+/* L60: */
+			    }
+			}
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*A'*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			temp = b[i__ + j * b_dim1];
+			if (nounit) {
+			    temp *= a[i__ + i__ * a_dim1];
+			}
+			i__2 = i__ - 1;
+			for (k = 1; k <= i__2; ++k) {
+			    temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L90: */
+			}
+			b[i__ + j * b_dim1] = *alpha * temp;
+/* L100: */
+		    }
+/* L110: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			temp = b[i__ + j * b_dim1];
+			if (nounit) {
+			    temp *= a[i__ + i__ * a_dim1];
+			}
+			i__3 = *m;
+			for (k = i__ + 1; k <= i__3; ++k) {
+			    temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L120: */
+			}
+			b[i__ + j * b_dim1] = *alpha * temp;
+/* L130: */
+		    }
+/* L140: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*A. */
+
+	    if (upper) {
+		for (j = *n; j >= 1; --j) {
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__1 = *m;
+		    for (i__ = 1; i__ <= i__1; ++i__) {
+			b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L150: */
+		    }
+		    i__1 = j - 1;
+		    for (k = 1; k <= i__1; ++k) {
+			if (a[k + j * a_dim1] != 0.) {
+			    temp = *alpha * a[k + j * a_dim1];
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L160: */
+			    }
+			}
+/* L170: */
+		    }
+/* L180: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L190: */
+		    }
+		    i__2 = *n;
+		    for (k = j + 1; k <= i__2; ++k) {
+			if (a[k + j * a_dim1] != 0.) {
+			    temp = *alpha * a[k + j * a_dim1];
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L200: */
+			    }
+			}
+/* L210: */
+		    }
+/* L220: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*B*A'. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    i__2 = k - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			if (a[j + k * a_dim1] != 0.) {
+			    temp = *alpha * a[j + k * a_dim1];
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L230: */
+			    }
+			}
+/* L240: */
+		    }
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[k + k * a_dim1];
+		    }
+		    if (temp != 1.) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L250: */
+			}
+		    }
+/* L260: */
+		}
+	    } else {
+		for (k = *n; k >= 1; --k) {
+		    i__1 = *n;
+		    for (j = k + 1; j <= i__1; ++j) {
+			if (a[j + k * a_dim1] != 0.) {
+			    temp = *alpha * a[j + k * a_dim1];
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L270: */
+			    }
+			}
+/* L280: */
+		    }
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[k + k * a_dim1];
+		    }
+		    if (temp != 1.) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L290: */
+			}
+		    }
+/* L300: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DTRMM . */
+
+} /* dtrmm_ */
+
+/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n,
+	doublereal *a, integer *lda, doublereal *x, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, jx, kx, info;
+    static doublereal temp;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    Purpose
+    =======
+
+    DTRMV  performs one of the matrix-vector operations
+
+       x := A*x,   or   x := A'*x,
+
+    where x is an n element vector and  A is an n by n unit, or non-unit,
+    upper or lower triangular matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   x := A*x.
+
+                TRANS = 'T' or 't'   x := A'*x.
+
+                TRANS = 'C' or 'c'   x := A'*x.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit
+             triangular as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular matrix and the strictly lower triangular part of
+             A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular matrix and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of
+             A are not referenced either, but are assumed to be unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - DOUBLE PRECISION array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x. On exit, X is overwritten with the
+             tranformed vector x.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < max(1,*n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("DTRMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    nounit = lsame_(diag, "N");
+
+/*
+       Set up the start point in X if the increment is not unity. This
+       will be  ( N - 1 )*INCX  too small for descending loops.
+*/
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := A*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (x[j] != 0.) {
+			temp = x[j];
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    x[i__] += temp * a[i__ + j * a_dim1];
+/* L10: */
+			}
+			if (nounit) {
+			    x[j] *= a[j + j * a_dim1];
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (x[jx] != 0.) {
+			temp = x[jx];
+			ix = kx;
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    x[ix] += temp * a[i__ + j * a_dim1];
+			    ix += *incx;
+/* L30: */
+			}
+			if (nounit) {
+			    x[jx] *= a[j + j * a_dim1];
+			}
+		    }
+		    jx += *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    if (x[j] != 0.) {
+			temp = x[j];
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    x[i__] += temp * a[i__ + j * a_dim1];
+/* L50: */
+			}
+			if (nounit) {
+			    x[j] *= a[j + j * a_dim1];
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    if (x[jx] != 0.) {
+			temp = x[jx];
+			ix = kx;
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    x[ix] += temp * a[i__ + j * a_dim1];
+			    ix -= *incx;
+/* L70: */
+			}
+			if (nounit) {
+			    x[jx] *= a[j + j * a_dim1];
+			}
+		    }
+		    jx -= *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := A'*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    temp = x[j];
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    for (i__ = j - 1; i__ >= 1; --i__) {
+			temp += a[i__ + j * a_dim1] * x[i__];
+/* L90: */
+		    }
+		    x[j] = temp;
+/* L100: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    temp = x[jx];
+		    ix = jx;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    for (i__ = j - 1; i__ >= 1; --i__) {
+			ix -= *incx;
+			temp += a[i__ + j * a_dim1] * x[ix];
+/* L110: */
+		    }
+		    x[jx] = temp;
+		    jx -= *incx;
+/* L120: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = x[j];
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			temp += a[i__ + j * a_dim1] * x[i__];
+/* L130: */
+		    }
+		    x[j] = temp;
+/* L140: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = x[jx];
+		    ix = jx;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			ix += *incx;
+			temp += a[i__ + j * a_dim1] * x[ix];
+/* L150: */
+		    }
+		    x[jx] = temp;
+		    jx += *incx;
+/* L160: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DTRMV . */
+
+} /* dtrmv_ */
+
+/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
+	lda, doublereal *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static doublereal temp;
+    static logical lside;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    Purpose
+    =======
+
+    DTRSM  solves one of the matrix equations
+
+       op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+
+    where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'.
+
+    The matrix X is overwritten on B.
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry, SIDE specifies whether op( A ) appears on the left
+             or right of X as follows:
+
+                SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+
+                SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = A'.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION.
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain  the  right-hand  side  matrix  B,  and  on exit  is
+             overwritten by the solution matrix  X.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("DTRSM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*inv( A )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (*alpha != 1.) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L30: */
+			}
+		    }
+		    for (k = *m; k >= 1; --k) {
+			if (b[k + j * b_dim1] != 0.) {
+			    if (nounit) {
+				b[k + j * b_dim1] /= a[k + k * a_dim1];
+			    }
+			    i__2 = k - 1;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
+					i__ + k * a_dim1];
+/* L40: */
+			    }
+			}
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (*alpha != 1.) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L70: */
+			}
+		    }
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			if (b[k + j * b_dim1] != 0.) {
+			    if (nounit) {
+				b[k + j * b_dim1] /= a[k + k * a_dim1];
+			    }
+			    i__3 = *m;
+			    for (i__ = k + 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
+					i__ + k * a_dim1];
+/* L80: */
+			    }
+			}
+/* L90: */
+		    }
+/* L100: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*inv( A' )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			temp = *alpha * b[i__ + j * b_dim1];
+			i__3 = i__ - 1;
+			for (k = 1; k <= i__3; ++k) {
+			    temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L110: */
+			}
+			if (nounit) {
+			    temp /= a[i__ + i__ * a_dim1];
+			}
+			b[i__ + j * b_dim1] = temp;
+/* L120: */
+		    }
+/* L130: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			temp = *alpha * b[i__ + j * b_dim1];
+			i__2 = *m;
+			for (k = i__ + 1; k <= i__2; ++k) {
+			    temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L140: */
+			}
+			if (nounit) {
+			    temp /= a[i__ + i__ * a_dim1];
+			}
+			b[i__ + j * b_dim1] = temp;
+/* L150: */
+		    }
+/* L160: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*inv( A ). */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (*alpha != 1.) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L170: */
+			}
+		    }
+		    i__2 = j - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			if (a[k + j * a_dim1] != 0.) {
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
+					i__ + k * b_dim1];
+/* L180: */
+			    }
+			}
+/* L190: */
+		    }
+		    if (nounit) {
+			temp = 1. / a[j + j * a_dim1];
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L200: */
+			}
+		    }
+/* L210: */
+		}
+	    } else {
+		for (j = *n; j >= 1; --j) {
+		    if (*alpha != 1.) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L220: */
+			}
+		    }
+		    i__1 = *n;
+		    for (k = j + 1; k <= i__1; ++k) {
+			if (a[k + j * a_dim1] != 0.) {
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
+					i__ + k * b_dim1];
+/* L230: */
+			    }
+			}
+/* L240: */
+		    }
+		    if (nounit) {
+			temp = 1. / a[j + j * a_dim1];
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L250: */
+			}
+		    }
+/* L260: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*B*inv( A' ). */
+
+	    if (upper) {
+		for (k = *n; k >= 1; --k) {
+		    if (nounit) {
+			temp = 1. / a[k + k * a_dim1];
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L270: */
+			}
+		    }
+		    i__1 = k - 1;
+		    for (j = 1; j <= i__1; ++j) {
+			if (a[j + k * a_dim1] != 0.) {
+			    temp = a[j + k * a_dim1];
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] -= temp * b[i__ + k *
+					b_dim1];
+/* L280: */
+			    }
+			}
+/* L290: */
+		    }
+		    if (*alpha != 1.) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
+				    ;
+/* L300: */
+			}
+		    }
+/* L310: */
+		}
+	    } else {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    if (nounit) {
+			temp = 1. / a[k + k * a_dim1];
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L320: */
+			}
+		    }
+		    i__2 = *n;
+		    for (j = k + 1; j <= i__2; ++j) {
+			if (a[j + k * a_dim1] != 0.) {
+			    temp = a[j + k * a_dim1];
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] -= temp * b[i__ + k *
+					b_dim1];
+/* L330: */
+			    }
+			}
+/* L340: */
+		    }
+		    if (*alpha != 1.) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
+				    ;
+/* L350: */
+			}
+		    }
+/* L360: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DTRSM . */
+
+} /* dtrsm_ */
+
+doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal ret_val;
+
+    /* Local variables */
+    static integer i__, ix;
+    static doublereal stemp;
+    extern doublereal dcabs1_(doublecomplex *);
+
+
+/*
+       takes the sum of the absolute values.
+       jack dongarra, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --zx;
+
+    /* Function Body */
+    ret_val = 0.;
+    stemp = 0.;
+    if ((*n <= 0) || (*incx <= 0)) {
+	return ret_val;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp += dcabs1_(&zx[ix]);
+	ix += *incx;
+/* L10: */
+    }
+    ret_val = stemp;
+    return ret_val;
+
+/*        code for increment equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp += dcabs1_(&zx[i__]);
+/* L30: */
+    }
+    ret_val = stemp;
+    return ret_val;
+} /* dzasum_ */
+
+doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    doublereal ret_val, d__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer ix;
+    static doublereal ssq, temp, norm, scale;
+
+
+/*
+    DZNRM2 returns the euclidean norm of a vector via the function
+    name, so that
+
+       DZNRM2 := sqrt( conjg( x' )*x )
+
+
+    -- This version written on 25-October-1982.
+       Modified on 14-October-1993 to inline the call to ZLASSQ.
+       Sven Hammarling, Nag Ltd.
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if ((*n < 1) || (*incx < 1)) {
+	norm = 0.;
+    } else {
+	scale = 0.;
+	ssq = 1.;
+/*
+          The following loop is equivalent to this call to the LAPACK
+          auxiliary routine:
+          CALL ZLASSQ( N, X, INCX, SCALE, SSQ )
+*/
+
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    i__3 = ix;
+	    if (x[i__3].r != 0.) {
+		i__3 = ix;
+		temp = (d__1 = x[i__3].r, abs(d__1));
+		if (scale < temp) {
+/* Computing 2nd power */
+		    d__1 = scale / temp;
+		    ssq = ssq * (d__1 * d__1) + 1.;
+		    scale = temp;
+		} else {
+/* Computing 2nd power */
+		    d__1 = temp / scale;
+		    ssq += d__1 * d__1;
+		}
+	    }
+	    if (d_imag(&x[ix]) != 0.) {
+		temp = (d__1 = d_imag(&x[ix]), abs(d__1));
+		if (scale < temp) {
+/* Computing 2nd power */
+		    d__1 = scale / temp;
+		    ssq = ssq * (d__1 * d__1) + 1.;
+		    scale = temp;
+		} else {
+/* Computing 2nd power */
+		    d__1 = temp / scale;
+		    ssq += d__1 * d__1;
+		}
+	    }
+/* L10: */
+	}
+	norm = scale * sqrt(ssq);
+    }
+
+    ret_val = norm;
+    return ret_val;
+
+/*     End of DZNRM2. */
+
+} /* dznrm2_ */
+
+integer icamax_(integer *n, complex *cx, integer *incx)
+{
+    /* System generated locals */
+    integer ret_val, i__1, i__2;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, ix;
+    static real smax;
+
+
+/*
+       finds the index of element having max. absolute value.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cx;
+
+    /* Function Body */
+    ret_val = 0;
+    if ((*n < 1) || (*incx <= 0)) {
+	return ret_val;
+    }
+    ret_val = 1;
+    if (*n == 1) {
+	return ret_val;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2));
+    ix += *incx;
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs(
+		r__2)) <= smax) {
+	    goto L5;
+	}
+	ret_val = i__;
+	i__2 = ix;
+	smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]),
+		dabs(r__2));
+L5:
+	ix += *incx;
+/* L10: */
+    }
+    return ret_val;
+
+/*        code for increment equal to 1 */
+
+L20:
+    smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2));
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs(
+		r__2)) <= smax) {
+	    goto L30;
+	}
+	ret_val = i__;
+	i__2 = i__;
+	smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]),
+		dabs(r__2));
+L30:
+	;
+    }
+    return ret_val;
+} /* icamax_ */
+
+integer idamax_(integer *n, doublereal *dx, integer *incx)
+{
+    /* System generated locals */
+    integer ret_val, i__1;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer i__, ix;
+    static doublereal dmax__;
+
+
+/*
+       finds the index of element having max. absolute value.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --dx;
+
+    /* Function Body */
+    ret_val = 0;
+    if ((*n < 1) || (*incx <= 0)) {
+	return ret_val;
+    }
+    ret_val = 1;
+    if (*n == 1) {
+	return ret_val;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    dmax__ = abs(dx[1]);
+    ix += *incx;
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	if ((d__1 = dx[ix], abs(d__1)) <= dmax__) {
+	    goto L5;
+	}
+	ret_val = i__;
+	dmax__ = (d__1 = dx[ix], abs(d__1));
+L5:
+	ix += *incx;
+/* L10: */
+    }
+    return ret_val;
+
+/*        code for increment equal to 1 */
+
+L20:
+    dmax__ = abs(dx[1]);
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	if ((d__1 = dx[i__], abs(d__1)) <= dmax__) {
+	    goto L30;
+	}
+	ret_val = i__;
+	dmax__ = (d__1 = dx[i__], abs(d__1));
+L30:
+	;
+    }
+    return ret_val;
+} /* idamax_ */
+
+integer isamax_(integer *n, real *sx, integer *incx)
+{
+    /* System generated locals */
+    integer ret_val, i__1;
+    real r__1;
+
+    /* Local variables */
+    static integer i__, ix;
+    static real smax;
+
+
+/*
+       finds the index of element having max. absolute value.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sx;
+
+    /* Function Body */
+    ret_val = 0;
+    if ((*n < 1) || (*incx <= 0)) {
+	return ret_val;
+    }
+    ret_val = 1;
+    if (*n == 1) {
+	return ret_val;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    smax = dabs(sx[1]);
+    ix += *incx;
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	if ((r__1 = sx[ix], dabs(r__1)) <= smax) {
+	    goto L5;
+	}
+	ret_val = i__;
+	smax = (r__1 = sx[ix], dabs(r__1));
+L5:
+	ix += *incx;
+/* L10: */
+    }
+    return ret_val;
+
+/*        code for increment equal to 1 */
+
+L20:
+    smax = dabs(sx[1]);
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	if ((r__1 = sx[i__], dabs(r__1)) <= smax) {
+	    goto L30;
+	}
+	ret_val = i__;
+	smax = (r__1 = sx[i__], dabs(r__1));
+L30:
+	;
+    }
+    return ret_val;
+} /* isamax_ */
+
+integer izamax_(integer *n, doublecomplex *zx, integer *incx)
+{
+    /* System generated locals */
+    integer ret_val, i__1;
+
+    /* Local variables */
+    static integer i__, ix;
+    static doublereal smax;
+    extern doublereal dcabs1_(doublecomplex *);
+
+
+/*
+       finds the index of element having max. absolute value.
+       jack dongarra, 1/15/85.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --zx;
+
+    /* Function Body */
+    ret_val = 0;
+    if ((*n < 1) || (*incx <= 0)) {
+	return ret_val;
+    }
+    ret_val = 1;
+    if (*n == 1) {
+	return ret_val;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    smax = dcabs1_(&zx[1]);
+    ix += *incx;
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	if (dcabs1_(&zx[ix]) <= smax) {
+	    goto L5;
+	}
+	ret_val = i__;
+	smax = dcabs1_(&zx[ix]);
+L5:
+	ix += *incx;
+/* L10: */
+    }
+    return ret_val;
+
+/*        code for increment equal to 1 */
+
+L20:
+    smax = dcabs1_(&zx[1]);
+    i__1 = *n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	if (dcabs1_(&zx[i__]) <= smax) {
+	    goto L30;
+	}
+	ret_val = i__;
+	smax = dcabs1_(&zx[i__]);
+L30:
+	;
+    }
+    return ret_val;
+} /* izamax_ */
+
+logical lsame_(char *ca, char *cb)
+{
+    /* System generated locals */
+    logical ret_val;
+
+    /* Local variables */
+    static integer inta, intb, zcode;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    LSAME returns .TRUE. if CA is the same letter as CB regardless of
+    case.
+
+    Arguments
+    =========
+
+    CA      (input) CHARACTER*1
+    CB      (input) CHARACTER*1
+            CA and CB specify the single characters to be compared.
+
+   =====================================================================
+
+
+       Test if the characters are equal
+*/
+
+    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
+    if (ret_val) {
+	return ret_val;
+    }
+
+/*     Now test for equivalence if both characters are alphabetic. */
+
+    zcode = 'Z';
+
+/*
+       Use 'Z' rather than 'A' so that ASCII can be detected on Prime
+       machines, on which ICHAR returns a value with bit 8 set.
+       ICHAR('A') on Prime machines returns 193 which is the same as
+       ICHAR('A') on an EBCDIC machine.
+*/
+
+    inta = *(unsigned char *)ca;
+    intb = *(unsigned char *)cb;
+
+    if ((zcode == 90) || (zcode == 122)) {
+
+/*
+          ASCII is assumed - ZCODE is the ASCII code of either lower or
+          upper case 'Z'.
+*/
+
+	if (inta >= 97 && inta <= 122) {
+	    inta += -32;
+	}
+	if (intb >= 97 && intb <= 122) {
+	    intb += -32;
+	}
+
+    } else if ((zcode == 233) || (zcode == 169)) {
+
+/*
+          EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
+          upper case 'Z'.
+*/
+
+	if (((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153)) ||
+		(inta >= 162 && inta <= 169)) {
+	    inta += 64;
+	}
+	if (((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153)) ||
+		(intb >= 162 && intb <= 169)) {
+	    intb += 64;
+	}
+
+    } else if ((zcode == 218) || (zcode == 250)) {
+
+/*
+          ASCII is assumed, on Prime machines - ZCODE is the ASCII code
+          plus 128 of either lower or upper case 'Z'.
+*/
+
+	if (inta >= 225 && inta <= 250) {
+	    inta += -32;
+	}
+	if (intb >= 225 && intb <= 250) {
+	    intb += -32;
+	}
+    }
+    ret_val = inta == intb;
+
+/*
+       RETURN
+
+       End of LSAME
+*/
+
+    return ret_val;
+} /* lsame_ */
+
+/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx,
+	real *sy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+
+
+/*
+       constant times a vector plus a vector.
+       uses unrolled loop for increments equal to one.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sy;
+    --sx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*sa == 0.f) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	sy[iy] += *sa * sx[ix];
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*
+          code for both increments equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 4;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	sy[i__] += *sa * sx[i__];
+/* L30: */
+    }
+    if (*n < 4) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 4) {
+	sy[i__] += *sa * sx[i__];
+	sy[i__ + 1] += *sa * sx[i__ + 1];
+	sy[i__ + 2] += *sa * sx[i__ + 2];
+	sy[i__ + 3] += *sa * sx[i__ + 3];
+/* L50: */
+    }
+    return 0;
+} /* saxpy_ */
+
+doublereal scasum_(integer *n, complex *cx, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    real ret_val, r__1, r__2;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, nincx;
+    static real stemp;
+
+
+/*
+       takes the sum of the absolute values of a complex vector and
+       returns a single precision result.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --cx;
+
+    /* Function Body */
+    ret_val = 0.f;
+    stemp = 0.f;
+    if ((*n <= 0) || (*incx <= 0)) {
+	return ret_val;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    nincx = *n * *incx;
+    i__1 = nincx;
+    i__2 = *incx;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	i__3 = i__;
+	stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[
+		i__]), dabs(r__2));
+/* L10: */
+    }
+    ret_val = stemp;
+    return ret_val;
+
+/*        code for increment equal to 1 */
+
+L20:
+    i__2 = *n;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+	i__1 = i__;
+	stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[
+		i__]), dabs(r__2));
+/* L30: */
+    }
+    ret_val = stemp;
+    return ret_val;
+} /* scasum_ */
+
+doublereal scnrm2_(integer *n, complex *x, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    real ret_val, r__1;
+
+    /* Builtin functions */
+    double r_imag(complex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer ix;
+    static real ssq, temp, norm, scale;
+
+
+/*
+    SCNRM2 returns the euclidean norm of a vector via the function
+    name, so that
+
+       SCNRM2 := sqrt( conjg( x' )*x )
+
+
+    -- This version written on 25-October-1982.
+       Modified on 14-October-1993 to inline the call to CLASSQ.
+       Sven Hammarling, Nag Ltd.
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if ((*n < 1) || (*incx < 1)) {
+	norm = 0.f;
+    } else {
+	scale = 0.f;
+	ssq = 1.f;
+/*
+          The following loop is equivalent to this call to the LAPACK
+          auxiliary routine:
+          CALL CLASSQ( N, X, INCX, SCALE, SSQ )
+*/
+
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    i__3 = ix;
+	    if (x[i__3].r != 0.f) {
+		i__3 = ix;
+		temp = (r__1 = x[i__3].r, dabs(r__1));
+		if (scale < temp) {
+/* Computing 2nd power */
+		    r__1 = scale / temp;
+		    ssq = ssq * (r__1 * r__1) + 1.f;
+		    scale = temp;
+		} else {
+/* Computing 2nd power */
+		    r__1 = temp / scale;
+		    ssq += r__1 * r__1;
+		}
+	    }
+	    if (r_imag(&x[ix]) != 0.f) {
+		temp = (r__1 = r_imag(&x[ix]), dabs(r__1));
+		if (scale < temp) {
+/* Computing 2nd power */
+		    r__1 = scale / temp;
+		    ssq = ssq * (r__1 * r__1) + 1.f;
+		    scale = temp;
+		} else {
+/* Computing 2nd power */
+		    r__1 = temp / scale;
+		    ssq += r__1 * r__1;
+		}
+	    }
+/* L10: */
+	}
+	norm = scale * sqrt(ssq);
+    }
+
+    ret_val = norm;
+    return ret_val;
+
+/*     End of SCNRM2. */
+
+} /* scnrm2_ */
+
+/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy,
+	integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+
+
+/*
+       copies a vector, x, to a vector, y.
+       uses unrolled loops for increments equal to 1.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sy;
+    --sx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	sy[iy] = sx[ix];
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*
+          code for both increments equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 7;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	sy[i__] = sx[i__];
+/* L30: */
+    }
+    if (*n < 7) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 7) {
+	sy[i__] = sx[i__];
+	sy[i__ + 1] = sx[i__ + 1];
+	sy[i__ + 2] = sx[i__ + 2];
+	sy[i__ + 3] = sx[i__ + 3];
+	sy[i__ + 4] = sx[i__ + 4];
+	sy[i__ + 5] = sx[i__ + 5];
+	sy[i__ + 6] = sx[i__ + 6];
+/* L50: */
+    }
+    return 0;
+} /* scopy_ */
+
+doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+    real ret_val;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+    static real stemp;
+
+
+/*
+       forms the dot product of two vectors.
+       uses unrolled loops for increments equal to one.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sy;
+    --sx;
+
+    /* Function Body */
+    stemp = 0.f;
+    ret_val = 0.f;
+    if (*n <= 0) {
+	return ret_val;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp += sx[ix] * sy[iy];
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    ret_val = stemp;
+    return ret_val;
+
+/*
+          code for both increments equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 5;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp += sx[i__] * sy[i__];
+/* L30: */
+    }
+    if (*n < 5) {
+	goto L60;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 5) {
+	stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[
+		i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ +
+		4] * sy[i__ + 4];
+/* L50: */
+    }
+L60:
+    ret_val = stemp;
+    return ret_val;
+} /* sdot_ */
+
+/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer *
+	n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *
+	ldb, real *beta, real *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3;
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static logical nota, notb;
+    static real temp;
+    static integer ncola;
+    extern logical lsame_(char *, char *);
+    static integer nrowa, nrowb;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SGEMM  performs one of the matrix-matrix operations
+
+       C := alpha*op( A )*op( B ) + beta*C,
+
+    where  op( X ) is one of
+
+       op( X ) = X   or   op( X ) = X',
+
+    alpha and beta are scalars, and A, B and C are matrices, with op( A )
+    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n',  op( A ) = A.
+
+                TRANSA = 'T' or 't',  op( A ) = A'.
+
+                TRANSA = 'C' or 'c',  op( A ) = A'.
+
+             Unchanged on exit.
+
+    TRANSB - CHARACTER*1.
+             On entry, TRANSB specifies the form of op( B ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSB = 'N' or 'n',  op( B ) = B.
+
+                TRANSB = 'T' or 't',  op( B ) = B'.
+
+                TRANSB = 'C' or 'c',  op( B ) = B'.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry,  M  specifies  the number  of rows  of the  matrix
+             op( A )  and of the  matrix  C.  M  must  be at least  zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N  specifies the number  of columns of the matrix
+             op( B ) and the number of columns of the matrix C. N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry,  K  specifies  the number of columns of the matrix
+             op( A ) and the number of rows of the matrix op( B ). K must
+             be at least  zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by m  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+             LDA must be at least  max( 1, m ), otherwise  LDA must be at
+             least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
+             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  n by k  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+             LDB must be at least  max( 1, k ), otherwise  LDB must be at
+             least  max( 1, n ).
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+             supplied as zero then C need not be set on input.
+             Unchanged on exit.
+
+    C      - REAL             array of DIMENSION ( LDC, n ).
+             Before entry, the leading  m by n  part of the array  C must
+             contain the matrix  C,  except when  beta  is zero, in which
+             case C need not be set on entry.
+             On exit, the array  C  is overwritten by the  m by n  matrix
+             ( alpha*op( A )*op( B ) + beta*C ).
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+       transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
+       and  columns of  A  and the  number of  rows  of  B  respectively.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    nota = lsame_(transa, "N");
+    notb = lsame_(transb, "N");
+    if (nota) {
+	nrowa = *m;
+	ncola = *k;
+    } else {
+	nrowa = *k;
+	ncola = *m;
+    }
+    if (notb) {
+	nrowb = *k;
+    } else {
+	nrowb = *n;
+    }
+
+/*     Test the input parameters. */
+
+    info = 0;
+    if (! nota && ! lsame_(transa, "C") && ! lsame_(
+	    transa, "T")) {
+	info = 1;
+    } else if (! notb && ! lsame_(transb, "C") && !
+	    lsame_(transb, "T")) {
+	info = 2;
+    } else if (*m < 0) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*k < 0) {
+	info = 5;
+    } else if (*lda < max(1,nrowa)) {
+	info = 8;
+    } else if (*ldb < max(1,nrowb)) {
+	info = 10;
+    } else if (*ldc < max(1,*m)) {
+	info = 13;
+    }
+    if (info != 0) {
+	xerbla_("SGEMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (((*alpha == 0.f) || (*k == 0)) && *beta
+	    == 1.f)) {
+	return 0;
+    }
+
+/*     And if  alpha.eq.zero. */
+
+    if (*alpha == 0.f) {
+	if (*beta == 0.f) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    c__[i__ + j * c_dim1] = 0.f;
+/* L10: */
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (notb) {
+	if (nota) {
+
+/*           Form  C := alpha*A*B + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L50: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L60: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (b[l + j * b_dim1] != 0.f) {
+			temp = *alpha * b[l + j * b_dim1];
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+/* L90: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+/* L100: */
+		    }
+		    if (*beta == 0.f) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L110: */
+		}
+/* L120: */
+	    }
+	}
+    } else {
+	if (nota) {
+
+/*           Form  C := alpha*A*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L130: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L140: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (b[j + l * b_dim1] != 0.f) {
+			temp = *alpha * b[j + l * b_dim1];
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L150: */
+			}
+		    }
+/* L160: */
+		}
+/* L170: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
+/* L180: */
+		    }
+		    if (*beta == 0.f) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L190: */
+		}
+/* L200: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SGEMM . */
+
+} /* sgemm_ */
+
+/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha,
+	real *a, integer *lda, real *x, integer *incx, real *beta, real *y,
+	integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static real temp;
+    static integer lenx, leny;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SGEMV  performs one of the matrix-vector operations
+
+       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,
+
+    where alpha and beta are scalars, x and y are vectors and A is an
+    m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+
+                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+
+                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+    X      - REAL             array of DIMENSION at least
+             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+             Before entry, the incremented array X must contain the
+             vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - REAL             array of DIMENSION at least
+             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+             Before entry with BETA non-zero, the incremented array Y
+             must contain the vector y. On exit, Y is overwritten by the
+             updated vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
+	    ) {
+	info = 1;
+    } else if (*m < 0) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*lda < max(1,*m)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    } else if (*incy == 0) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("SGEMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (*alpha == 0.f && *beta == 1.f)) {
+	return 0;
+    }
+
+/*
+       Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+       up the start points in  X  and  Y.
+*/
+
+    if (lsame_(trans, "N")) {
+	lenx = *n;
+	leny = *m;
+    } else {
+	lenx = *m;
+	leny = *n;
+    }
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (leny - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+
+       First form  y := beta*y.
+*/
+
+    if (*beta != 1.f) {
+	if (*incy == 1) {
+	    if (*beta == 0.f) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = 0.f;
+/* L10: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = *beta * y[i__];
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (*beta == 0.f) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = 0.f;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = *beta * y[iy];
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (*alpha == 0.f) {
+	return 0;
+    }
+    if (lsame_(trans, "N")) {
+
+/*        Form  y := alpha*A*x + y. */
+
+	jx = kx;
+	if (*incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (x[jx] != 0.f) {
+		    temp = *alpha * x[jx];
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			y[i__] += temp * a[i__ + j * a_dim1];
+/* L50: */
+		    }
+		}
+		jx += *incx;
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (x[jx] != 0.f) {
+		    temp = *alpha * x[jx];
+		    iy = ky;
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			y[iy] += temp * a[i__ + j * a_dim1];
+			iy += *incy;
+/* L70: */
+		    }
+		}
+		jx += *incx;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y := alpha*A'*x + y. */
+
+	jy = ky;
+	if (*incx == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = 0.f;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp += a[i__ + j * a_dim1] * x[i__];
+/* L90: */
+		}
+		y[jy] += *alpha * temp;
+		jy += *incy;
+/* L100: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = 0.f;
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp += a[i__ + j * a_dim1] * x[ix];
+		    ix += *incx;
+/* L110: */
+		}
+		y[jy] += *alpha * temp;
+		jy += *incy;
+/* L120: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SGEMV . */
+
+} /* sgemv_ */
+
+/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x,
+	integer *incx, real *y, integer *incy, real *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, jy, kx, info;
+    static real temp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SGER   performs the rank 1 operation
+
+       A := alpha*x*y' + A,
+
+    where alpha is a scalar, x is an m element vector, y is an n element
+    vector and A is an m by n matrix.
+
+    Parameters
+    ==========
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - REAL             array of dimension at least
+             ( 1 + ( m - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the m
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - REAL             array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients. On exit, A is
+             overwritten by the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (*m < 0) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("SGER  ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (*alpha == 0.f)) {
+	return 0;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (*incy > 0) {
+	jy = 1;
+    } else {
+	jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (y[jy] != 0.f) {
+		temp = *alpha * y[jy];
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    a[i__ + j * a_dim1] += x[i__] * temp;
+/* L10: */
+		}
+	    }
+	    jy += *incy;
+/* L20: */
+	}
+    } else {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*m - 1) * *incx;
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (y[jy] != 0.f) {
+		temp = *alpha * y[jy];
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    a[i__ + j * a_dim1] += x[ix] * temp;
+		    ix += *incx;
+/* L30: */
+		}
+	    }
+	    jy += *incy;
+/* L40: */
+	}
+    }
+
+    return 0;
+
+/*     End of SGER  . */
+
+} /* sger_ */
+
+doublereal snrm2_(integer *n, real *x, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    real ret_val, r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer ix;
+    static real ssq, norm, scale, absxi;
+
+
+/*
+    SNRM2 returns the euclidean norm of a vector via the function
+    name, so that
+
+       SNRM2 := sqrt( x'*x )
+
+
+    -- This version written on 25-October-1982.
+       Modified on 14-October-1993 to inline the call to SLASSQ.
+       Sven Hammarling, Nag Ltd.
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if ((*n < 1) || (*incx < 1)) {
+	norm = 0.f;
+    } else if (*n == 1) {
+	norm = dabs(x[1]);
+    } else {
+	scale = 0.f;
+	ssq = 1.f;
+/*
+          The following loop is equivalent to this call to the LAPACK
+          auxiliary routine:
+          CALL SLASSQ( N, X, INCX, SCALE, SSQ )
+*/
+
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    if (x[ix] != 0.f) {
+		absxi = (r__1 = x[ix], dabs(r__1));
+		if (scale < absxi) {
+/* Computing 2nd power */
+		    r__1 = scale / absxi;
+		    ssq = ssq * (r__1 * r__1) + 1.f;
+		    scale = absxi;
+		} else {
+/* Computing 2nd power */
+		    r__1 = absxi / scale;
+		    ssq += r__1 * r__1;
+		}
+	    }
+/* L10: */
+	}
+	norm = scale * sqrt(ssq);
+    }
+
+    ret_val = norm;
+    return ret_val;
+
+/*     End of SNRM2. */
+
+} /* snrm2_ */
+
+/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy,
+	integer *incy, real *c__, real *s)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static real stemp;
+
+
+/*
+       applies a plane rotation.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sy;
+    --sx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+         code for unequal increments or equal increments not equal
+           to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp = *c__ * sx[ix] + *s * sy[iy];
+	sy[iy] = *c__ * sy[iy] - *s * sx[ix];
+	sx[ix] = stemp;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*       code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp = *c__ * sx[i__] + *s * sy[i__];
+	sy[i__] = *c__ * sy[i__] - *s * sx[i__];
+	sx[i__] = stemp;
+/* L30: */
+    }
+    return 0;
+} /* srot_ */
+
+/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Local variables */
+    static integer i__, m, mp1, nincx;
+
+
+/*
+       scales a vector by a constant.
+       uses unrolled loops for increment equal to 1.
+       jack dongarra, linpack, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sx;
+
+    /* Function Body */
+    if ((*n <= 0) || (*incx <= 0)) {
+	return 0;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    nincx = *n * *incx;
+    i__1 = nincx;
+    i__2 = *incx;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	sx[i__] = *sa * sx[i__];
+/* L10: */
+    }
+    return 0;
+
+/*
+          code for increment equal to 1
+
+
+          clean-up loop
+*/
+
+L20:
+    m = *n % 5;
+    if (m == 0) {
+	goto L40;
+    }
+    i__2 = m;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+	sx[i__] = *sa * sx[i__];
+/* L30: */
+    }
+    if (*n < 5) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__2 = *n;
+    for (i__ = mp1; i__ <= i__2; i__ += 5) {
+	sx[i__] = *sa * sx[i__];
+	sx[i__ + 1] = *sa * sx[i__ + 1];
+	sx[i__ + 2] = *sa * sx[i__ + 2];
+	sx[i__ + 3] = *sa * sx[i__ + 3];
+	sx[i__ + 4] = *sa * sx[i__ + 4];
+/* L50: */
+    }
+    return 0;
+} /* sscal_ */
+
+/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy,
+	integer *incy)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, m, ix, iy, mp1;
+    static real stemp;
+
+
+/*
+       interchanges two vectors.
+       uses unrolled loops for increments equal to 1.
+       jack dongarra, linpack, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --sy;
+    --sx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+         code for unequal increments or equal increments not equal
+           to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp = sx[ix];
+	sx[ix] = sy[iy];
+	sy[iy] = stemp;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*
+         code for both increments equal to 1
+
+
+         clean-up loop
+*/
+
+L20:
+    m = *n % 3;
+    if (m == 0) {
+	goto L40;
+    }
+    i__1 = m;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	stemp = sx[i__];
+	sx[i__] = sy[i__];
+	sy[i__] = stemp;
+/* L30: */
+    }
+    if (*n < 3) {
+	return 0;
+    }
+L40:
+    mp1 = m + 1;
+    i__1 = *n;
+    for (i__ = mp1; i__ <= i__1; i__ += 3) {
+	stemp = sx[i__];
+	sx[i__] = sy[i__];
+	sy[i__] = stemp;
+	stemp = sx[i__ + 1];
+	sx[i__ + 1] = sy[i__ + 1];
+	sy[i__ + 1] = stemp;
+	stemp = sx[i__ + 2];
+	sx[i__ + 2] = sy[i__ + 2];
+	sy[i__ + 2] = stemp;
+/* L50: */
+    }
+    return 0;
+} /* sswap_ */
+
+/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a,
+	integer *lda, real *x, integer *incx, real *beta, real *y, integer *
+	incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static real temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SSYMV  performs the matrix-vector  operation
+
+       y := alpha*A*x + beta*y,
+
+    where alpha and beta are scalars, x and y are n element vectors and
+    A is an n by n symmetric matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the symmetric matrix and the strictly
+             lower triangular part of A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the symmetric matrix and the strictly
+             upper triangular part of A is not referenced.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - REAL             array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - REAL             array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y. On exit, Y is overwritten by the updated
+             vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*lda < max(1,*n)) {
+	info = 5;
+    } else if (*incx == 0) {
+	info = 7;
+    } else if (*incy == 0) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("SSYMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (*alpha == 0.f && *beta == 1.f)) {
+	return 0;
+    }
+
+/*     Set up the start points in  X  and  Y. */
+
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (*n - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (*n - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+
+       First form  y := beta*y.
+*/
+
+    if (*beta != 1.f) {
+	if (*incy == 1) {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = 0.f;
+/* L10: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[i__] = *beta * y[i__];
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = 0.f;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    y[iy] = *beta * y[iy];
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (*alpha == 0.f) {
+	return 0;
+    }
+    if (lsame_(uplo, "U")) {
+
+/*        Form  y  when A is stored in upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[j];
+		temp2 = 0.f;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    y[i__] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[i__];
+/* L50: */
+		}
+		y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
+/* L60: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[jx];
+		temp2 = 0.f;
+		ix = kx;
+		iy = ky;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    y[iy] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[ix];
+		    ix += *incx;
+		    iy += *incy;
+/* L70: */
+		}
+		y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y  when A is stored in lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[j];
+		temp2 = 0.f;
+		y[j] += temp1 * a[j + j * a_dim1];
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    y[i__] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[i__];
+/* L90: */
+		}
+		y[j] += *alpha * temp2;
+/* L100: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp1 = *alpha * x[jx];
+		temp2 = 0.f;
+		y[jy] += temp1 * a[j + j * a_dim1];
+		ix = jx;
+		iy = jy;
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    ix += *incx;
+		    iy += *incy;
+		    y[iy] += temp1 * a[i__ + j * a_dim1];
+		    temp2 += a[i__ + j * a_dim1] * x[ix];
+/* L110: */
+		}
+		y[jy] += *alpha * temp2;
+		jx += *incx;
+		jy += *incy;
+/* L120: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SSYMV . */
+
+} /* ssymv_ */
+
+/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x,
+	integer *incx, real *y, integer *incy, real *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static real temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SSYR2  performs the symmetric rank 2 operation
+
+       A := alpha*x*y' + alpha*y*x' + A,
+
+    where alpha is a scalar, x and y are n element vectors and A is an n
+    by n symmetric matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - REAL             array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - REAL             array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the symmetric matrix and the strictly
+             lower triangular part of A is not referenced. On exit, the
+             upper triangular part of the array A is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the symmetric matrix and the strictly
+             upper triangular part of A is not referenced. On exit, the
+             lower triangular part of the array A is overwritten by the
+             lower triangular part of the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*n)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("SSYR2 ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (*alpha == 0.f)) {
+	return 0;
+    }
+
+/*
+       Set up the start points in X and Y if the increments are not both
+       unity.
+*/
+
+    if ((*incx != 1) || (*incy != 1)) {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*n - 1) * *incx;
+	}
+	if (*incy > 0) {
+	    ky = 1;
+	} else {
+	    ky = 1 - (*n - 1) * *incy;
+	}
+	jx = kx;
+	jy = ky;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+*/
+
+    if (lsame_(uplo, "U")) {
+
+/*        Form  A  when A is stored in the upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[j] != 0.f) || (y[j] != 0.f)) {
+		    temp1 = *alpha * y[j];
+		    temp2 = *alpha * x[j];
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] *
+				temp1 + y[i__] * temp2;
+/* L10: */
+		    }
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[jx] != 0.f) || (y[jy] != 0.f)) {
+		    temp1 = *alpha * y[jy];
+		    temp2 = *alpha * x[jx];
+		    ix = kx;
+		    iy = ky;
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] *
+				temp1 + y[iy] * temp2;
+			ix += *incx;
+			iy += *incy;
+/* L30: */
+		    }
+		}
+		jx += *incx;
+		jy += *incy;
+/* L40: */
+	    }
+	}
+    } else {
+
+/*        Form  A  when A is stored in the lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[j] != 0.f) || (y[j] != 0.f)) {
+		    temp1 = *alpha * y[j];
+		    temp2 = *alpha * x[j];
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] *
+				temp1 + y[i__] * temp2;
+/* L50: */
+		    }
+		}
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if ((x[jx] != 0.f) || (y[jy] != 0.f)) {
+		    temp1 = *alpha * y[jy];
+		    temp2 = *alpha * x[jx];
+		    ix = jx;
+		    iy = jy;
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] *
+				temp1 + y[iy] * temp2;
+			ix += *incx;
+			iy += *incy;
+/* L70: */
+		    }
+		}
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SSYR2 . */
+
+} /* ssyr2_ */
+
+/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k,
+	real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta,
+	 real *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3;
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static real temp1, temp2;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SSYR2K  performs one of the symmetric rank 2k operations
+
+       C := alpha*A*B' + alpha*B*A' + beta*C,
+
+    or
+
+       C := alpha*A'*B + alpha*B'*A + beta*C,
+
+    where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+    and  A and B  are  n by k  matrices  in the  first  case  and  k by n
+    matrices in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' +
+                                          beta*C.
+
+                TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A +
+                                          beta*C.
+
+                TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A +
+                                          beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns  of the  matrices  A and B,  and on  entry  with
+             TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+             of rows of the matrices  A and B.  K must be at least  zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - REAL             array of DIMENSION ( LDB, kb ), where kb is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  k by n  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDB must be at least  max( 1, n ), otherwise  LDB must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - REAL             array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  symmetric matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  symmetric matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldb < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldc < max(1,*n)) {
+	info = 12;
+    }
+    if (info != 0) {
+	xerbla_("SSYR2K", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((*alpha == 0.f) || (*k == 0)) && *beta == 1.f)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.f) {
+	if (upper) {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L30: */
+		    }
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L90: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L100: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if ((a[j + l * a_dim1] != 0.f) || (b[j + l * b_dim1] !=
+			    0.f)) {
+			temp1 = *alpha * b[j + l * b_dim1];
+			temp2 = *alpha * a[j + l * a_dim1];
+			i__3 = j;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
+				    i__ + l * a_dim1] * temp1 + b[i__ + l *
+				    b_dim1] * temp2;
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L140: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L150: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if ((a[j + l * a_dim1] != 0.f) || (b[j + l * b_dim1] !=
+			    0.f)) {
+			temp1 = *alpha * b[j + l * b_dim1];
+			temp2 = *alpha * a[j + l * a_dim1];
+			i__3 = *n;
+			for (i__ = j; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
+				    i__ + l * a_dim1] * temp1 + b[i__ + l *
+				    b_dim1] * temp2;
+/* L160: */
+			}
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp1 = 0.f;
+		    temp2 = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+			temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
+/* L190: */
+		    }
+		    if (*beta == 0.f) {
+			c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha *
+				temp2;
+		    } else {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]
+				+ *alpha * temp1 + *alpha * temp2;
+		    }
+/* L200: */
+		}
+/* L210: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+		    temp1 = 0.f;
+		    temp2 = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
+			temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
+/* L220: */
+		    }
+		    if (*beta == 0.f) {
+			c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha *
+				temp2;
+		    } else {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]
+				+ *alpha * temp1 + *alpha * temp2;
+		    }
+/* L230: */
+		}
+/* L240: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SSYR2K. */
+
+} /* ssyr2k_ */
+
+/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k,
+	real *alpha, real *a, integer *lda, real *beta, real *c__, integer *
+	ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static real temp;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    SSYRK  performs one of the symmetric rank k operations
+
+       C := alpha*A*A' + beta*C,
+
+    or
+
+       C := alpha*A'*A + beta*C,
+
+    where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
+    and  A  is an  n by k  matrix in the first case and a  k by n  matrix
+    in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C.
+
+                TRANS = 'T' or 't'   C := alpha*A'*A + beta*C.
+
+                TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns   of  the   matrix   A,   and  on   entry   with
+             TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
+             of rows of the matrix  A.  K must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - REAL            .
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - REAL             array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  symmetric matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  symmetric matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldc < max(1,*n)) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("SSYRK ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((*alpha == 0.f) || (*k == 0)) && *beta == 1.f)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.f) {
+	if (upper) {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L30: */
+		    }
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.f) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  C := alpha*A*A' + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L90: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L100: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (a[j + l * a_dim1] != 0.f) {
+			temp = *alpha * a[j + l * a_dim1];
+			i__3 = j;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.f) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = 0.f;
+/* L140: */
+		    }
+		} else if (*beta != 1.f) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
+/* L150: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    if (a[j + l * a_dim1] != 0.f) {
+			temp = *alpha * a[j + l * a_dim1];
+			i__3 = *n;
+			for (i__ = j; i__ <= i__3; ++i__) {
+			    c__[i__ + j * c_dim1] += temp * a[i__ + l *
+				    a_dim1];
+/* L160: */
+			}
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*        Form  C := alpha*A'*A + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
+/* L190: */
+		    }
+		    if (*beta == 0.f) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L200: */
+		}
+/* L210: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+		    temp = 0.f;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
+/* L220: */
+		    }
+		    if (*beta == 0.f) {
+			c__[i__ + j * c_dim1] = *alpha * temp;
+		    } else {
+			c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
+				i__ + j * c_dim1];
+		    }
+/* L230: */
+		}
+/* L240: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SSYRK . */
+
+} /* ssyrk_ */
+
+/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, real *alpha, real *a, integer *lda, real *b,
+	integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static real temp;
+    static logical lside;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    Purpose
+    =======
+
+    STRMM  performs one of the matrix-matrix operations
+
+       B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
+
+    where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'.
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry,  SIDE specifies whether  op( A ) multiplies B from
+             the left or right as follows:
+
+                SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+
+                SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = A'.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - REAL             array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain the matrix  B,  and  on exit  is overwritten  by the
+             transformed matrix.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("STRMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.f) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*A*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			if (b[k + j * b_dim1] != 0.f) {
+			    temp = *alpha * b[k + j * b_dim1];
+			    i__3 = k - 1;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] += temp * a[i__ + k *
+					a_dim1];
+/* L30: */
+			    }
+			    if (nounit) {
+				temp *= a[k + k * a_dim1];
+			    }
+			    b[k + j * b_dim1] = temp;
+			}
+/* L40: */
+		    }
+/* L50: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (k = *m; k >= 1; --k) {
+			if (b[k + j * b_dim1] != 0.f) {
+			    temp = *alpha * b[k + j * b_dim1];
+			    b[k + j * b_dim1] = temp;
+			    if (nounit) {
+				b[k + j * b_dim1] *= a[k + k * a_dim1];
+			    }
+			    i__2 = *m;
+			    for (i__ = k + 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] += temp * a[i__ + k *
+					a_dim1];
+/* L60: */
+			    }
+			}
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*A'*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			temp = b[i__ + j * b_dim1];
+			if (nounit) {
+			    temp *= a[i__ + i__ * a_dim1];
+			}
+			i__2 = i__ - 1;
+			for (k = 1; k <= i__2; ++k) {
+			    temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L90: */
+			}
+			b[i__ + j * b_dim1] = *alpha * temp;
+/* L100: */
+		    }
+/* L110: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			temp = b[i__ + j * b_dim1];
+			if (nounit) {
+			    temp *= a[i__ + i__ * a_dim1];
+			}
+			i__3 = *m;
+			for (k = i__ + 1; k <= i__3; ++k) {
+			    temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L120: */
+			}
+			b[i__ + j * b_dim1] = *alpha * temp;
+/* L130: */
+		    }
+/* L140: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*A. */
+
+	    if (upper) {
+		for (j = *n; j >= 1; --j) {
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__1 = *m;
+		    for (i__ = 1; i__ <= i__1; ++i__) {
+			b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L150: */
+		    }
+		    i__1 = j - 1;
+		    for (k = 1; k <= i__1; ++k) {
+			if (a[k + j * a_dim1] != 0.f) {
+			    temp = *alpha * a[k + j * a_dim1];
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L160: */
+			    }
+			}
+/* L170: */
+		    }
+/* L180: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L190: */
+		    }
+		    i__2 = *n;
+		    for (k = j + 1; k <= i__2; ++k) {
+			if (a[k + j * a_dim1] != 0.f) {
+			    temp = *alpha * a[k + j * a_dim1];
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L200: */
+			    }
+			}
+/* L210: */
+		    }
+/* L220: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*B*A'. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    i__2 = k - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			if (a[j + k * a_dim1] != 0.f) {
+			    temp = *alpha * a[j + k * a_dim1];
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L230: */
+			    }
+			}
+/* L240: */
+		    }
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[k + k * a_dim1];
+		    }
+		    if (temp != 1.f) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L250: */
+			}
+		    }
+/* L260: */
+		}
+	    } else {
+		for (k = *n; k >= 1; --k) {
+		    i__1 = *n;
+		    for (j = k + 1; j <= i__1; ++j) {
+			if (a[j + k * a_dim1] != 0.f) {
+			    temp = *alpha * a[j + k * a_dim1];
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] += temp * b[i__ + k *
+					b_dim1];
+/* L270: */
+			    }
+			}
+/* L280: */
+		    }
+		    temp = *alpha;
+		    if (nounit) {
+			temp *= a[k + k * a_dim1];
+		    }
+		    if (temp != 1.f) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L290: */
+			}
+		    }
+/* L300: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of STRMM . */
+
+} /* strmm_ */
+
+/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n,
+	real *a, integer *lda, real *x, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, ix, jx, kx, info;
+    static real temp;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    Purpose
+    =======
+
+    STRMV  performs one of the matrix-vector operations
+
+       x := A*x,   or   x := A'*x,
+
+    where x is an n element vector and  A is an n by n unit, or non-unit,
+    upper or lower triangular matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   x := A*x.
+
+                TRANS = 'T' or 't'   x := A'*x.
+
+                TRANS = 'C' or 'c'   x := A'*x.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit
+             triangular as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular matrix and the strictly lower triangular part of
+             A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular matrix and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of
+             A are not referenced either, but are assumed to be unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - REAL             array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x. On exit, X is overwritten with the
+             tranformed vector x.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < max(1,*n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("STRMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    nounit = lsame_(diag, "N");
+
+/*
+       Set up the start point in X if the increment is not unity. This
+       will be  ( N - 1 )*INCX  too small for descending loops.
+*/
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := A*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (x[j] != 0.f) {
+			temp = x[j];
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    x[i__] += temp * a[i__ + j * a_dim1];
+/* L10: */
+			}
+			if (nounit) {
+			    x[j] *= a[j + j * a_dim1];
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (x[jx] != 0.f) {
+			temp = x[jx];
+			ix = kx;
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    x[ix] += temp * a[i__ + j * a_dim1];
+			    ix += *incx;
+/* L30: */
+			}
+			if (nounit) {
+			    x[jx] *= a[j + j * a_dim1];
+			}
+		    }
+		    jx += *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    if (x[j] != 0.f) {
+			temp = x[j];
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    x[i__] += temp * a[i__ + j * a_dim1];
+/* L50: */
+			}
+			if (nounit) {
+			    x[j] *= a[j + j * a_dim1];
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    if (x[jx] != 0.f) {
+			temp = x[jx];
+			ix = kx;
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    x[ix] += temp * a[i__ + j * a_dim1];
+			    ix -= *incx;
+/* L70: */
+			}
+			if (nounit) {
+			    x[jx] *= a[j + j * a_dim1];
+			}
+		    }
+		    jx -= *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := A'*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    temp = x[j];
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    for (i__ = j - 1; i__ >= 1; --i__) {
+			temp += a[i__ + j * a_dim1] * x[i__];
+/* L90: */
+		    }
+		    x[j] = temp;
+/* L100: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    temp = x[jx];
+		    ix = jx;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    for (i__ = j - 1; i__ >= 1; --i__) {
+			ix -= *incx;
+			temp += a[i__ + j * a_dim1] * x[ix];
+/* L110: */
+		    }
+		    x[jx] = temp;
+		    jx -= *incx;
+/* L120: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = x[j];
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			temp += a[i__ + j * a_dim1] * x[i__];
+/* L130: */
+		    }
+		    x[j] = temp;
+/* L140: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp = x[jx];
+		    ix = jx;
+		    if (nounit) {
+			temp *= a[j + j * a_dim1];
+		    }
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			ix += *incx;
+			temp += a[i__ + j * a_dim1] * x[ix];
+/* L150: */
+		    }
+		    x[jx] = temp;
+		    jx += *incx;
+/* L160: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of STRMV . */
+
+} /* strmv_ */
+
+/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, real *alpha, real *a, integer *lda, real *b,
+	integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static real temp;
+    static logical lside;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    Purpose
+    =======
+
+    STRSM  solves one of the matrix equations
+
+       op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+
+    where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'.
+
+    The matrix X is overwritten on B.
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry, SIDE specifies whether op( A ) appears on the left
+             or right of X as follows:
+
+                SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+
+                SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = A'.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - REAL            .
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - REAL             array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - REAL             array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain  the  right-hand  side  matrix  B,  and  on exit  is
+             overwritten by the solution matrix  X.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("STRSM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.f) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*inv( A )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (*alpha != 1.f) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L30: */
+			}
+		    }
+		    for (k = *m; k >= 1; --k) {
+			if (b[k + j * b_dim1] != 0.f) {
+			    if (nounit) {
+				b[k + j * b_dim1] /= a[k + k * a_dim1];
+			    }
+			    i__2 = k - 1;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
+					i__ + k * a_dim1];
+/* L40: */
+			    }
+			}
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (*alpha != 1.f) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L70: */
+			}
+		    }
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			if (b[k + j * b_dim1] != 0.f) {
+			    if (nounit) {
+				b[k + j * b_dim1] /= a[k + k * a_dim1];
+			    }
+			    i__3 = *m;
+			    for (i__ = k + 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
+					i__ + k * a_dim1];
+/* L80: */
+			    }
+			}
+/* L90: */
+		    }
+/* L100: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*inv( A' )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			temp = *alpha * b[i__ + j * b_dim1];
+			i__3 = i__ - 1;
+			for (k = 1; k <= i__3; ++k) {
+			    temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L110: */
+			}
+			if (nounit) {
+			    temp /= a[i__ + i__ * a_dim1];
+			}
+			b[i__ + j * b_dim1] = temp;
+/* L120: */
+		    }
+/* L130: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			temp = *alpha * b[i__ + j * b_dim1];
+			i__2 = *m;
+			for (k = i__ + 1; k <= i__2; ++k) {
+			    temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
+/* L140: */
+			}
+			if (nounit) {
+			    temp /= a[i__ + i__ * a_dim1];
+			}
+			b[i__ + j * b_dim1] = temp;
+/* L150: */
+		    }
+/* L160: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*inv( A ). */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if (*alpha != 1.f) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L170: */
+			}
+		    }
+		    i__2 = j - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			if (a[k + j * a_dim1] != 0.f) {
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
+					i__ + k * b_dim1];
+/* L180: */
+			    }
+			}
+/* L190: */
+		    }
+		    if (nounit) {
+			temp = 1.f / a[j + j * a_dim1];
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L200: */
+			}
+		    }
+/* L210: */
+		}
+	    } else {
+		for (j = *n; j >= 1; --j) {
+		    if (*alpha != 1.f) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
+				    ;
+/* L220: */
+			}
+		    }
+		    i__1 = *n;
+		    for (k = j + 1; k <= i__1; ++k) {
+			if (a[k + j * a_dim1] != 0.f) {
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
+					i__ + k * b_dim1];
+/* L230: */
+			    }
+			}
+/* L240: */
+		    }
+		    if (nounit) {
+			temp = 1.f / a[j + j * a_dim1];
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
+/* L250: */
+			}
+		    }
+/* L260: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*B*inv( A' ). */
+
+	    if (upper) {
+		for (k = *n; k >= 1; --k) {
+		    if (nounit) {
+			temp = 1.f / a[k + k * a_dim1];
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L270: */
+			}
+		    }
+		    i__1 = k - 1;
+		    for (j = 1; j <= i__1; ++j) {
+			if (a[j + k * a_dim1] != 0.f) {
+			    temp = a[j + k * a_dim1];
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				b[i__ + j * b_dim1] -= temp * b[i__ + k *
+					b_dim1];
+/* L280: */
+			    }
+			}
+/* L290: */
+		    }
+		    if (*alpha != 1.f) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
+				    ;
+/* L300: */
+			}
+		    }
+/* L310: */
+		}
+	    } else {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    if (nounit) {
+			temp = 1.f / a[k + k * a_dim1];
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
+/* L320: */
+			}
+		    }
+		    i__2 = *n;
+		    for (j = k + 1; j <= i__2; ++j) {
+			if (a[j + k * a_dim1] != 0.f) {
+			    temp = a[j + k * a_dim1];
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				b[i__ + j * b_dim1] -= temp * b[i__ + k *
+					b_dim1];
+/* L330: */
+			    }
+			}
+/* L340: */
+		    }
+		    if (*alpha != 1.f) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
+				    ;
+/* L350: */
+			}
+		    }
+/* L360: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of STRSM . */
+
+} /* strsm_ */
+#if 0
+/* Subroutine */ int xerbla_(char *srname, integer *info)
+{
+    /* Format strings */
+    static char fmt_9999[] = "(\002 ** On entry to \002,a6,\002 parameter nu"
+	    "mber \002,i2,\002 had \002,\002an illegal value\002)";
+
+    /* Builtin functions */
+    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
+    /* Subroutine */ int s_stop(char *, ftnlen);
+
+    /* Fortran I/O blocks */
+    static cilist io___425 = { 0, 6, 0, fmt_9999, 0 };
+
+
+/*
+    -- LAPACK auxiliary routine (preliminary version) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    XERBLA  is an error handler for the LAPACK routines.
+    It is called by an LAPACK routine if an input parameter has an
+    invalid value.  A message is printed and execution stops.
+
+    Installers may consider modifying the STOP statement in order to
+    call system-specific exception-handling facilities.
+
+    Arguments
+    =========
+
+    SRNAME  (input) CHARACTER*6
+            The name of the routine which called XERBLA.
+
+    INFO    (input) INTEGER
+            The position of the invalid parameter in the parameter list
+            of the calling routine.
+*/
+
+
+    s_wsfe(&io___425);
+    do_fio(&c__1, srname, (ftnlen)6);
+    do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer));
+    e_wsfe();
+
+    s_stop("", (ftnlen)0);
+
+
+/*     End of XERBLA */
+
+    return 0;
+} /* xerbla_ */
+#endif
+
+/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx,
+	integer *incx, doublecomplex *zy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4;
+    doublecomplex z__1, z__2;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    extern doublereal dcabs1_(doublecomplex *);
+
+
+/*
+       constant times a vector plus a vector.
+       jack dongarra, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+    /* Parameter adjustments */
+    --zy;
+    --zx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (dcabs1_(za) == 0.) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = iy;
+	i__3 = iy;
+	i__4 = ix;
+	z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[
+		i__4].i + za->i * zx[i__4].r;
+	z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i;
+	zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	i__4 = i__;
+	z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[
+		i__4].i + za->i * zx[i__4].r;
+	z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i;
+	zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
+/* L30: */
+    }
+    return 0;
+} /* zaxpy_ */
+
+/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx,
+	doublecomplex *zy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+
+
+/*
+       copies a vector, x, to a vector, y.
+       jack dongarra, linpack, 4/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --zy;
+    --zx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = iy;
+	i__3 = ix;
+	zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i;
+/* L30: */
+    }
+    return 0;
+} /* zcopy_ */
+
+/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n,
+	doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static doublecomplex ztemp;
+
+
+/*
+       forms the dot product of a vector.
+       jack dongarra, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+    /* Parameter adjustments */
+    --zy;
+    --zx;
+
+    /* Function Body */
+    ztemp.r = 0., ztemp.i = 0.;
+     ret_val->r = 0.,  ret_val->i = 0.;
+    if (*n <= 0) {
+	return ;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d_cnjg(&z__3, &zx[ix]);
+	i__2 = iy;
+	z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r *
+		zy[i__2].i + z__3.i * zy[i__2].r;
+	z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
+	ztemp.r = z__1.r, ztemp.i = z__1.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
+    return ;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d_cnjg(&z__3, &zx[i__]);
+	i__2 = i__;
+	z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r *
+		zy[i__2].i + z__3.i * zy[i__2].r;
+	z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
+	ztemp.r = z__1.r, ztemp.i = z__1.i;
+/* L30: */
+    }
+     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
+    return ;
+} /* zdotc_ */
+
+/* Double Complex */ VOID zdotu_(doublecomplex * ret_val, integer *n,
+	doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    doublecomplex z__1, z__2;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static doublecomplex ztemp;
+
+
+/*
+       forms the dot product of two vectors.
+       jack dongarra, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+    /* Parameter adjustments */
+    --zy;
+    --zx;
+
+    /* Function Body */
+    ztemp.r = 0., ztemp.i = 0.;
+     ret_val->r = 0.,  ret_val->i = 0.;
+    if (*n <= 0) {
+	return ;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments
+            not equal to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	i__3 = iy;
+	z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i =
+		zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r;
+	z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
+	ztemp.r = z__1.r, ztemp.i = z__1.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
+    return ;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i =
+		zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r;
+	z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
+	ztemp.r = z__1.r, ztemp.i = z__1.i;
+/* L30: */
+    }
+     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
+    return ;
+} /* zdotu_ */
+
+/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx,
+	integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    doublecomplex z__1, z__2;
+
+    /* Local variables */
+    static integer i__, ix;
+
+
+/*
+       scales a vector by a constant.
+       jack dongarra, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --zx;
+
+    /* Function Body */
+    if ((*n <= 0) || (*incx <= 0)) {
+	return 0;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	z__2.r = *da, z__2.i = 0.;
+	i__3 = ix;
+	z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r *
+		zx[i__3].i + z__2.i * zx[i__3].r;
+	zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
+	ix += *incx;
+/* L10: */
+    }
+    return 0;
+
+/*        code for increment equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	z__2.r = *da, z__2.i = 0.;
+	i__3 = i__;
+	z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r *
+		zx[i__3].i + z__2.i * zx[i__3].r;
+	zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
+/* L30: */
+    }
+    return 0;
+} /* zdscal_ */
+
+/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer *
+	n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda,
+	doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *
+	c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5, i__6;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static logical nota, notb;
+    static doublecomplex temp;
+    static logical conja, conjb;
+    static integer ncola;
+    extern logical lsame_(char *, char *);
+    static integer nrowa, nrowb;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZGEMM  performs one of the matrix-matrix operations
+
+       C := alpha*op( A )*op( B ) + beta*C,
+
+    where  op( X ) is one of
+
+       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
+
+    alpha and beta are scalars, and A, B and C are matrices, with op( A )
+    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n',  op( A ) = A.
+
+                TRANSA = 'T' or 't',  op( A ) = A'.
+
+                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).
+
+             Unchanged on exit.
+
+    TRANSB - CHARACTER*1.
+             On entry, TRANSB specifies the form of op( B ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSB = 'N' or 'n',  op( B ) = B.
+
+                TRANSB = 'T' or 't',  op( B ) = B'.
+
+                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry,  M  specifies  the number  of rows  of the  matrix
+             op( A )  and of the  matrix  C.  M  must  be at least  zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N  specifies the number  of columns of the matrix
+             op( B ) and the number of columns of the matrix C. N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry,  K  specifies  the number of columns of the matrix
+             op( A ) and the number of rows of the matrix op( B ). K must
+             be at least  zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
+             Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by m  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. When  TRANSA = 'N' or 'n' then
+             LDA must be at least  max( 1, m ), otherwise  LDA must be at
+             least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+             n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
+             Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  n by k  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in the calling (sub) program. When  TRANSB = 'N' or 'n' then
+             LDB must be at least  max( 1, k ), otherwise  LDB must be at
+             least  max( 1, n ).
+             Unchanged on exit.
+
+    BETA   - COMPLEX*16      .
+             On entry,  BETA  specifies the scalar  beta.  When  BETA  is
+             supplied as zero then C need not be set on input.
+             Unchanged on exit.
+
+    C      - COMPLEX*16       array of DIMENSION ( LDC, n ).
+             Before entry, the leading  m by n  part of the array  C must
+             contain the matrix  C,  except when  beta  is zero, in which
+             case C need not be set on entry.
+             On exit, the array  C  is overwritten by the  m by n  matrix
+             ( alpha*op( A )*op( B ) + beta*C ).
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
+       conjugated or transposed, set  CONJA and CONJB  as true if  A  and
+       B  respectively are to be  transposed but  not conjugated  and set
+       NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
+       and the number of rows of  B  respectively.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    nota = lsame_(transa, "N");
+    notb = lsame_(transb, "N");
+    conja = lsame_(transa, "C");
+    conjb = lsame_(transb, "C");
+    if (nota) {
+	nrowa = *m;
+	ncola = *k;
+    } else {
+	nrowa = *k;
+	ncola = *m;
+    }
+    if (notb) {
+	nrowb = *k;
+    } else {
+	nrowb = *n;
+    }
+
+/*     Test the input parameters. */
+
+    info = 0;
+    if (! nota && ! conja && ! lsame_(transa, "T")) {
+	info = 1;
+    } else if (! notb && ! conjb && ! lsame_(transb, "T")) {
+	info = 2;
+    } else if (*m < 0) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*k < 0) {
+	info = 5;
+    } else if (*lda < max(1,nrowa)) {
+	info = 8;
+    } else if (*ldb < max(1,nrowb)) {
+	info = 10;
+    } else if (*ldc < max(1,*m)) {
+	info = 13;
+    }
+    if (info != 0) {
+	xerbla_("ZGEMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (((alpha->r == 0. && alpha->i == 0.) || (*
+	    k == 0)) && (beta->r == 1. && beta->i == 0.))) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0. && alpha->i == 0.) {
+	if (beta->r == 0. && beta->i == 0.) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * c_dim1;
+		    c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L10: */
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * c_dim1;
+		    i__4 = i__ + j * c_dim1;
+		    z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i,
+			    z__1.i = beta->r * c__[i__4].i + beta->i * c__[
+			    i__4].r;
+		    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (notb) {
+	if (nota) {
+
+/*           Form  C := alpha*A*B + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (beta->r == 0. && beta->i == 0.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L50: */
+		    }
+		} else if ((beta->r != 1.) || (beta->i != 0.)) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__1.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L60: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = l + j * b_dim1;
+		    if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) {
+			i__3 = l + j * b_dim1;
+			z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
+				z__1.i = alpha->r * b[i__3].i + alpha->i * b[
+				i__3].r;
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    z__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
+				    .i + z__2.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+/* L90: */
+	    }
+	} else if (conja) {
+
+/*           Form  C := alpha*conjg( A' )*B + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * b_dim1;
+			z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i,
+				z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
+				.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L100: */
+		    }
+		    if (beta->r == 0. && beta->i == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L110: */
+		}
+/* L120: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			i__4 = l + i__ * a_dim1;
+			i__5 = l + j * b_dim1;
+			z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
+				.i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
+				.i * b[i__5].r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L130: */
+		    }
+		    if (beta->r == 0. && beta->i == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L140: */
+		}
+/* L150: */
+	    }
+	}
+    } else if (nota) {
+	if (conjb) {
+
+/*           Form  C := alpha*A*conjg( B' ) + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (beta->r == 0. && beta->i == 0.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L160: */
+		    }
+		} else if ((beta->r != 1.) || (beta->i != 0.)) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__1.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L170: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * b_dim1;
+		    if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) {
+			d_cnjg(&z__2, &b[j + l * b_dim1]);
+			z__1.r = alpha->r * z__2.r - alpha->i * z__2.i,
+				z__1.i = alpha->r * z__2.i + alpha->i *
+				z__2.r;
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    z__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
+				    .i + z__2.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L180: */
+			}
+		    }
+/* L190: */
+		}
+/* L200: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A*B'          + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (beta->r == 0. && beta->i == 0.) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L210: */
+		    }
+		} else if ((beta->r != 1.) || (beta->i != 0.)) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__1.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L220: */
+		    }
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * b_dim1;
+		    if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) {
+			i__3 = j + l * b_dim1;
+			z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
+				z__1.i = alpha->r * b[i__3].i + alpha->i * b[
+				i__3].r;
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__3 = *m;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    z__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
+				    .i + z__2.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L230: */
+			}
+		    }
+/* L240: */
+		}
+/* L250: */
+	    }
+	}
+    } else if (conja) {
+	if (conjb) {
+
+/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			d_cnjg(&z__4, &b[j + l * b_dim1]);
+			z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i =
+				z__3.r * z__4.i + z__3.i * z__4.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L260: */
+		    }
+		    if (beta->r == 0. && beta->i == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L270: */
+		}
+/* L280: */
+	    }
+	} else {
+
+/*           Form  C := alpha*conjg( A' )*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			i__4 = j + l * b_dim1;
+			z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i,
+				z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
+				.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L290: */
+		    }
+		    if (beta->r == 0. && beta->i == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L300: */
+		}
+/* L310: */
+	    }
+	}
+    } else {
+	if (conjb) {
+
+/*           Form  C := alpha*A'*conjg( B' ) + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			i__4 = l + i__ * a_dim1;
+			d_cnjg(&z__3, &b[j + l * b_dim1]);
+			z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i,
+				z__2.i = a[i__4].r * z__3.i + a[i__4].i *
+				z__3.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L320: */
+		    }
+		    if (beta->r == 0. && beta->i == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L330: */
+		}
+/* L340: */
+	    }
+	} else {
+
+/*           Form  C := alpha*A'*B' + beta*C */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			i__4 = l + i__ * a_dim1;
+			i__5 = j + l * b_dim1;
+			z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
+				.i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
+				.i * b[i__5].r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L350: */
+		    }
+		    if (beta->r == 0. && beta->i == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__2.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
+				.i, z__3.i = beta->r * c__[i__4].i + beta->i *
+				 c__[i__4].r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L360: */
+		}
+/* L370: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZGEMM . */
+
+} /* zgemm_ */
+
+/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n,
+	doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
+	x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *
+	incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static doublecomplex temp;
+    static integer lenx, leny;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj;
+
+
+/*
+    Purpose
+    =======
+
+    ZGEMV  performs one of the matrix-vector operations
+
+       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or
+
+       y := alpha*conjg( A' )*x + beta*y,
+
+    where alpha and beta are scalars, x and y are vectors and A is an
+    m by n matrix.
+
+    Parameters
+    ==========
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
+
+                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.
+
+                TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of DIMENSION at least
+             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+             Before entry, the incremented array X must contain the
+             vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - COMPLEX*16      .
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - COMPLEX*16       array of DIMENSION at least
+             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+             and at least
+             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
+             Before entry with BETA non-zero, the incremented array Y
+             must contain the vector y. On exit, Y is overwritten by the
+             updated vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C")
+	    ) {
+	info = 1;
+    } else if (*m < 0) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*lda < max(1,*m)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    } else if (*incy == 0) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("ZGEMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (alpha->r == 0. && alpha->i == 0. && (
+	    beta->r == 1. && beta->i == 0.))) {
+	return 0;
+    }
+
+    noconj = lsame_(trans, "T");
+
+/*
+       Set  LENX  and  LENY, the lengths of the vectors x and y, and set
+       up the start points in  X  and  Y.
+*/
+
+    if (lsame_(trans, "N")) {
+	lenx = *n;
+	leny = *m;
+    } else {
+	lenx = *m;
+	leny = *n;
+    }
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (lenx - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (leny - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+
+       First form  y := beta*y.
+*/
+
+    if ((beta->r != 1.) || (beta->i != 0.)) {
+	if (*incy == 1) {
+	    if (beta->r == 0. && beta->i == 0.) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    y[i__2].r = 0., y[i__2].i = 0.;
+/* L10: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    i__3 = i__;
+		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (beta->r == 0. && beta->i == 0.) {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    y[i__2].r = 0., y[i__2].i = 0.;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = leny;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    i__3 = iy;
+		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (alpha->r == 0. && alpha->i == 0.) {
+	return 0;
+    }
+    if (lsame_(trans, "N")) {
+
+/*        Form  y := alpha*A*x + y. */
+
+	jx = kx;
+	if (*incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) {
+		    i__2 = jx;
+		    z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    temp.r = z__1.r, temp.i = z__1.i;
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__;
+			i__4 = i__;
+			i__5 = i__ + j * a_dim1;
+			z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i +
+				z__2.i;
+			y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+/* L50: */
+		    }
+		}
+		jx += *incx;
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) {
+		    i__2 = jx;
+		    z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    temp.r = z__1.r, temp.i = z__1.i;
+		    iy = ky;
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = iy;
+			i__4 = iy;
+			i__5 = i__ + j * a_dim1;
+			z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i +
+				z__2.i;
+			y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+			iy += *incy;
+/* L70: */
+		    }
+		}
+		jx += *incx;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
+
+	jy = ky;
+	if (*incx == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp.r = 0., temp.i = 0.;
+		if (noconj) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__;
+			z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
+				.i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
+				.i * x[i__4].r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L90: */
+		    }
+		} else {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			i__3 = i__;
+			z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i,
+				z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
+				.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L100: */
+		    }
+		}
+		i__2 = jy;
+		i__3 = jy;
+		z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i =
+			alpha->r * temp.i + alpha->i * temp.r;
+		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		jy += *incy;
+/* L110: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		temp.r = 0., temp.i = 0.;
+		ix = kx;
+		if (noconj) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = ix;
+			z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
+				.i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
+				.i * x[i__4].r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+			ix += *incx;
+/* L120: */
+		    }
+		} else {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			i__3 = ix;
+			z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i,
+				z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
+				.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+			ix += *incx;
+/* L130: */
+		    }
+		}
+		i__2 = jy;
+		i__3 = jy;
+		z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i =
+			alpha->r * temp.i + alpha->i * temp.r;
+		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		jy += *incy;
+/* L140: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZGEMV . */
+
+} /* zgemv_ */
+
+/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha,
+	doublecomplex *x, integer *incx, doublecomplex *y, integer *incy,
+	doublecomplex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, ix, jy, kx, info;
+    static doublecomplex temp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZGERC  performs the rank 1 operation
+
+       A := alpha*x*conjg( y' ) + A,
+
+    where alpha is a scalar, x is an m element vector, y is an n element
+    vector and A is an m by n matrix.
+
+    Parameters
+    ==========
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of dimension at least
+             ( 1 + ( m - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the m
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients. On exit, A is
+             overwritten by the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (*m < 0) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("ZGERC ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (alpha->r == 0. && alpha->i == 0.)) {
+	return 0;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (*incy > 0) {
+	jy = 1;
+    } else {
+	jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) {
+		d_cnjg(&z__2, &y[jy]);
+		z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i =
+			alpha->r * z__2.i + alpha->i * z__2.r;
+		temp.r = z__1.r, temp.i = z__1.i;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = i__;
+		    z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
+		    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L10: */
+		}
+	    }
+	    jy += *incy;
+/* L20: */
+	}
+    } else {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*m - 1) * *incx;
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) {
+		d_cnjg(&z__2, &y[jy]);
+		z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i =
+			alpha->r * z__2.i + alpha->i * z__2.r;
+		temp.r = z__1.r, temp.i = z__1.i;
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = ix;
+		    z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
+		    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+		    ix += *incx;
+/* L30: */
+		}
+	    }
+	    jy += *incy;
+/* L40: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZGERC . */
+
+} /* zgerc_ */
+
+/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha,
+	doublecomplex *x, integer *incx, doublecomplex *y, integer *incy,
+	doublecomplex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2;
+
+    /* Local variables */
+    static integer i__, j, ix, jy, kx, info;
+    static doublecomplex temp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZGERU  performs the rank 1 operation
+
+       A := alpha*x*y' + A,
+
+    where alpha is a scalar, x is an m element vector, y is an n element
+    vector and A is an m by n matrix.
+
+    Parameters
+    ==========
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of the matrix A.
+             M must be at least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of dimension at least
+             ( 1 + ( m - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the m
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry, the leading m by n part of the array A must
+             contain the matrix of coefficients. On exit, A is
+             overwritten by the updated matrix.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (*m < 0) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("ZGERU ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (((*m == 0) || (*n == 0)) || (alpha->r == 0. && alpha->i == 0.)) {
+	return 0;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (*incy > 0) {
+	jy = 1;
+    } else {
+	jy = 1 - (*n - 1) * *incy;
+    }
+    if (*incx == 1) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) {
+		i__2 = jy;
+		z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i =
+			 alpha->r * y[i__2].i + alpha->i * y[i__2].r;
+		temp.r = z__1.r, temp.i = z__1.i;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = i__;
+		    z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
+		    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L10: */
+		}
+	    }
+	    jy += *incy;
+/* L20: */
+	}
+    } else {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*m - 1) * *incx;
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = jy;
+	    if ((y[i__2].r != 0.) || (y[i__2].i != 0.)) {
+		i__2 = jy;
+		z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i =
+			 alpha->r * y[i__2].i + alpha->i * y[i__2].r;
+		temp.r = z__1.r, temp.i = z__1.i;
+		ix = kx;
+		i__2 = *m;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__ + j * a_dim1;
+		    i__4 = i__ + j * a_dim1;
+		    i__5 = ix;
+		    z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
+			     x[i__5].r * temp.i + x[i__5].i * temp.r;
+		    z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
+		    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+		    ix += *incx;
+/* L30: */
+		}
+	    }
+	    jy += *incy;
+/* L40: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZGERU . */
+
+} /* zgeru_ */
+
+/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha,
+	doublecomplex *a, integer *lda, doublecomplex *x, integer *incx,
+	doublecomplex *beta, doublecomplex *y, integer *incy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static doublecomplex temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZHEMV  performs the matrix-vector  operation
+
+       y := alpha*A*x + beta*y,
+
+    where alpha and beta are scalars, x and y are n element vectors and
+    A is an n by n hermitian matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the hermitian matrix and the strictly
+             lower triangular part of A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the hermitian matrix and the strictly
+             upper triangular part of A is not referenced.
+             Note that the imaginary parts of the diagonal elements need
+             not be set and are assumed to be zero.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    BETA   - COMPLEX*16      .
+             On entry, BETA specifies the scalar beta. When BETA is
+             supplied as zero then Y need not be set on input.
+             Unchanged on exit.
+
+    Y      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y. On exit, Y is overwritten by the updated
+             vector y.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --y;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*lda < max(1,*n)) {
+	info = 5;
+    } else if (*incx == 0) {
+	info = 7;
+    } else if (*incy == 0) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("ZHEMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (alpha->r == 0. && alpha->i == 0. && (beta->r == 1. &&
+	    beta->i == 0.))) {
+	return 0;
+    }
+
+/*     Set up the start points in  X  and  Y. */
+
+    if (*incx > 0) {
+	kx = 1;
+    } else {
+	kx = 1 - (*n - 1) * *incx;
+    }
+    if (*incy > 0) {
+	ky = 1;
+    } else {
+	ky = 1 - (*n - 1) * *incy;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+
+       First form  y := beta*y.
+*/
+
+    if ((beta->r != 1.) || (beta->i != 0.)) {
+	if (*incy == 1) {
+	    if (beta->r == 0. && beta->i == 0.) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    y[i__2].r = 0., y[i__2].i = 0.;
+/* L10: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = i__;
+		    i__3 = i__;
+		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+/* L20: */
+		}
+	    }
+	} else {
+	    iy = ky;
+	    if (beta->r == 0. && beta->i == 0.) {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    y[i__2].r = 0., y[i__2].i = 0.;
+		    iy += *incy;
+/* L30: */
+		}
+	    } else {
+		i__1 = *n;
+		for (i__ = 1; i__ <= i__1; ++i__) {
+		    i__2 = iy;
+		    i__3 = iy;
+		    z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i,
+			    z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
+			    .r;
+		    y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		    iy += *incy;
+/* L40: */
+		}
+	    }
+	}
+    }
+    if (alpha->r == 0. && alpha->i == 0.) {
+	return 0;
+    }
+    if (lsame_(uplo, "U")) {
+
+/*        Form  y  when A is stored in upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = z__1.r, temp1.i = z__1.i;
+		temp2.r = 0., temp2.i = 0.;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = i__;
+		    i__4 = i__;
+		    i__5 = i__ + j * a_dim1;
+		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+		    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+		    i__3 = i__;
+		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+/* L50: */
+		}
+		i__2 = j;
+		i__3 = j;
+		i__4 = j + j * a_dim1;
+		d__1 = a[i__4].r;
+		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+		z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
+		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+/* L60: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = z__1.r, temp1.i = z__1.i;
+		temp2.r = 0., temp2.i = 0.;
+		ix = kx;
+		iy = ky;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    i__3 = iy;
+		    i__4 = iy;
+		    i__5 = i__ + j * a_dim1;
+		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+		    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+		    i__3 = ix;
+		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+		    ix += *incx;
+		    iy += *incy;
+/* L70: */
+		}
+		i__2 = jy;
+		i__3 = jy;
+		i__4 = j + j * a_dim1;
+		d__1 = a[i__4].r;
+		z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
+		z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
+		z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    } else {
+
+/*        Form  y  when A is stored in lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = z__1.r, temp1.i = z__1.i;
+		temp2.r = 0., temp2.i = 0.;
+		i__2 = j;
+		i__3 = j;
+		i__4 = j + j * a_dim1;
+		d__1 = a[i__4].r;
+		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    i__3 = i__;
+		    i__4 = i__;
+		    i__5 = i__ + j * a_dim1;
+		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+		    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+		    i__3 = i__;
+		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+/* L90: */
+		}
+		i__2 = j;
+		i__3 = j;
+		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+/* L100: */
+	    }
+	} else {
+	    jx = kx;
+	    jy = ky;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
+			 alpha->r * x[i__2].i + alpha->i * x[i__2].r;
+		temp1.r = z__1.r, temp1.i = z__1.i;
+		temp2.r = 0., temp2.i = 0.;
+		i__2 = jy;
+		i__3 = jy;
+		i__4 = j + j * a_dim1;
+		d__1 = a[i__4].r;
+		z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
+		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		ix = jx;
+		iy = jy;
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    ix += *incx;
+		    iy += *incy;
+		    i__3 = iy;
+		    i__4 = iy;
+		    i__5 = i__ + j * a_dim1;
+		    z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i,
+			    z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
+			    .r;
+		    z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
+		    y[i__3].r = z__1.r, y[i__3].i = z__1.i;
+		    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+		    i__3 = ix;
+		    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
+			     z__3.r * x[i__3].i + z__3.i * x[i__3].r;
+		    z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+/* L110: */
+		}
+		i__2 = jy;
+		i__3 = jy;
+		z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i =
+			alpha->r * temp2.i + alpha->i * temp2.r;
+		z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
+		y[i__2].r = z__1.r, y[i__2].i = z__1.i;
+		jx += *incx;
+		jy += *incy;
+/* L120: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZHEMV . */
+
+} /* zhemv_ */
+
+/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha,
+	doublecomplex *x, integer *incx, doublecomplex *y, integer *incy,
+	doublecomplex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+    doublereal d__1;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
+    static doublecomplex temp1, temp2;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZHER2  performs the hermitian rank 2 operation
+
+       A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A,
+
+    where alpha is a scalar, x and y are n element vectors and A is an n
+    by n hermitian matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the upper or lower
+             triangular part of the array A is to be referenced as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the upper triangular part of A
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the lower triangular part of A
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x.
+             Unchanged on exit.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+    Y      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCY ) ).
+             Before entry, the incremented array Y must contain the n
+             element vector y.
+             Unchanged on exit.
+
+    INCY   - INTEGER.
+             On entry, INCY specifies the increment for the elements of
+             Y. INCY must not be zero.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular part of the hermitian matrix and the strictly
+             lower triangular part of A is not referenced. On exit, the
+             upper triangular part of the array A is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular part of the hermitian matrix and the strictly
+             upper triangular part of A is not referenced. On exit, the
+             lower triangular part of the array A is overwritten by the
+             lower triangular part of the updated matrix.
+             Note that the imaginary parts of the diagonal elements need
+             not be set, they are assumed to be zero, and on exit they
+             are set to zero.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --x;
+    --y;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (*n < 0) {
+	info = 2;
+    } else if (*incx == 0) {
+	info = 5;
+    } else if (*incy == 0) {
+	info = 7;
+    } else if (*lda < max(1,*n)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("ZHER2 ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (alpha->r == 0. && alpha->i == 0.)) {
+	return 0;
+    }
+
+/*
+       Set up the start points in X and Y if the increments are not both
+       unity.
+*/
+
+    if ((*incx != 1) || (*incy != 1)) {
+	if (*incx > 0) {
+	    kx = 1;
+	} else {
+	    kx = 1 - (*n - 1) * *incx;
+	}
+	if (*incy > 0) {
+	    ky = 1;
+	} else {
+	    ky = 1 - (*n - 1) * *incy;
+	}
+	jx = kx;
+	jy = ky;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through the triangular part
+       of A.
+*/
+
+    if (lsame_(uplo, "U")) {
+
+/*        Form  A  when A is stored in the upper triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		i__3 = j;
+		if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r
+			!= 0.) || (y[i__3].i != 0.)))) {
+		    d_cnjg(&z__2, &y[j]);
+		    z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i =
+			    alpha->r * z__2.i + alpha->i * z__2.r;
+		    temp1.r = z__1.r, temp1.i = z__1.i;
+		    i__2 = j;
+		    z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    d_cnjg(&z__1, &z__2);
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = i__;
+			z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				z__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i +
+				z__3.i;
+			i__6 = i__;
+			z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				z__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+			a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L10: */
+		    }
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = j;
+		    z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    z__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = j;
+		    z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    z__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+		    d__1 = a[i__3].r + z__1.r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    d__1 = a[i__3].r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		i__3 = jy;
+		if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r
+			!= 0.) || (y[i__3].i != 0.)))) {
+		    d_cnjg(&z__2, &y[jy]);
+		    z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i =
+			    alpha->r * z__2.i + alpha->i * z__2.r;
+		    temp1.r = z__1.r, temp1.i = z__1.i;
+		    i__2 = jx;
+		    z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    d_cnjg(&z__1, &z__2);
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+		    ix = kx;
+		    iy = ky;
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = ix;
+			z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				z__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i +
+				z__3.i;
+			i__6 = iy;
+			z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				z__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+			a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			ix += *incx;
+			iy += *incy;
+/* L30: */
+		    }
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = jx;
+		    z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    z__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = jy;
+		    z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    z__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+		    d__1 = a[i__3].r + z__1.r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    d__1 = a[i__3].r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		}
+		jx += *incx;
+		jy += *incy;
+/* L40: */
+	    }
+	}
+    } else {
+
+/*        Form  A  when A is stored in the lower triangle. */
+
+	if (*incx == 1 && *incy == 1) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		i__3 = j;
+		if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r
+			!= 0.) || (y[i__3].i != 0.)))) {
+		    d_cnjg(&z__2, &y[j]);
+		    z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i =
+			    alpha->r * z__2.i + alpha->i * z__2.r;
+		    temp1.r = z__1.r, temp1.i = z__1.i;
+		    i__2 = j;
+		    z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    d_cnjg(&z__1, &z__2);
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = j;
+		    z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    z__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = j;
+		    z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    z__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+		    d__1 = a[i__3].r + z__1.r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = i__;
+			z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				z__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i +
+				z__3.i;
+			i__6 = i__;
+			z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				z__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+			a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L50: */
+		    }
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    d__1 = a[i__3].r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		}
+/* L60: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = jx;
+		i__3 = jy;
+		if (((x[i__2].r != 0.) || (x[i__2].i != 0.)) || (((y[i__3].r
+			!= 0.) || (y[i__3].i != 0.)))) {
+		    d_cnjg(&z__2, &y[jy]);
+		    z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i =
+			    alpha->r * z__2.i + alpha->i * z__2.r;
+		    temp1.r = z__1.r, temp1.i = z__1.i;
+		    i__2 = jx;
+		    z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i,
+			    z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
+			    .r;
+		    d_cnjg(&z__1, &z__2);
+		    temp2.r = z__1.r, temp2.i = z__1.i;
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    i__4 = jx;
+		    z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i,
+			    z__2.i = x[i__4].r * temp1.i + x[i__4].i *
+			    temp1.r;
+		    i__5 = jy;
+		    z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i,
+			    z__3.i = y[i__5].r * temp2.i + y[i__5].i *
+			    temp2.r;
+		    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+		    d__1 = a[i__3].r + z__1.r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		    ix = jx;
+		    iy = jy;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			ix += *incx;
+			iy += *incy;
+			i__3 = i__ + j * a_dim1;
+			i__4 = i__ + j * a_dim1;
+			i__5 = ix;
+			z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i,
+				z__3.i = x[i__5].r * temp1.i + x[i__5].i *
+				temp1.r;
+			z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i +
+				z__3.i;
+			i__6 = iy;
+			z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i,
+				z__4.i = y[i__6].r * temp2.i + y[i__6].i *
+				temp2.r;
+			z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+			a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L70: */
+		    }
+		} else {
+		    i__2 = j + j * a_dim1;
+		    i__3 = j + j * a_dim1;
+		    d__1 = a[i__3].r;
+		    a[i__2].r = d__1, a[i__2].i = 0.;
+		}
+		jx += *incx;
+		jy += *incy;
+/* L80: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZHER2 . */
+
+} /* zher2_ */
+
+/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k,
+	doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
+	b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5, i__6, i__7;
+    doublereal d__1;
+    doublecomplex z__1, z__2, z__3, z__4, z__5, z__6;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static doublecomplex temp1, temp2;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZHER2K  performs one of the hermitian rank 2k operations
+
+       C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C,
+
+    or
+
+       C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C,
+
+    where  alpha and beta  are scalars with  beta  real,  C is an  n by n
+    hermitian matrix and  A and B  are  n by k matrices in the first case
+    and  k by n  matrices in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          +
+                                           conjg( alpha )*B*conjg( A' ) +
+                                           beta*C.
+
+                TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          +
+                                           conjg( alpha )*conjg( B' )*A +
+                                           beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns  of the  matrices  A and B,  and on  entry  with
+             TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+             matrices  A and B.  K must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16         .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  B  must contain the matrix  B,  otherwise
+             the leading  k by n  part of the array  B  must contain  the
+             matrix B.
+             Unchanged on exit.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDB must be at least  max( 1, n ), otherwise  LDB must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION            .
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - COMPLEX*16          array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  hermitian matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  hermitian matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+             Note that the imaginary parts of the diagonal elements need
+             not be set,  they are assumed to be zero,  and on exit they
+             are set to zero.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+    -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
+       Ed Anderson, Cray Research Inc.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldb < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldc < max(1,*n)) {
+	info = 12;
+    }
+    if (info != 0) {
+	xerbla_("ZHER2K", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((alpha->r == 0. && alpha->i == 0.) || (*k == 0)) && *
+	    beta == 1.)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0. && alpha->i == 0.) {
+	if (upper) {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L30: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*
+          Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) +
+                     C.
+*/
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L90: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L100: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    i__4 = j + l * b_dim1;
+		    if (((a[i__3].r != 0.) || (a[i__3].i != 0.)) || (((b[i__4]
+			    .r != 0.) || (b[i__4].i != 0.)))) {
+			d_cnjg(&z__2, &b[j + l * b_dim1]);
+			z__1.r = alpha->r * z__2.r - alpha->i * z__2.i,
+				z__1.i = alpha->r * z__2.i + alpha->i *
+				z__2.r;
+			temp1.r = z__1.r, temp1.i = z__1.i;
+			i__3 = j + l * a_dim1;
+			z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i,
+				z__2.i = alpha->r * a[i__3].i + alpha->i * a[
+				i__3].r;
+			d_cnjg(&z__1, &z__2);
+			temp2.r = z__1.r, temp2.i = z__1.i;
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__3.r = a[i__6].r * temp1.r - a[i__6].i *
+				    temp1.i, z__3.i = a[i__6].r * temp1.i + a[
+				    i__6].i * temp1.r;
+			    z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
+				    .i + z__3.i;
+			    i__7 = i__ + l * b_dim1;
+			    z__4.r = b[i__7].r * temp2.r - b[i__7].i *
+				    temp2.i, z__4.i = b[i__7].r * temp2.i + b[
+				    i__7].i * temp2.r;
+			    z__1.r = z__2.r + z__4.r, z__1.i = z__2.i +
+				    z__4.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L110: */
+			}
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = j + l * a_dim1;
+			z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i,
+				z__2.i = a[i__5].r * temp1.i + a[i__5].i *
+				temp1.r;
+			i__6 = j + l * b_dim1;
+			z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i,
+				z__3.i = b[i__6].r * temp2.i + b[i__6].i *
+				temp2.r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			d__1 = c__[i__4].r + z__1.r;
+			c__[i__3].r = d__1, c__[i__3].i = 0.;
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L140: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L150: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    i__4 = j + l * b_dim1;
+		    if (((a[i__3].r != 0.) || (a[i__3].i != 0.)) || (((b[i__4]
+			    .r != 0.) || (b[i__4].i != 0.)))) {
+			d_cnjg(&z__2, &b[j + l * b_dim1]);
+			z__1.r = alpha->r * z__2.r - alpha->i * z__2.i,
+				z__1.i = alpha->r * z__2.i + alpha->i *
+				z__2.r;
+			temp1.r = z__1.r, temp1.i = z__1.i;
+			i__3 = j + l * a_dim1;
+			z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i,
+				z__2.i = alpha->r * a[i__3].i + alpha->i * a[
+				i__3].r;
+			d_cnjg(&z__1, &z__2);
+			temp2.r = z__1.r, temp2.i = z__1.i;
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__3.r = a[i__6].r * temp1.r - a[i__6].i *
+				    temp1.i, z__3.i = a[i__6].r * temp1.i + a[
+				    i__6].i * temp1.r;
+			    z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
+				    .i + z__3.i;
+			    i__7 = i__ + l * b_dim1;
+			    z__4.r = b[i__7].r * temp2.r - b[i__7].i *
+				    temp2.i, z__4.i = b[i__7].r * temp2.i + b[
+				    i__7].i * temp2.r;
+			    z__1.r = z__2.r + z__4.r, z__1.i = z__2.i +
+				    z__4.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L160: */
+			}
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = j + l * a_dim1;
+			z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i,
+				z__2.i = a[i__5].r * temp1.i + a[i__5].i *
+				temp1.r;
+			i__6 = j + l * b_dim1;
+			z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i,
+				z__3.i = b[i__6].r * temp2.i + b[i__6].i *
+				temp2.r;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			d__1 = c__[i__4].r + z__1.r;
+			c__[i__3].r = d__1, c__[i__3].i = 0.;
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*
+          Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A +
+                     C.
+*/
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp1.r = 0., temp1.i = 0.;
+		    temp2.r = 0., temp2.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * b_dim1;
+			z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i,
+				z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
+				.r;
+			z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
+			temp1.r = z__1.r, temp1.i = z__1.i;
+			d_cnjg(&z__3, &b[l + i__ * b_dim1]);
+			i__4 = l + j * a_dim1;
+			z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i,
+				z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
+				.r;
+			z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+			temp2.r = z__1.r, temp2.i = z__1.i;
+/* L190: */
+		    }
+		    if (i__ == j) {
+			if (*beta == 0.) {
+			    i__3 = j + j * c_dim1;
+			    z__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    d_cnjg(&z__4, alpha);
+			    z__3.r = z__4.r * temp2.r - z__4.i * temp2.i,
+				    z__3.i = z__4.r * temp2.i + z__4.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    d__1 = z__1.r;
+			    c__[i__3].r = d__1, c__[i__3].i = 0.;
+			} else {
+			    i__3 = j + j * c_dim1;
+			    i__4 = j + j * c_dim1;
+			    z__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    d_cnjg(&z__4, alpha);
+			    z__3.r = z__4.r * temp2.r - z__4.i * temp2.i,
+				    z__3.i = z__4.r * temp2.i + z__4.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    d__1 = *beta * c__[i__4].r + z__1.r;
+			    c__[i__3].r = d__1, c__[i__3].i = 0.;
+			}
+		    } else {
+			if (*beta == 0.) {
+			    i__3 = i__ + j * c_dim1;
+			    z__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    d_cnjg(&z__4, alpha);
+			    z__3.r = z__4.r * temp2.r - z__4.i * temp2.i,
+				    z__3.i = z__4.r * temp2.i + z__4.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+			} else {
+			    i__3 = i__ + j * c_dim1;
+			    i__4 = i__ + j * c_dim1;
+			    z__3.r = *beta * c__[i__4].r, z__3.i = *beta *
+				    c__[i__4].i;
+			    z__4.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__4.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i +
+				    z__4.i;
+			    d_cnjg(&z__6, alpha);
+			    z__5.r = z__6.r * temp2.r - z__6.i * temp2.i,
+				    z__5.i = z__6.r * temp2.i + z__6.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i +
+				    z__5.i;
+			    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+			}
+		    }
+/* L200: */
+		}
+/* L210: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+		    temp1.r = 0., temp1.i = 0.;
+		    temp2.r = 0., temp2.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * b_dim1;
+			z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i,
+				z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
+				.r;
+			z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
+			temp1.r = z__1.r, temp1.i = z__1.i;
+			d_cnjg(&z__3, &b[l + i__ * b_dim1]);
+			i__4 = l + j * a_dim1;
+			z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i,
+				z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
+				.r;
+			z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
+			temp2.r = z__1.r, temp2.i = z__1.i;
+/* L220: */
+		    }
+		    if (i__ == j) {
+			if (*beta == 0.) {
+			    i__3 = j + j * c_dim1;
+			    z__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    d_cnjg(&z__4, alpha);
+			    z__3.r = z__4.r * temp2.r - z__4.i * temp2.i,
+				    z__3.i = z__4.r * temp2.i + z__4.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    d__1 = z__1.r;
+			    c__[i__3].r = d__1, c__[i__3].i = 0.;
+			} else {
+			    i__3 = j + j * c_dim1;
+			    i__4 = j + j * c_dim1;
+			    z__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    d_cnjg(&z__4, alpha);
+			    z__3.r = z__4.r * temp2.r - z__4.i * temp2.i,
+				    z__3.i = z__4.r * temp2.i + z__4.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    d__1 = *beta * c__[i__4].r + z__1.r;
+			    c__[i__3].r = d__1, c__[i__3].i = 0.;
+			}
+		    } else {
+			if (*beta == 0.) {
+			    i__3 = i__ + j * c_dim1;
+			    z__2.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__2.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    d_cnjg(&z__4, alpha);
+			    z__3.r = z__4.r * temp2.r - z__4.i * temp2.i,
+				    z__3.i = z__4.r * temp2.i + z__4.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+			} else {
+			    i__3 = i__ + j * c_dim1;
+			    i__4 = i__ + j * c_dim1;
+			    z__3.r = *beta * c__[i__4].r, z__3.i = *beta *
+				    c__[i__4].i;
+			    z__4.r = alpha->r * temp1.r - alpha->i * temp1.i,
+				    z__4.i = alpha->r * temp1.i + alpha->i *
+				    temp1.r;
+			    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i +
+				    z__4.i;
+			    d_cnjg(&z__6, alpha);
+			    z__5.r = z__6.r * temp2.r - z__6.i * temp2.i,
+				    z__5.i = z__6.r * temp2.i + z__6.i *
+				    temp2.r;
+			    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i +
+				    z__5.i;
+			    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+			}
+		    }
+/* L230: */
+		}
+/* L240: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZHER2K. */
+
+} /* zher2k_ */
+
+/* Subroutine */ int zherk_(char *uplo, char *trans, integer *n, integer *k,
+	doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta,
+	doublecomplex *c__, integer *ldc)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5,
+	    i__6;
+    doublereal d__1;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, l, info;
+    static doublecomplex temp;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static doublereal rtemp;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    Purpose
+    =======
+
+    ZHERK  performs one of the hermitian rank k operations
+
+       C := alpha*A*conjg( A' ) + beta*C,
+
+    or
+
+       C := alpha*conjg( A' )*A + beta*C,
+
+    where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
+    matrix and  A  is an  n by k  matrix in the  first case and a  k by n
+    matrix in the second case.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On  entry,   UPLO  specifies  whether  the  upper  or  lower
+             triangular  part  of the  array  C  is to be  referenced  as
+             follows:
+
+                UPLO = 'U' or 'u'   Only the  upper triangular part of  C
+                                    is to be referenced.
+
+                UPLO = 'L' or 'l'   Only the  lower triangular part of  C
+                                    is to be referenced.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry,  TRANS  specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C.
+
+                TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry,  N specifies the order of the matrix C.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    K      - INTEGER.
+             On entry with  TRANS = 'N' or 'n',  K  specifies  the number
+             of  columns   of  the   matrix   A,   and  on   entry   with
+             TRANS = 'C' or 'c',  K  specifies  the number of rows of the
+             matrix A.  K must be at least zero.
+             Unchanged on exit.
+
+    ALPHA  - DOUBLE PRECISION            .
+             On entry, ALPHA specifies the scalar alpha.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is
+             k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
+             Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
+             part of the array  A  must contain the matrix  A,  otherwise
+             the leading  k by n  part of the array  A  must contain  the
+             matrix A.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
+             then  LDA must be at least  max( 1, n ), otherwise  LDA must
+             be at least  max( 1, k ).
+             Unchanged on exit.
+
+    BETA   - DOUBLE PRECISION.
+             On entry, BETA specifies the scalar beta.
+             Unchanged on exit.
+
+    C      - COMPLEX*16          array of DIMENSION ( LDC, n ).
+             Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
+             upper triangular part of the array C must contain the upper
+             triangular part  of the  hermitian matrix  and the strictly
+             lower triangular part of C is not referenced.  On exit, the
+             upper triangular part of the array  C is overwritten by the
+             upper triangular part of the updated matrix.
+             Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
+             lower triangular part of the array C must contain the lower
+             triangular part  of the  hermitian matrix  and the strictly
+             upper triangular part of C is not referenced.  On exit, the
+             lower triangular part of the array  C is overwritten by the
+             lower triangular part of the updated matrix.
+             Note that the imaginary parts of the diagonal elements need
+             not be set,  they are assumed to be zero,  and on exit they
+             are set to zero.
+
+    LDC    - INTEGER.
+             On entry, LDC specifies the first dimension of C as declared
+             in  the  calling  (sub)  program.   LDC  must  be  at  least
+             max( 1, n ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+    -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
+       Ed Anderson, Cray Research Inc.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+
+    /* Function Body */
+    if (lsame_(trans, "N")) {
+	nrowa = *n;
+    } else {
+	nrowa = *k;
+    }
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! upper && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "C")) {
+	info = 2;
+    } else if (*n < 0) {
+	info = 3;
+    } else if (*k < 0) {
+	info = 4;
+    } else if (*lda < max(1,nrowa)) {
+	info = 7;
+    } else if (*ldc < max(1,*n)) {
+	info = 10;
+    }
+    if (info != 0) {
+	xerbla_("ZHERK ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*n == 0) || (((*alpha == 0.) || (*k == 0)) && *beta == 1.)) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (*alpha == 0.) {
+	if (upper) {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L10: */
+		    }
+/* L20: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L30: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*beta == 0.) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  C := alpha*A*conjg( A' ) + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = j;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L90: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = j - 1;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L100: */
+		    }
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) {
+			d_cnjg(&z__2, &a[j + l * a_dim1]);
+			z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    z__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
+				    .i + z__2.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L110: */
+			}
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = i__ + l * a_dim1;
+			z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				z__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			d__1 = c__[i__4].r + z__1.r;
+			c__[i__3].r = d__1, c__[i__3].i = 0.;
+		    }
+/* L120: */
+		}
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (*beta == 0.) {
+		    i__2 = *n;
+		    for (i__ = j; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			c__[i__3].r = 0., c__[i__3].i = 0.;
+/* L140: */
+		    }
+		} else if (*beta != 1.) {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		    i__2 = *n;
+		    for (i__ = j + 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
+				i__4].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L150: */
+		    }
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		}
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    i__3 = j + l * a_dim1;
+		    if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) {
+			d_cnjg(&z__2, &a[j + l * a_dim1]);
+			z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__3 = j + j * c_dim1;
+			i__4 = j + j * c_dim1;
+			i__5 = j + l * a_dim1;
+			z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				z__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
+				.r;
+			d__1 = c__[i__4].r + z__1.r;
+			c__[i__3].r = d__1, c__[i__3].i = 0.;
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * c_dim1;
+			    i__5 = i__ + j * c_dim1;
+			    i__6 = i__ + l * a_dim1;
+			    z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i,
+				    z__2.i = temp.r * a[i__6].i + temp.i * a[
+				    i__6].r;
+			    z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
+				    .i + z__2.i;
+			    c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
+/* L160: */
+			}
+		    }
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+    } else {
+
+/*        Form  C := alpha*conjg( A' )*A + beta*C. */
+
+	if (upper) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * a_dim1;
+			z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i,
+				z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
+				.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L190: */
+		    }
+		    if (*beta == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[
+				i__4].i;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L200: */
+		}
+		rtemp = 0.;
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    d_cnjg(&z__3, &a[l + j * a_dim1]);
+		    i__3 = l + j * a_dim1;
+		    z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i =
+			     z__3.r * a[i__3].i + z__3.i * a[i__3].r;
+		    z__1.r = rtemp + z__2.r, z__1.i = z__2.i;
+		    rtemp = z__1.r;
+/* L210: */
+		}
+		if (*beta == 0.) {
+		    i__2 = j + j * c_dim1;
+		    d__1 = *alpha * rtemp;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *alpha * rtemp + *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		}
+/* L220: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		rtemp = 0.;
+		i__2 = *k;
+		for (l = 1; l <= i__2; ++l) {
+		    d_cnjg(&z__3, &a[l + j * a_dim1]);
+		    i__3 = l + j * a_dim1;
+		    z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i =
+			     z__3.r * a[i__3].i + z__3.i * a[i__3].r;
+		    z__1.r = rtemp + z__2.r, z__1.i = z__2.i;
+		    rtemp = z__1.r;
+/* L230: */
+		}
+		if (*beta == 0.) {
+		    i__2 = j + j * c_dim1;
+		    d__1 = *alpha * rtemp;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		} else {
+		    i__2 = j + j * c_dim1;
+		    i__3 = j + j * c_dim1;
+		    d__1 = *alpha * rtemp + *beta * c__[i__3].r;
+		    c__[i__2].r = d__1, c__[i__2].i = 0.;
+		}
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    temp.r = 0., temp.i = 0.;
+		    i__3 = *k;
+		    for (l = 1; l <= i__3; ++l) {
+			d_cnjg(&z__3, &a[l + i__ * a_dim1]);
+			i__4 = l + j * a_dim1;
+			z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i,
+				z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
+				.r;
+			z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
+			temp.r = z__1.r, temp.i = z__1.i;
+/* L240: */
+		    }
+		    if (*beta == 0.) {
+			i__3 = i__ + j * c_dim1;
+			z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    } else {
+			i__3 = i__ + j * c_dim1;
+			z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i;
+			i__4 = i__ + j * c_dim1;
+			z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[
+				i__4].i;
+			z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+		    }
+/* L250: */
+		}
+/* L260: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZHERK . */
+
+} /* zherk_ */
+
+/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx,
+	integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, ix;
+
+
+/*
+       scales a vector by a constant.
+       jack dongarra, 3/11/78.
+       modified 3/93 to return if incx .le. 0.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --zx;
+
+    /* Function Body */
+    if ((*n <= 0) || (*incx <= 0)) {
+	return 0;
+    }
+    if (*incx == 1) {
+	goto L20;
+    }
+
+/*        code for increment not equal to 1 */
+
+    ix = 1;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	i__3 = ix;
+	z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[
+		i__3].i + za->i * zx[i__3].r;
+	zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
+	ix += *incx;
+/* L10: */
+    }
+    return 0;
+
+/*        code for increment equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__;
+	z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[
+		i__3].i + za->i * zx[i__3].r;
+	zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
+/* L30: */
+    }
+    return 0;
+} /* zscal_ */
+
+/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx,
+	doublecomplex *zy, integer *incy)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static doublecomplex ztemp;
+
+
+/*
+       interchanges two vectors.
+       jack dongarra, 3/11/78.
+       modified 12/3/93, array(1) declarations changed to array(*)
+*/
+
+
+    /* Parameter adjustments */
+    --zy;
+    --zx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+         code for unequal increments or equal increments not equal
+           to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i;
+	i__2 = ix;
+	i__3 = iy;
+	zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i;
+	i__2 = iy;
+	zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*       code for both increments equal to 1 */
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i;
+	i__2 = i__;
+	i__3 = i__;
+	zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i;
+	i__2 = i__;
+	zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i;
+/* L30: */
+    }
+    return 0;
+} /* zswap_ */
+
+/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, doublecomplex *alpha, doublecomplex *a,
+	integer *lda, doublecomplex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5,
+	    i__6;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static doublecomplex temp;
+    static logical lside;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    ZTRMM  performs one of the matrix-matrix operations
+
+       B := alpha*op( A )*B,   or   B := alpha*B*op( A )
+
+    where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry,  SIDE specifies whether  op( A ) multiplies B from
+             the left or right as follows:
+
+                SIDE = 'L' or 'l'   B := alpha*op( A )*B.
+
+                SIDE = 'R' or 'r'   B := alpha*B*op( A ).
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain the matrix  B,  and  on exit  is overwritten  by the
+             transformed matrix.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    noconj = lsame_(transa, "T");
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("ZTRMM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0. && alpha->i == 0.) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		b[i__3].r = 0., b[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*A*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			i__3 = k + j * b_dim1;
+			if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) {
+			    i__3 = k + j * b_dim1;
+			    z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
+				    .i, z__1.i = alpha->r * b[i__3].i +
+				    alpha->i * b[i__3].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    i__3 = k - 1;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * a_dim1;
+				z__2.r = temp.r * a[i__6].r - temp.i * a[i__6]
+					.i, z__2.i = temp.r * a[i__6].i +
+					temp.i * a[i__6].r;
+				z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
+					.i + z__2.i;
+				b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L30: */
+			    }
+			    if (nounit) {
+				i__3 = k + k * a_dim1;
+				z__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
+					.i, z__1.i = temp.r * a[i__3].i +
+					temp.i * a[i__3].r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__3 = k + j * b_dim1;
+			    b[i__3].r = temp.r, b[i__3].i = temp.i;
+			}
+/* L40: */
+		    }
+/* L50: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (k = *m; k >= 1; --k) {
+			i__2 = k + j * b_dim1;
+			if ((b[i__2].r != 0.) || (b[i__2].i != 0.)) {
+			    i__2 = k + j * b_dim1;
+			    z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2]
+				    .i, z__1.i = alpha->r * b[i__2].i +
+				    alpha->i * b[i__2].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    i__2 = k + j * b_dim1;
+			    b[i__2].r = temp.r, b[i__2].i = temp.i;
+			    if (nounit) {
+				i__2 = k + j * b_dim1;
+				i__3 = k + j * b_dim1;
+				i__4 = k + k * a_dim1;
+				z__1.r = b[i__3].r * a[i__4].r - b[i__3].i *
+					a[i__4].i, z__1.i = b[i__3].r * a[
+					i__4].i + b[i__3].i * a[i__4].r;
+				b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+			    }
+			    i__2 = *m;
+			    for (i__ = k + 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * a_dim1;
+				z__2.r = temp.r * a[i__5].r - temp.i * a[i__5]
+					.i, z__2.i = temp.r * a[i__5].i +
+					temp.i * a[i__5].r;
+				z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
+					.i + z__2.i;
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L60: */
+			    }
+			}
+/* L70: */
+		    }
+/* L80: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			i__2 = i__ + j * b_dim1;
+			temp.r = b[i__2].r, temp.i = b[i__2].i;
+			if (noconj) {
+			    if (nounit) {
+				i__2 = i__ + i__ * a_dim1;
+				z__1.r = temp.r * a[i__2].r - temp.i * a[i__2]
+					.i, z__1.i = temp.r * a[i__2].i +
+					temp.i * a[i__2].r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__2 = i__ - 1;
+			    for (k = 1; k <= i__2; ++k) {
+				i__3 = k + i__ * a_dim1;
+				i__4 = k + j * b_dim1;
+				z__2.r = a[i__3].r * b[i__4].r - a[i__3].i *
+					b[i__4].i, z__2.i = a[i__3].r * b[
+					i__4].i + a[i__3].i * b[i__4].r;
+				z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L90: */
+			    }
+			} else {
+			    if (nounit) {
+				d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
+				z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+					z__1.i = temp.r * z__2.i + temp.i *
+					z__2.r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__2 = i__ - 1;
+			    for (k = 1; k <= i__2; ++k) {
+				d_cnjg(&z__3, &a[k + i__ * a_dim1]);
+				i__3 = k + j * b_dim1;
+				z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3]
+					.i, z__2.i = z__3.r * b[i__3].i +
+					z__3.i * b[i__3].r;
+				z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L100: */
+			    }
+			}
+			i__2 = i__ + j * b_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L110: */
+		    }
+/* L120: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * b_dim1;
+			temp.r = b[i__3].r, temp.i = b[i__3].i;
+			if (noconj) {
+			    if (nounit) {
+				i__3 = i__ + i__ * a_dim1;
+				z__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
+					.i, z__1.i = temp.r * a[i__3].i +
+					temp.i * a[i__3].r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__3 = *m;
+			    for (k = i__ + 1; k <= i__3; ++k) {
+				i__4 = k + i__ * a_dim1;
+				i__5 = k + j * b_dim1;
+				z__2.r = a[i__4].r * b[i__5].r - a[i__4].i *
+					b[i__5].i, z__2.i = a[i__4].r * b[
+					i__5].i + a[i__4].i * b[i__5].r;
+				z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L130: */
+			    }
+			} else {
+			    if (nounit) {
+				d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
+				z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+					z__1.i = temp.r * z__2.i + temp.i *
+					z__2.r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__3 = *m;
+			    for (k = i__ + 1; k <= i__3; ++k) {
+				d_cnjg(&z__3, &a[k + i__ * a_dim1]);
+				i__4 = k + j * b_dim1;
+				z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4]
+					.i, z__2.i = z__3.r * b[i__4].i +
+					z__3.i * b[i__4].r;
+				z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L140: */
+			    }
+			}
+			i__3 = i__ + j * b_dim1;
+			z__1.r = alpha->r * temp.r - alpha->i * temp.i,
+				z__1.i = alpha->r * temp.i + alpha->i *
+				temp.r;
+			b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L150: */
+		    }
+/* L160: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*A. */
+
+	    if (upper) {
+		for (j = *n; j >= 1; --j) {
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			i__1 = j + j * a_dim1;
+			z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				z__1.i = temp.r * a[i__1].i + temp.i * a[i__1]
+				.r;
+			temp.r = z__1.r, temp.i = z__1.i;
+		    }
+		    i__1 = *m;
+		    for (i__ = 1; i__ <= i__1; ++i__) {
+			i__2 = i__ + j * b_dim1;
+			i__3 = i__ + j * b_dim1;
+			z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				z__1.i = temp.r * b[i__3].i + temp.i * b[i__3]
+				.r;
+			b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L170: */
+		    }
+		    i__1 = j - 1;
+		    for (k = 1; k <= i__1; ++k) {
+			i__2 = k + j * a_dim1;
+			if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) {
+			    i__2 = k + j * a_dim1;
+			    z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2]
+				    .i, z__1.i = alpha->r * a[i__2].i +
+				    alpha->i * a[i__2].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * b_dim1;
+				z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
+					.i, z__2.i = temp.r * b[i__5].i +
+					temp.i * b[i__5].r;
+				z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
+					.i + z__2.i;
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L180: */
+			    }
+			}
+/* L190: */
+		    }
+/* L200: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			i__2 = j + j * a_dim1;
+			z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				z__1.i = temp.r * a[i__2].i + temp.i * a[i__2]
+				.r;
+			temp.r = z__1.r, temp.i = z__1.i;
+		    }
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * b_dim1;
+			i__4 = i__ + j * b_dim1;
+			z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				z__1.i = temp.r * b[i__4].i + temp.i * b[i__4]
+				.r;
+			b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L210: */
+		    }
+		    i__2 = *n;
+		    for (k = j + 1; k <= i__2; ++k) {
+			i__3 = k + j * a_dim1;
+			if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) {
+			    i__3 = k + j * a_dim1;
+			    z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3]
+				    .i, z__1.i = alpha->r * a[i__3].i +
+				    alpha->i * a[i__3].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * b_dim1;
+				z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
+					.i, z__2.i = temp.r * b[i__6].i +
+					temp.i * b[i__6].r;
+				z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
+					.i + z__2.i;
+				b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L220: */
+			    }
+			}
+/* L230: */
+		    }
+/* L240: */
+		}
+	    }
+	} else {
+
+/*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ). */
+
+	    if (upper) {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    i__2 = k - 1;
+		    for (j = 1; j <= i__2; ++j) {
+			i__3 = j + k * a_dim1;
+			if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) {
+			    if (noconj) {
+				i__3 = j + k * a_dim1;
+				z__1.r = alpha->r * a[i__3].r - alpha->i * a[
+					i__3].i, z__1.i = alpha->r * a[i__3]
+					.i + alpha->i * a[i__3].r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    } else {
+				d_cnjg(&z__2, &a[j + k * a_dim1]);
+				z__1.r = alpha->r * z__2.r - alpha->i *
+					z__2.i, z__1.i = alpha->r * z__2.i +
+					alpha->i * z__2.r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * b_dim1;
+				z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
+					.i, z__2.i = temp.r * b[i__6].i +
+					temp.i * b[i__6].r;
+				z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
+					.i + z__2.i;
+				b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L250: */
+			    }
+			}
+/* L260: */
+		    }
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			if (noconj) {
+			    i__2 = k + k * a_dim1;
+			    z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				    z__1.i = temp.r * a[i__2].i + temp.i * a[
+				    i__2].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			} else {
+			    d_cnjg(&z__2, &a[k + k * a_dim1]);
+			    z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+				    z__1.i = temp.r * z__2.i + temp.i *
+				    z__2.r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    }
+		    if ((temp.r != 1.) || (temp.i != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + k * b_dim1;
+			    i__4 = i__ + k * b_dim1;
+			    z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				    z__1.i = temp.r * b[i__4].i + temp.i * b[
+				    i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L270: */
+			}
+		    }
+/* L280: */
+		}
+	    } else {
+		for (k = *n; k >= 1; --k) {
+		    i__1 = *n;
+		    for (j = k + 1; j <= i__1; ++j) {
+			i__2 = j + k * a_dim1;
+			if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) {
+			    if (noconj) {
+				i__2 = j + k * a_dim1;
+				z__1.r = alpha->r * a[i__2].r - alpha->i * a[
+					i__2].i, z__1.i = alpha->r * a[i__2]
+					.i + alpha->i * a[i__2].r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    } else {
+				d_cnjg(&z__2, &a[j + k * a_dim1]);
+				z__1.r = alpha->r * z__2.r - alpha->i *
+					z__2.i, z__1.i = alpha->r * z__2.i +
+					alpha->i * z__2.r;
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * b_dim1;
+				z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
+					.i, z__2.i = temp.r * b[i__5].i +
+					temp.i * b[i__5].r;
+				z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
+					.i + z__2.i;
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L290: */
+			    }
+			}
+/* L300: */
+		    }
+		    temp.r = alpha->r, temp.i = alpha->i;
+		    if (nounit) {
+			if (noconj) {
+			    i__1 = k + k * a_dim1;
+			    z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				    z__1.i = temp.r * a[i__1].i + temp.i * a[
+				    i__1].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			} else {
+			    d_cnjg(&z__2, &a[k + k * a_dim1]);
+			    z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+				    z__1.i = temp.r * z__2.i + temp.i *
+				    z__2.r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    }
+		    if ((temp.r != 1.) || (temp.i != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + k * b_dim1;
+			    i__3 = i__ + k * b_dim1;
+			    z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				    z__1.i = temp.r * b[i__3].i + temp.i * b[
+				    i__3].r;
+			    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L310: */
+			}
+		    }
+/* L320: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZTRMM . */
+
+} /* ztrmm_ */
+
+/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n,
+	doublecomplex *a, integer *lda, doublecomplex *x, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, ix, jx, kx, info;
+    static doublecomplex temp;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    ZTRMV  performs one of the matrix-vector operations
+
+       x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x,
+
+    where x is an n element vector and  A is an n by n unit, or non-unit,
+    upper or lower triangular matrix.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the operation to be performed as
+             follows:
+
+                TRANS = 'N' or 'n'   x := A*x.
+
+                TRANS = 'T' or 't'   x := A'*x.
+
+                TRANS = 'C' or 'c'   x := conjg( A' )*x.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit
+             triangular as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular matrix and the strictly lower triangular part of
+             A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular matrix and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of
+             A are not referenced either, but are assumed to be unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element vector x. On exit, X is overwritten with the
+             tranformed vector x.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < max(1,*n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("ZTRMV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    noconj = lsame_(trans, "T");
+    nounit = lsame_(diag, "N");
+
+/*
+       Set up the start point in X if the increment is not unity. This
+       will be  ( N - 1 )*INCX  too small for descending loops.
+*/
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := A*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) {
+			i__2 = j;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__;
+			    i__4 = i__;
+			    i__5 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    z__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i +
+				    z__2.i;
+			    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+/* L10: */
+			}
+			if (nounit) {
+			    i__2 = j;
+			    i__3 = j;
+			    i__4 = j + j * a_dim1;
+			    z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
+				    i__4].i, z__1.i = x[i__3].r * a[i__4].i +
+				    x[i__3].i * a[i__4].r;
+			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = jx;
+		    if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) {
+			i__2 = jx;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			ix = kx;
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = ix;
+			    i__4 = ix;
+			    i__5 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    z__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i +
+				    z__2.i;
+			    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+			    ix += *incx;
+/* L30: */
+			}
+			if (nounit) {
+			    i__2 = jx;
+			    i__3 = jx;
+			    i__4 = j + j * a_dim1;
+			    z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
+				    i__4].i, z__1.i = x[i__3].r * a[i__4].i +
+				    x[i__3].i * a[i__4].r;
+			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+			}
+		    }
+		    jx += *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) {
+			i__1 = j;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = i__;
+			    i__3 = i__;
+			    i__4 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+				    z__2.i = temp.r * a[i__4].i + temp.i * a[
+				    i__4].r;
+			    z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i +
+				    z__2.i;
+			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+/* L50: */
+			}
+			if (nounit) {
+			    i__1 = j;
+			    i__2 = j;
+			    i__3 = j + j * a_dim1;
+			    z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
+				    i__3].i, z__1.i = x[i__2].r * a[i__3].i +
+				    x[i__2].i * a[i__3].r;
+			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    i__1 = jx;
+		    if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) {
+			i__1 = jx;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			ix = kx;
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = ix;
+			    i__3 = ix;
+			    i__4 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i,
+				    z__2.i = temp.r * a[i__4].i + temp.i * a[
+				    i__4].r;
+			    z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i +
+				    z__2.i;
+			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+			    ix -= *incx;
+/* L70: */
+			}
+			if (nounit) {
+			    i__1 = jx;
+			    i__2 = jx;
+			    i__3 = j + j * a_dim1;
+			    z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
+				    i__3].i, z__1.i = x[i__2].r * a[i__3].i +
+				    x[i__2].i * a[i__3].r;
+			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+			}
+		    }
+		    jx -= *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := A'*x  or  x := conjg( A' )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    if (noconj) {
+			if (nounit) {
+			    i__1 = j + j * a_dim1;
+			    z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				    z__1.i = temp.r * a[i__1].i + temp.i * a[
+				    i__1].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    i__1 = i__ + j * a_dim1;
+			    i__2 = i__;
+			    z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+				    i__2].i, z__2.i = a[i__1].r * x[i__2].i +
+				    a[i__1].i * x[i__2].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L90: */
+			}
+		    } else {
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+				    z__1.i = temp.r * z__2.i + temp.i *
+				    z__2.r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__1 = i__;
+			    z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i,
+				    z__2.i = z__3.r * x[i__1].i + z__3.i * x[
+				    i__1].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L100: */
+			}
+		    }
+		    i__1 = j;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+/* L110: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    i__1 = jx;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    ix = jx;
+		    if (noconj) {
+			if (nounit) {
+			    i__1 = j + j * a_dim1;
+			    z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i,
+				    z__1.i = temp.r * a[i__1].i + temp.i * a[
+				    i__1].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    ix -= *incx;
+			    i__1 = i__ + j * a_dim1;
+			    i__2 = ix;
+			    z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
+				    i__2].i, z__2.i = a[i__1].r * x[i__2].i +
+				    a[i__1].i * x[i__2].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L120: */
+			}
+		    } else {
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+				    z__1.i = temp.r * z__2.i + temp.i *
+				    z__2.r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    ix -= *incx;
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__1 = ix;
+			    z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i,
+				    z__2.i = z__3.r * x[i__1].i + z__3.i * x[
+				    i__1].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L130: */
+			}
+		    }
+		    i__1 = jx;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+		    jx -= *incx;
+/* L140: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    if (noconj) {
+			if (nounit) {
+			    i__2 = j + j * a_dim1;
+			    z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				    z__1.i = temp.r * a[i__2].i + temp.i * a[
+				    i__2].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = i__;
+			    z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, z__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L150: */
+			}
+		    } else {
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+				    z__1.i = temp.r * z__2.i + temp.i *
+				    z__2.r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__3 = i__;
+			    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i,
+				    z__2.i = z__3.r * x[i__3].i + z__3.i * x[
+				    i__3].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L160: */
+			}
+		    }
+		    i__2 = j;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+/* L170: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = jx;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    ix = jx;
+		    if (noconj) {
+			if (nounit) {
+			    i__2 = j + j * a_dim1;
+			    z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i,
+				    z__1.i = temp.r * a[i__2].i + temp.i * a[
+				    i__2].r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    ix += *incx;
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = ix;
+			    z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, z__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L180: */
+			}
+		    } else {
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z__1.r = temp.r * z__2.r - temp.i * z__2.i,
+				    z__1.i = temp.r * z__2.i + temp.i *
+				    z__2.r;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    ix += *incx;
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__3 = ix;
+			    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i,
+				    z__2.i = z__3.r * x[i__3].i + z__3.i * x[
+				    i__3].r;
+			    z__1.r = temp.r + z__2.r, z__1.i = temp.i +
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L190: */
+			}
+		    }
+		    i__2 = jx;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+		    jx += *incx;
+/* L200: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZTRMV . */
+
+} /* ztrmv_ */
+
+/* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag,
+	integer *m, integer *n, doublecomplex *alpha, doublecomplex *a,
+	integer *lda, doublecomplex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5,
+	    i__6, i__7;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
+	    doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, k, info;
+    static doublecomplex temp;
+    static logical lside;
+    extern logical lsame_(char *, char *);
+    static integer nrowa;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    ZTRSM  solves one of the matrix equations
+
+       op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
+
+    where alpha is a scalar, X and B are m by n matrices, A is a unit, or
+    non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
+
+       op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ).
+
+    The matrix X is overwritten on B.
+
+    Parameters
+    ==========
+
+    SIDE   - CHARACTER*1.
+             On entry, SIDE specifies whether op( A ) appears on the left
+             or right of X as follows:
+
+                SIDE = 'L' or 'l'   op( A )*X = alpha*B.
+
+                SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
+
+             Unchanged on exit.
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix A is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANSA - CHARACTER*1.
+             On entry, TRANSA specifies the form of op( A ) to be used in
+             the matrix multiplication as follows:
+
+                TRANSA = 'N' or 'n'   op( A ) = A.
+
+                TRANSA = 'T' or 't'   op( A ) = A'.
+
+                TRANSA = 'C' or 'c'   op( A ) = conjg( A' ).
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit triangular
+             as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    M      - INTEGER.
+             On entry, M specifies the number of rows of B. M must be at
+             least zero.
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the number of columns of B.  N must be
+             at least zero.
+             Unchanged on exit.
+
+    ALPHA  - COMPLEX*16      .
+             On entry,  ALPHA specifies the scalar  alpha. When  alpha is
+             zero then  A is not referenced and  B need not be set before
+             entry.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m
+             when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
+             Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
+             upper triangular part of the array  A must contain the upper
+             triangular matrix  and the strictly lower triangular part of
+             A is not referenced.
+             Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
+             lower triangular part of the array  A must contain the lower
+             triangular matrix  and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u',  the diagonal elements of
+             A  are not referenced either,  but are assumed to be  unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
+             LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
+             then LDA must be at least max( 1, n ).
+             Unchanged on exit.
+
+    B      - COMPLEX*16       array of DIMENSION ( LDB, n ).
+             Before entry,  the leading  m by n part of the array  B must
+             contain  the  right-hand  side  matrix  B,  and  on exit  is
+             overwritten by the solution matrix  X.
+
+    LDB    - INTEGER.
+             On entry, LDB specifies the first dimension of B as declared
+             in  the  calling  (sub)  program.   LDB  must  be  at  least
+             max( 1, m ).
+             Unchanged on exit.
+
+
+    Level 3 Blas routine.
+
+    -- Written on 8-February-1989.
+       Jack Dongarra, Argonne National Laboratory.
+       Iain Duff, AERE Harwell.
+       Jeremy Du Croz, Numerical Algorithms Group Ltd.
+       Sven Hammarling, Numerical Algorithms Group Ltd.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    lside = lsame_(side, "L");
+    if (lside) {
+	nrowa = *m;
+    } else {
+	nrowa = *n;
+    }
+    noconj = lsame_(transa, "T");
+    nounit = lsame_(diag, "N");
+    upper = lsame_(uplo, "U");
+
+    info = 0;
+    if (! lside && ! lsame_(side, "R")) {
+	info = 1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	info = 2;
+    } else if (! lsame_(transa, "N") && ! lsame_(transa,
+	     "T") && ! lsame_(transa, "C")) {
+	info = 3;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 4;
+    } else if (*m < 0) {
+	info = 5;
+    } else if (*n < 0) {
+	info = 6;
+    } else if (*lda < max(1,nrowa)) {
+	info = 9;
+    } else if (*ldb < max(1,*m)) {
+	info = 11;
+    }
+    if (info != 0) {
+	xerbla_("ZTRSM ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     And when  alpha.eq.zero. */
+
+    if (alpha->r == 0. && alpha->i == 0.) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		b[i__3].r = 0., b[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*     Start the operations. */
+
+    if (lside) {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*inv( A )*B. */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if ((alpha->r != 1.) || (alpha->i != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, z__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L30: */
+			}
+		    }
+		    for (k = *m; k >= 1; --k) {
+			i__2 = k + j * b_dim1;
+			if ((b[i__2].r != 0.) || (b[i__2].i != 0.)) {
+			    if (nounit) {
+				i__2 = k + j * b_dim1;
+				z_div(&z__1, &b[k + j * b_dim1], &a[k + k *
+					a_dim1]);
+				b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+			    }
+			    i__2 = k - 1;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = k + j * b_dim1;
+				i__6 = i__ + k * a_dim1;
+				z__2.r = b[i__5].r * a[i__6].r - b[i__5].i *
+					a[i__6].i, z__2.i = b[i__5].r * a[
+					i__6].i + b[i__5].i * a[i__6].r;
+				z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
+					.i - z__2.i;
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L40: */
+			    }
+			}
+/* L50: */
+		    }
+/* L60: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if ((alpha->r != 1.) || (alpha->i != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, z__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L70: */
+			}
+		    }
+		    i__2 = *m;
+		    for (k = 1; k <= i__2; ++k) {
+			i__3 = k + j * b_dim1;
+			if ((b[i__3].r != 0.) || (b[i__3].i != 0.)) {
+			    if (nounit) {
+				i__3 = k + j * b_dim1;
+				z_div(&z__1, &b[k + j * b_dim1], &a[k + k *
+					a_dim1]);
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+			    }
+			    i__3 = *m;
+			    for (i__ = k + 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = k + j * b_dim1;
+				i__7 = i__ + k * a_dim1;
+				z__2.r = b[i__6].r * a[i__7].r - b[i__6].i *
+					a[i__7].i, z__2.i = b[i__6].r * a[
+					i__7].i + b[i__6].i * a[i__7].r;
+				z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
+					.i - z__2.i;
+				b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L80: */
+			    }
+			}
+/* L90: */
+		    }
+/* L100: */
+		}
+	    }
+	} else {
+
+/*
+             Form  B := alpha*inv( A' )*B
+             or    B := alpha*inv( conjg( A' ) )*B.
+*/
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * b_dim1;
+			z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i,
+				z__1.i = alpha->r * b[i__3].i + alpha->i * b[
+				i__3].r;
+			temp.r = z__1.r, temp.i = z__1.i;
+			if (noconj) {
+			    i__3 = i__ - 1;
+			    for (k = 1; k <= i__3; ++k) {
+				i__4 = k + i__ * a_dim1;
+				i__5 = k + j * b_dim1;
+				z__2.r = a[i__4].r * b[i__5].r - a[i__4].i *
+					b[i__5].i, z__2.i = a[i__4].r * b[
+					i__5].i + a[i__4].i * b[i__5].r;
+				z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L110: */
+			    }
+			    if (nounit) {
+				z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]);
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			} else {
+			    i__3 = i__ - 1;
+			    for (k = 1; k <= i__3; ++k) {
+				d_cnjg(&z__3, &a[k + i__ * a_dim1]);
+				i__4 = k + j * b_dim1;
+				z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4]
+					.i, z__2.i = z__3.r * b[i__4].i +
+					z__3.i * b[i__4].r;
+				z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L120: */
+			    }
+			    if (nounit) {
+				d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
+				z_div(&z__1, &temp, &z__2);
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			}
+			i__3 = i__ + j * b_dim1;
+			b[i__3].r = temp.r, b[i__3].i = temp.i;
+/* L130: */
+		    }
+/* L140: */
+		}
+	    } else {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    for (i__ = *m; i__ >= 1; --i__) {
+			i__2 = i__ + j * b_dim1;
+			z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i,
+				z__1.i = alpha->r * b[i__2].i + alpha->i * b[
+				i__2].r;
+			temp.r = z__1.r, temp.i = z__1.i;
+			if (noconj) {
+			    i__2 = *m;
+			    for (k = i__ + 1; k <= i__2; ++k) {
+				i__3 = k + i__ * a_dim1;
+				i__4 = k + j * b_dim1;
+				z__2.r = a[i__3].r * b[i__4].r - a[i__3].i *
+					b[i__4].i, z__2.i = a[i__3].r * b[
+					i__4].i + a[i__3].i * b[i__4].r;
+				z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L150: */
+			    }
+			    if (nounit) {
+				z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]);
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			} else {
+			    i__2 = *m;
+			    for (k = i__ + 1; k <= i__2; ++k) {
+				d_cnjg(&z__3, &a[k + i__ * a_dim1]);
+				i__3 = k + j * b_dim1;
+				z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3]
+					.i, z__2.i = z__3.r * b[i__3].i +
+					z__3.i * b[i__3].r;
+				z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+					z__2.i;
+				temp.r = z__1.r, temp.i = z__1.i;
+/* L160: */
+			    }
+			    if (nounit) {
+				d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
+				z_div(&z__1, &temp, &z__2);
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			}
+			i__2 = i__ + j * b_dim1;
+			b[i__2].r = temp.r, b[i__2].i = temp.i;
+/* L170: */
+		    }
+/* L180: */
+		}
+	    }
+	}
+    } else {
+	if (lsame_(transa, "N")) {
+
+/*           Form  B := alpha*B*inv( A ). */
+
+	    if (upper) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    if ((alpha->r != 1.) || (alpha->i != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, z__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L190: */
+			}
+		    }
+		    i__2 = j - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			i__3 = k + j * a_dim1;
+			if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) {
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = k + j * a_dim1;
+				i__7 = i__ + k * b_dim1;
+				z__2.r = a[i__6].r * b[i__7].r - a[i__6].i *
+					b[i__7].i, z__2.i = a[i__6].r * b[
+					i__7].i + a[i__6].i * b[i__7].r;
+				z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
+					.i - z__2.i;
+				b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L200: */
+			    }
+			}
+/* L210: */
+		    }
+		    if (nounit) {
+			z_div(&z__1, &c_b1077, &a[j + j * a_dim1]);
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * b_dim1;
+			    i__4 = i__ + j * b_dim1;
+			    z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				    z__1.i = temp.r * b[i__4].i + temp.i * b[
+				    i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L220: */
+			}
+		    }
+/* L230: */
+		}
+	    } else {
+		for (j = *n; j >= 1; --j) {
+		    if ((alpha->r != 1.) || (alpha->i != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * b_dim1;
+			    i__3 = i__ + j * b_dim1;
+			    z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
+				    .i, z__1.i = alpha->r * b[i__3].i +
+				    alpha->i * b[i__3].r;
+			    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L240: */
+			}
+		    }
+		    i__1 = *n;
+		    for (k = j + 1; k <= i__1; ++k) {
+			i__2 = k + j * a_dim1;
+			if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) {
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = k + j * a_dim1;
+				i__6 = i__ + k * b_dim1;
+				z__2.r = a[i__5].r * b[i__6].r - a[i__5].i *
+					b[i__6].i, z__2.i = a[i__5].r * b[
+					i__6].i + a[i__5].i * b[i__6].r;
+				z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
+					.i - z__2.i;
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L250: */
+			    }
+			}
+/* L260: */
+		    }
+		    if (nounit) {
+			z_div(&z__1, &c_b1077, &a[j + j * a_dim1]);
+			temp.r = z__1.r, temp.i = z__1.i;
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * b_dim1;
+			    i__3 = i__ + j * b_dim1;
+			    z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				    z__1.i = temp.r * b[i__3].i + temp.i * b[
+				    i__3].r;
+			    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L270: */
+			}
+		    }
+/* L280: */
+		}
+	    }
+	} else {
+
+/*
+             Form  B := alpha*B*inv( A' )
+             or    B := alpha*B*inv( conjg( A' ) ).
+*/
+
+	    if (upper) {
+		for (k = *n; k >= 1; --k) {
+		    if (nounit) {
+			if (noconj) {
+			    z_div(&z__1, &c_b1077, &a[k + k * a_dim1]);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			} else {
+			    d_cnjg(&z__2, &a[k + k * a_dim1]);
+			    z_div(&z__1, &c_b1077, &z__2);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + k * b_dim1;
+			    i__3 = i__ + k * b_dim1;
+			    z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i,
+				    z__1.i = temp.r * b[i__3].i + temp.i * b[
+				    i__3].r;
+			    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L290: */
+			}
+		    }
+		    i__1 = k - 1;
+		    for (j = 1; j <= i__1; ++j) {
+			i__2 = j + k * a_dim1;
+			if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) {
+			    if (noconj) {
+				i__2 = j + k * a_dim1;
+				temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    } else {
+				d_cnjg(&z__1, &a[j + k * a_dim1]);
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__2 = *m;
+			    for (i__ = 1; i__ <= i__2; ++i__) {
+				i__3 = i__ + j * b_dim1;
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + k * b_dim1;
+				z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
+					.i, z__2.i = temp.r * b[i__5].i +
+					temp.i * b[i__5].r;
+				z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
+					.i - z__2.i;
+				b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L300: */
+			    }
+			}
+/* L310: */
+		    }
+		    if ((alpha->r != 1.) || (alpha->i != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + k * b_dim1;
+			    i__3 = i__ + k * b_dim1;
+			    z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
+				    .i, z__1.i = alpha->r * b[i__3].i +
+				    alpha->i * b[i__3].r;
+			    b[i__2].r = z__1.r, b[i__2].i = z__1.i;
+/* L320: */
+			}
+		    }
+/* L330: */
+		}
+	    } else {
+		i__1 = *n;
+		for (k = 1; k <= i__1; ++k) {
+		    if (nounit) {
+			if (noconj) {
+			    z_div(&z__1, &c_b1077, &a[k + k * a_dim1]);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			} else {
+			    d_cnjg(&z__2, &a[k + k * a_dim1]);
+			    z_div(&z__1, &c_b1077, &z__2);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + k * b_dim1;
+			    i__4 = i__ + k * b_dim1;
+			    z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i,
+				    z__1.i = temp.r * b[i__4].i + temp.i * b[
+				    i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L340: */
+			}
+		    }
+		    i__2 = *n;
+		    for (j = k + 1; j <= i__2; ++j) {
+			i__3 = j + k * a_dim1;
+			if ((a[i__3].r != 0.) || (a[i__3].i != 0.)) {
+			    if (noconj) {
+				i__3 = j + k * a_dim1;
+				temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    } else {
+				d_cnjg(&z__1, &a[j + k * a_dim1]);
+				temp.r = z__1.r, temp.i = z__1.i;
+			    }
+			    i__3 = *m;
+			    for (i__ = 1; i__ <= i__3; ++i__) {
+				i__4 = i__ + j * b_dim1;
+				i__5 = i__ + j * b_dim1;
+				i__6 = i__ + k * b_dim1;
+				z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
+					.i, z__2.i = temp.r * b[i__6].i +
+					temp.i * b[i__6].r;
+				z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
+					.i - z__2.i;
+				b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L350: */
+			    }
+			}
+/* L360: */
+		    }
+		    if ((alpha->r != 1.) || (alpha->i != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + k * b_dim1;
+			    i__4 = i__ + k * b_dim1;
+			    z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
+				    .i, z__1.i = alpha->r * b[i__4].i +
+				    alpha->i * b[i__4].r;
+			    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L370: */
+			}
+		    }
+/* L380: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZTRSM . */
+
+} /* ztrsm_ */
+
+/* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n,
+	doublecomplex *a, integer *lda, doublecomplex *x, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Builtin functions */
+    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
+	    doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, ix, jx, kx, info;
+    static doublecomplex temp;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconj, nounit;
+
+
+/*
+    Purpose
+    =======
+
+    ZTRSV  solves one of the systems of equations
+
+       A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b,
+
+    where b and x are n element vectors and A is an n by n unit, or
+    non-unit, upper or lower triangular matrix.
+
+    No test for singularity or near-singularity is included in this
+    routine. Such tests must be performed before calling this routine.
+
+    Parameters
+    ==========
+
+    UPLO   - CHARACTER*1.
+             On entry, UPLO specifies whether the matrix is an upper or
+             lower triangular matrix as follows:
+
+                UPLO = 'U' or 'u'   A is an upper triangular matrix.
+
+                UPLO = 'L' or 'l'   A is a lower triangular matrix.
+
+             Unchanged on exit.
+
+    TRANS  - CHARACTER*1.
+             On entry, TRANS specifies the equations to be solved as
+             follows:
+
+                TRANS = 'N' or 'n'   A*x = b.
+
+                TRANS = 'T' or 't'   A'*x = b.
+
+                TRANS = 'C' or 'c'   conjg( A' )*x = b.
+
+             Unchanged on exit.
+
+    DIAG   - CHARACTER*1.
+             On entry, DIAG specifies whether or not A is unit
+             triangular as follows:
+
+                DIAG = 'U' or 'u'   A is assumed to be unit triangular.
+
+                DIAG = 'N' or 'n'   A is not assumed to be unit
+                                    triangular.
+
+             Unchanged on exit.
+
+    N      - INTEGER.
+             On entry, N specifies the order of the matrix A.
+             N must be at least zero.
+             Unchanged on exit.
+
+    A      - COMPLEX*16       array of DIMENSION ( LDA, n ).
+             Before entry with  UPLO = 'U' or 'u', the leading n by n
+             upper triangular part of the array A must contain the upper
+             triangular matrix and the strictly lower triangular part of
+             A is not referenced.
+             Before entry with UPLO = 'L' or 'l', the leading n by n
+             lower triangular part of the array A must contain the lower
+             triangular matrix and the strictly upper triangular part of
+             A is not referenced.
+             Note that when  DIAG = 'U' or 'u', the diagonal elements of
+             A are not referenced either, but are assumed to be unity.
+             Unchanged on exit.
+
+    LDA    - INTEGER.
+             On entry, LDA specifies the first dimension of A as declared
+             in the calling (sub) program. LDA must be at least
+             max( 1, n ).
+             Unchanged on exit.
+
+    X      - COMPLEX*16       array of dimension at least
+             ( 1 + ( n - 1 )*abs( INCX ) ).
+             Before entry, the incremented array X must contain the n
+             element right-hand side vector b. On exit, X is overwritten
+             with the solution vector x.
+
+    INCX   - INTEGER.
+             On entry, INCX specifies the increment for the elements of
+             X. INCX must not be zero.
+             Unchanged on exit.
+
+
+    Level 2 Blas routine.
+
+    -- Written on 22-October-1986.
+       Jack Dongarra, Argonne National Lab.
+       Jeremy Du Croz, Nag Central Office.
+       Sven Hammarling, Nag Central Office.
+       Richard Hanson, Sandia National Labs.
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+
+    /* Function Body */
+    info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	info = 1;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T") && ! lsame_(trans, "C")) {
+	info = 2;
+    } else if (! lsame_(diag, "U") && ! lsame_(diag,
+	    "N")) {
+	info = 3;
+    } else if (*n < 0) {
+	info = 4;
+    } else if (*lda < max(1,*n)) {
+	info = 6;
+    } else if (*incx == 0) {
+	info = 8;
+    }
+    if (info != 0) {
+	xerbla_("ZTRSV ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    noconj = lsame_(trans, "T");
+    nounit = lsame_(diag, "N");
+
+/*
+       Set up the start point in X if the increment is not unity. This
+       will be  ( N - 1 )*INCX  too small for descending loops.
+*/
+
+    if (*incx <= 0) {
+	kx = 1 - (*n - 1) * *incx;
+    } else if (*incx != 1) {
+	kx = 1;
+    }
+
+/*
+       Start the operations. In this version the elements of A are
+       accessed sequentially with one pass through A.
+*/
+
+    if (lsame_(trans, "N")) {
+
+/*        Form  x := inv( A )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) {
+			if (nounit) {
+			    i__1 = j;
+			    z_div(&z__1, &x[j], &a[j + j * a_dim1]);
+			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+			}
+			i__1 = j;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    i__1 = i__;
+			    i__2 = i__;
+			    i__3 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+				    z__2.i = temp.r * a[i__3].i + temp.i * a[
+				    i__3].r;
+			    z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i -
+				    z__2.i;
+			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else {
+		jx = kx + (*n - 1) * *incx;
+		for (j = *n; j >= 1; --j) {
+		    i__1 = jx;
+		    if ((x[i__1].r != 0.) || (x[i__1].i != 0.)) {
+			if (nounit) {
+			    i__1 = jx;
+			    z_div(&z__1, &x[jx], &a[j + j * a_dim1]);
+			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+			}
+			i__1 = jx;
+			temp.r = x[i__1].r, temp.i = x[i__1].i;
+			ix = jx;
+			for (i__ = j - 1; i__ >= 1; --i__) {
+			    ix -= *incx;
+			    i__1 = ix;
+			    i__2 = ix;
+			    i__3 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i,
+				    z__2.i = temp.r * a[i__3].i + temp.i * a[
+				    i__3].r;
+			    z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i -
+				    z__2.i;
+			    x[i__1].r = z__1.r, x[i__1].i = z__1.i;
+/* L30: */
+			}
+		    }
+		    jx -= *incx;
+/* L40: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) {
+			if (nounit) {
+			    i__2 = j;
+			    z_div(&z__1, &x[j], &a[j + j * a_dim1]);
+			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+			}
+			i__2 = j;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    i__3 = i__;
+			    i__4 = i__;
+			    i__5 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    z__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i -
+				    z__2.i;
+			    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = jx;
+		    if ((x[i__2].r != 0.) || (x[i__2].i != 0.)) {
+			if (nounit) {
+			    i__2 = jx;
+			    z_div(&z__1, &x[jx], &a[j + j * a_dim1]);
+			    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+			}
+			i__2 = jx;
+			temp.r = x[i__2].r, temp.i = x[i__2].i;
+			ix = jx;
+			i__2 = *n;
+			for (i__ = j + 1; i__ <= i__2; ++i__) {
+			    ix += *incx;
+			    i__3 = ix;
+			    i__4 = ix;
+			    i__5 = i__ + j * a_dim1;
+			    z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i,
+				    z__2.i = temp.r * a[i__5].i + temp.i * a[
+				    i__5].r;
+			    z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i -
+				    z__2.i;
+			    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+/* L70: */
+			}
+		    }
+		    jx += *incx;
+/* L80: */
+		}
+	    }
+	}
+    } else {
+
+/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
+
+	if (lsame_(uplo, "U")) {
+	    if (*incx == 1) {
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = j;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    if (noconj) {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = i__;
+			    z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, z__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L90: */
+			}
+			if (nounit) {
+			    z_div(&z__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    } else {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__3 = i__;
+			    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i,
+				    z__2.i = z__3.r * x[i__3].i + z__3.i * x[
+				    i__3].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L100: */
+			}
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z_div(&z__1, &temp, &z__2);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    }
+		    i__2 = j;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+/* L110: */
+		}
+	    } else {
+		jx = kx;
+		i__1 = *n;
+		for (j = 1; j <= i__1; ++j) {
+		    ix = kx;
+		    i__2 = jx;
+		    temp.r = x[i__2].r, temp.i = x[i__2].i;
+		    if (noconj) {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = ix;
+			    z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
+				    i__4].i, z__2.i = a[i__3].r * x[i__4].i +
+				    a[i__3].i * x[i__4].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    ix += *incx;
+/* L120: */
+			}
+			if (nounit) {
+			    z_div(&z__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    } else {
+			i__2 = j - 1;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__3 = ix;
+			    z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i,
+				    z__2.i = z__3.r * x[i__3].i + z__3.i * x[
+				    i__3].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    ix += *incx;
+/* L130: */
+			}
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z_div(&z__1, &temp, &z__2);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    }
+		    i__2 = jx;
+		    x[i__2].r = temp.r, x[i__2].i = temp.i;
+		    jx += *incx;
+/* L140: */
+		}
+	    }
+	} else {
+	    if (*incx == 1) {
+		for (j = *n; j >= 1; --j) {
+		    i__1 = j;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    if (noconj) {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = i__ + j * a_dim1;
+			    i__3 = i__;
+			    z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
+				    i__3].i, z__2.i = a[i__2].r * x[i__3].i +
+				    a[i__2].i * x[i__3].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L150: */
+			}
+			if (nounit) {
+			    z_div(&z__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    } else {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__2 = i__;
+			    z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i,
+				    z__2.i = z__3.r * x[i__2].i + z__3.i * x[
+				    i__2].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+/* L160: */
+			}
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z_div(&z__1, &temp, &z__2);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    }
+		    i__1 = j;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+/* L170: */
+		}
+	    } else {
+		kx += (*n - 1) * *incx;
+		jx = kx;
+		for (j = *n; j >= 1; --j) {
+		    ix = kx;
+		    i__1 = jx;
+		    temp.r = x[i__1].r, temp.i = x[i__1].i;
+		    if (noconj) {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    i__2 = i__ + j * a_dim1;
+			    i__3 = ix;
+			    z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
+				    i__3].i, z__2.i = a[i__2].r * x[i__3].i +
+				    a[i__2].i * x[i__3].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    ix -= *incx;
+/* L180: */
+			}
+			if (nounit) {
+			    z_div(&z__1, &temp, &a[j + j * a_dim1]);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    } else {
+			i__1 = j + 1;
+			for (i__ = *n; i__ >= i__1; --i__) {
+			    d_cnjg(&z__3, &a[i__ + j * a_dim1]);
+			    i__2 = ix;
+			    z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i,
+				    z__2.i = z__3.r * x[i__2].i + z__3.i * x[
+				    i__2].r;
+			    z__1.r = temp.r - z__2.r, z__1.i = temp.i -
+				    z__2.i;
+			    temp.r = z__1.r, temp.i = z__1.i;
+			    ix -= *incx;
+/* L190: */
+			}
+			if (nounit) {
+			    d_cnjg(&z__2, &a[j + j * a_dim1]);
+			    z_div(&z__1, &temp, &z__2);
+			    temp.r = z__1.r, temp.i = z__1.i;
+			}
+		    }
+		    i__1 = jx;
+		    x[i__1].r = temp.r, x[i__1].i = temp.i;
+		    jx -= *incx;
+/* L200: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZTRSV . */
+
+} /* ztrsv_ */
+
diff --git a/numpy/core/src/umath/lapack_lite/dlamch.c b/numpy/core/src/umath/lapack_lite/dlamch.c
new file mode 100644
index 000000000000..bf1dfdb05973
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/dlamch.c
@@ -0,0 +1,951 @@
+#include <stdio.h>
+#include "f2c.h"
+
+/* If config.h is available, we only need dlamc3 */
+#ifndef HAVE_CONFIG
+doublereal dlamch_(char *cmach)
+{
+/*  -- LAPACK auxiliary routine (version 3.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       October 31, 1992   
+
+
+    Purpose   
+    =======   
+
+    DLAMCH determines double precision machine parameters.   
+
+    Arguments   
+    =========   
+
+    CMACH   (input) CHARACTER*1   
+            Specifies the value to be returned by DLAMCH:   
+            = 'E' or 'e',   DLAMCH := eps   
+            = 'S' or 's ,   DLAMCH := sfmin   
+            = 'B' or 'b',   DLAMCH := base   
+            = 'P' or 'p',   DLAMCH := eps*base   
+            = 'N' or 'n',   DLAMCH := t   
+            = 'R' or 'r',   DLAMCH := rnd   
+            = 'M' or 'm',   DLAMCH := emin   
+            = 'U' or 'u',   DLAMCH := rmin   
+            = 'L' or 'l',   DLAMCH := emax   
+            = 'O' or 'o',   DLAMCH := rmax   
+
+            where   
+
+            eps   = relative machine precision   
+            sfmin = safe minimum, such that 1/sfmin does not overflow   
+            base  = base of the machine   
+            prec  = eps*base   
+            t     = number of (base) digits in the mantissa   
+            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
+            emin  = minimum exponent before (gradual) underflow   
+            rmin  = underflow threshold - base**(emin-1)   
+            emax  = largest exponent before overflow   
+            rmax  = overflow threshold  - (base**emax)*(1-eps)   
+
+   ===================================================================== 
+*/
+/* >>Start of File<<   
+       Initialized data */
+    static logical first = TRUE_;
+    /* System generated locals */
+    integer i__1;
+    doublereal ret_val;
+    /* Builtin functions */
+    double pow_di(doublereal *, integer *);
+    /* Local variables */
+    static doublereal base;
+    static integer beta;
+    static doublereal emin, prec, emax;
+    static integer imin, imax;
+    static logical lrnd;
+    static doublereal rmin, rmax, t, rmach;
+    extern logical lsame_(char *, char *);
+    static doublereal small, sfmin;
+    extern /* Subroutine */ int dlamc2_(integer *, integer *, logical *, 
+	    doublereal *, integer *, doublereal *, integer *, doublereal *);
+    static integer it;
+    static doublereal rnd, eps;
+
+
+
+    if (first) {
+	first = FALSE_;
+	dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
+	base = (doublereal) beta;
+	t = (doublereal) it;
+	if (lrnd) {
+	    rnd = 1.;
+	    i__1 = 1 - it;
+	    eps = pow_di(&base, &i__1) / 2;
+	} else {
+	    rnd = 0.;
+	    i__1 = 1 - it;
+	    eps = pow_di(&base, &i__1);
+	}
+	prec = eps * base;
+	emin = (doublereal) imin;
+	emax = (doublereal) imax;
+	sfmin = rmin;
+	small = 1. / rmax;
+	if (small >= sfmin) {
+
+/*           Use SMALL plus a bit, to avoid the possibility of rou
+nding   
+             causing overflow when computing  1/sfmin. */
+
+	    sfmin = small * (eps + 1.);
+	}
+    }
+
+    if (lsame_(cmach, "E")) {
+	rmach = eps;
+    } else if (lsame_(cmach, "S")) {
+	rmach = sfmin;
+    } else if (lsame_(cmach, "B")) {
+	rmach = base;
+    } else if (lsame_(cmach, "P")) {
+	rmach = prec;
+    } else if (lsame_(cmach, "N")) {
+	rmach = t;
+    } else if (lsame_(cmach, "R")) {
+	rmach = rnd;
+    } else if (lsame_(cmach, "M")) {
+	rmach = emin;
+    } else if (lsame_(cmach, "U")) {
+	rmach = rmin;
+    } else if (lsame_(cmach, "L")) {
+	rmach = emax;
+    } else if (lsame_(cmach, "O")) {
+	rmach = rmax;
+    }
+
+    ret_val = rmach;
+    return ret_val;
+
+/*     End of DLAMCH */
+
+} /* dlamch_ */
+
+
+/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical 
+	*ieee1)
+{
+/*  -- LAPACK auxiliary routine (version 3.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       October 31, 1992   
+
+
+    Purpose   
+    =======   
+
+    DLAMC1 determines the machine parameters given by BETA, T, RND, and   
+    IEEE1.   
+
+    Arguments   
+    =========   
+
+    BETA    (output) INTEGER   
+            The base of the machine.   
+
+    T       (output) INTEGER   
+            The number of ( BETA ) digits in the mantissa.   
+
+    RND     (output) LOGICAL   
+            Specifies whether proper rounding  ( RND = .TRUE. )  or   
+            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
+  
+            be a reliable guide to the way in which the machine performs 
+  
+            its arithmetic.   
+
+    IEEE1   (output) LOGICAL   
+            Specifies whether rounding appears to be done in the IEEE   
+            'round to nearest' style.   
+
+    Further Details   
+    ===============   
+
+    The routine is based on the routine  ENVRON  by Malcolm and   
+    incorporates suggestions by Gentleman and Marovich. See   
+
+       Malcolm M. A. (1972) Algorithms to reveal properties of   
+          floating-point arithmetic. Comms. of the ACM, 15, 949-951.   
+
+       Gentleman W. M. and Marovich S. B. (1974) More on algorithms   
+          that reveal properties of floating point arithmetic units.   
+          Comms. of the ACM, 17, 276-277.   
+
+   ===================================================================== 
+*/
+    /* Initialized data */
+    static logical first = TRUE_;
+    /* System generated locals */
+    doublereal d__1, d__2;
+    /* Local variables */
+    static logical lrnd;
+    static doublereal a, b, c, f;
+    static integer lbeta;
+    static doublereal savec;
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    static logical lieee1;
+    static doublereal t1, t2;
+    static integer lt;
+    static doublereal one, qtr;
+
+
+
+    if (first) {
+	first = FALSE_;
+	one = 1.;
+
+/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BE
+TA,   
+          IEEE1, T and RND.   
+
+          Throughout this routine  we use the function  DLAMC3  to ens
+ure   
+          that relevant values are  stored and not held in registers, 
+ or   
+          are not affected by optimizers.   
+
+          Compute  a = 2.0**m  with the  smallest positive integer m s
+uch   
+          that   
+
+             fl( a + 1.0 ) = a. */
+
+	a = 1.;
+	c = 1.;
+
+/* +       WHILE( C.EQ.ONE )LOOP */
+L10:
+	if (c == one) {
+	    a *= 2;
+	    c = dlamc3_(&a, &one);
+	    d__1 = -a;
+	    c = dlamc3_(&c, &d__1);
+	    goto L10;
+	}
+/* +       END WHILE   
+
+          Now compute  b = 2.0**m  with the smallest positive integer 
+m   
+          such that   
+
+             fl( a + b ) .gt. a. */
+
+	b = 1.;
+	c = dlamc3_(&a, &b);
+
+/* +       WHILE( C.EQ.A )LOOP */
+L20:
+	if (c == a) {
+	    b *= 2;
+	    c = dlamc3_(&a, &b);
+	    goto L20;
+	}
+/* +       END WHILE   
+
+          Now compute the base.  a and c  are neighbouring floating po
+int   
+          numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and
+ so   
+          their difference is beta. Adding 0.25 to c is to ensure that
+ it   
+          is truncated to beta and not ( beta - 1 ). */
+
+	qtr = one / 4;
+	savec = c;
+	d__1 = -a;
+	c = dlamc3_(&c, &d__1);
+	lbeta = (integer) (c + qtr);
+
+/*        Now determine whether rounding or chopping occurs,  by addin
+g a   
+          bit  less  than  beta/2  and a  bit  more  than  beta/2  to 
+ a. */
+
+	b = (doublereal) lbeta;
+	d__1 = b / 2;
+	d__2 = -b / 100;
+	f = dlamc3_(&d__1, &d__2);
+	c = dlamc3_(&f, &a);
+	if (c == a) {
+	    lrnd = TRUE_;
+	} else {
+	    lrnd = FALSE_;
+	}
+	d__1 = b / 2;
+	d__2 = b / 100;
+	f = dlamc3_(&d__1, &d__2);
+	c = dlamc3_(&f, &a);
+	if (lrnd && c == a) {
+	    lrnd = FALSE_;
+	}
+
+/*        Try and decide whether rounding is done in the  IEEE  'round
+ to   
+          nearest' style. B/2 is half a unit in the last place of the 
+two   
+          numbers A and SAVEC. Furthermore, A is even, i.e. has last  
+bit   
+          zero, and SAVEC is odd. Thus adding B/2 to A should not  cha
+nge   
+          A, but adding B/2 to SAVEC should change SAVEC. */
+
+	d__1 = b / 2;
+	t1 = dlamc3_(&d__1, &a);
+	d__1 = b / 2;
+	t2 = dlamc3_(&d__1, &savec);
+	lieee1 = t1 == a && t2 > savec && lrnd;
+
+/*        Now find  the  mantissa, t.  It should  be the  integer part
+ of   
+          log to the base beta of a,  however it is safer to determine
+  t   
+          by powering.  So we find t as the smallest positive integer 
+for   
+          which   
+
+             fl( beta**t + 1.0 ) = 1.0. */
+
+	lt = 0;
+	a = 1.;
+	c = 1.;
+
+/* +       WHILE( C.EQ.ONE )LOOP */
+L30:
+	if (c == one) {
+	    ++lt;
+	    a *= lbeta;
+	    c = dlamc3_(&a, &one);
+	    d__1 = -a;
+	    c = dlamc3_(&c, &d__1);
+	    goto L30;
+	}
+/* +       END WHILE */
+
+    }
+
+    *beta = lbeta;
+    *t = lt;
+    *rnd = lrnd;
+    *ieee1 = lieee1;
+    return 0;
+
+/*     End of DLAMC1 */
+
+} /* dlamc1_ */
+
+
+/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd, 
+	doublereal *eps, integer *emin, doublereal *rmin, integer *emax, 
+	doublereal *rmax)
+{
+/*  -- LAPACK auxiliary routine (version 3.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       October 31, 1992   
+
+
+    Purpose   
+    =======   
+
+    DLAMC2 determines the machine parameters specified in its argument   
+    list.   
+
+    Arguments   
+    =========   
+
+    BETA    (output) INTEGER   
+            The base of the machine.   
+
+    T       (output) INTEGER   
+            The number of ( BETA ) digits in the mantissa.   
+
+    RND     (output) LOGICAL   
+            Specifies whether proper rounding  ( RND = .TRUE. )  or   
+            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
+  
+            be a reliable guide to the way in which the machine performs 
+  
+            its arithmetic.   
+
+    EPS     (output) DOUBLE PRECISION   
+            The smallest positive number such that   
+
+               fl( 1.0 - EPS ) .LT. 1.0,   
+
+            where fl denotes the computed value.   
+
+    EMIN    (output) INTEGER   
+            The minimum exponent before (gradual) underflow occurs.   
+
+    RMIN    (output) DOUBLE PRECISION   
+            The smallest normalized number for the machine, given by   
+            BASE**( EMIN - 1 ), where  BASE  is the floating point value 
+  
+            of BETA.   
+
+    EMAX    (output) INTEGER   
+            The maximum exponent before overflow occurs.   
+
+    RMAX    (output) DOUBLE PRECISION   
+            The largest positive number for the machine, given by   
+            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point 
+  
+            value of BETA.   
+
+    Further Details   
+    ===============   
+
+    The computation of  EPS  is based on a routine PARANOIA by   
+    W. Kahan of the University of California at Berkeley.   
+
+   ===================================================================== 
+*/
+    
+    /* Initialized data */
+    static logical first = TRUE_;
+    static logical iwarn = FALSE_;
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2, d__3, d__4, d__5;
+    /* Builtin functions */
+    double pow_di(doublereal *, integer *);
+    /* Local variables */
+    static logical ieee;
+    static doublereal half;
+    static logical lrnd;
+    static doublereal leps, zero, a, b, c;
+    static integer i, lbeta;
+    static doublereal rbase;
+    static integer lemin, lemax, gnmin;
+    static doublereal small;
+    static integer gpmin;
+    static doublereal third, lrmin, lrmax, sixth;
+    extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *, 
+	    logical *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    static logical lieee1;
+    extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *), 
+	    dlamc5_(integer *, integer *, integer *, logical *, integer *, 
+	    doublereal *);
+    static integer lt, ngnmin, ngpmin;
+    static doublereal one, two;
+
+
+
+    if (first) {
+	first = FALSE_;
+	zero = 0.;
+	one = 1.;
+	two = 2.;
+
+/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values
+ of   
+          BETA, T, RND, EPS, EMIN and RMIN.   
+
+          Throughout this routine  we use the function  DLAMC3  to ens
+ure   
+          that relevant values are stored  and not held in registers, 
+ or   
+          are not affected by optimizers.   
+
+          DLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. 
+*/
+
+	dlamc1_(&lbeta, &lt, &lrnd, &lieee1);
+
+/*        Start to find EPS. */
+
+	b = (doublereal) lbeta;
+	i__1 = -lt;
+	a = pow_di(&b, &i__1);
+	leps = a;
+
+/*        Try some tricks to see whether or not this is the correct  E
+PS. */
+
+	b = two / 3;
+	half = one / 2;
+	d__1 = -half;
+	sixth = dlamc3_(&b, &d__1);
+	third = dlamc3_(&sixth, &sixth);
+	d__1 = -half;
+	b = dlamc3_(&third, &d__1);
+	b = dlamc3_(&b, &sixth);
+	b = abs(b);
+	if (b < leps) {
+	    b = leps;
+	}
+
+	leps = 1.;
+
+/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
+L10:
+	if (leps > b && b > zero) {
+	    leps = b;
+	    d__1 = half * leps;
+/* Computing 5th power */
+	    d__3 = two, d__4 = d__3, d__3 *= d__3;
+/* Computing 2nd power */
+	    d__5 = leps;
+	    d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
+	    c = dlamc3_(&d__1, &d__2);
+	    d__1 = -c;
+	    c = dlamc3_(&half, &d__1);
+	    b = dlamc3_(&half, &c);
+	    d__1 = -b;
+	    c = dlamc3_(&half, &d__1);
+	    b = dlamc3_(&half, &c);
+	    goto L10;
+	}
+/* +       END WHILE */
+
+	if (a < leps) {
+	    leps = a;
+	}
+
+/*        Computation of EPS complete.   
+
+          Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3
+)).   
+          Keep dividing  A by BETA until (gradual) underflow occurs. T
+his   
+          is detected when we cannot recover the previous A. */
+
+	rbase = one / lbeta;
+	small = one;
+	for (i = 1; i <= 3; ++i) {
+	    d__1 = small * rbase;
+	    small = dlamc3_(&d__1, &zero);
+/* L20: */
+	}
+	a = dlamc3_(&one, &small);
+	dlamc4_(&ngpmin, &one, &lbeta);
+	d__1 = -one;
+	dlamc4_(&ngnmin, &d__1, &lbeta);
+	dlamc4_(&gpmin, &a, &lbeta);
+	d__1 = -a;
+	dlamc4_(&gnmin, &d__1, &lbeta);
+	ieee = FALSE_;
+
+	if (ngpmin == ngnmin && gpmin == gnmin) {
+	    if (ngpmin == gpmin) {
+		lemin = ngpmin;
+/*            ( Non twos-complement machines, no gradual under
+flow;   
+                e.g.,  VAX ) */
+	    } else if (gpmin - ngpmin == 3) {
+		lemin = ngpmin - 1 + lt;
+		ieee = TRUE_;
+/*            ( Non twos-complement machines, with gradual und
+erflow;   
+                e.g., IEEE standard followers ) */
+	    } else {
+		lemin = min(ngpmin,gpmin);
+/*            ( A guess; no known machine ) */
+		iwarn = TRUE_;
+	    }
+
+	} else if (ngpmin == gpmin && ngnmin == gnmin) {
+	    if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
+		lemin = max(ngpmin,ngnmin);
+/*            ( Twos-complement machines, no gradual underflow
+;   
+                e.g., CYBER 205 ) */
+	    } else {
+		lemin = min(ngpmin,ngnmin);
+/*            ( A guess; no known machine ) */
+		iwarn = TRUE_;
+	    }
+
+	} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
+		 {
+	    if (gpmin - min(ngpmin,ngnmin) == 3) {
+		lemin = max(ngpmin,ngnmin) - 1 + lt;
+/*            ( Twos-complement machines with gradual underflo
+w;   
+                no known machine ) */
+	    } else {
+		lemin = min(ngpmin,ngnmin);
+/*            ( A guess; no known machine ) */
+		iwarn = TRUE_;
+	    }
+
+	} else {
+/* Computing MIN */
+	    i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
+	    lemin = min(i__1,gnmin);
+/*         ( A guess; no known machine ) */
+	    iwarn = TRUE_;
+	}
+/* **   
+   Comment out this if block if EMIN is ok */
+	if (iwarn) {
+	    first = TRUE_;
+	    printf("\n\n WARNING. The value EMIN may be incorrect:- ");
+	    printf("EMIN = %8i\n",lemin);
+	    printf("If, after inspection, the value EMIN looks acceptable");
+            printf("please comment out \n the IF block as marked within the"); 
+            printf("code of routine DLAMC2, \n otherwise supply EMIN"); 
+            printf("explicitly.\n");
+	}
+/* **   
+
+          Assume IEEE arithmetic if we found denormalised  numbers abo
+ve,   
+          or if arithmetic seems to round in the  IEEE style,  determi
+ned   
+          in routine DLAMC1. A true IEEE machine should have both  thi
+ngs   
+          true; however, faulty machines may have one or the other. */
+
+	ieee = ieee || lieee1;
+
+/*        Compute  RMIN by successive division by  BETA. We could comp
+ute   
+          RMIN as BASE**( EMIN - 1 ),  but some machines underflow dur
+ing   
+          this computation. */
+
+	lrmin = 1.;
+	i__1 = 1 - lemin;
+	for (i = 1; i <= 1-lemin; ++i) {
+	    d__1 = lrmin * rbase;
+	    lrmin = dlamc3_(&d__1, &zero);
+/* L30: */
+	}
+
+/*        Finally, call DLAMC5 to compute EMAX and RMAX. */
+
+	dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
+    }
+
+    *beta = lbeta;
+    *t = lt;
+    *rnd = lrnd;
+    *eps = leps;
+    *emin = lemin;
+    *rmin = lrmin;
+    *emax = lemax;
+    *rmax = lrmax;
+
+    return 0;
+
+
+/*     End of DLAMC2 */
+
+} /* dlamc2_ */
+#endif
+
+
+doublereal dlamc3_(doublereal *a, doublereal *b)
+{
+/*  -- LAPACK auxiliary routine (version 3.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       October 31, 1992   
+
+
+    Purpose   
+    =======   
+
+    DLAMC3  is intended to force  A  and  B  to be stored prior to doing 
+  
+    the addition of  A  and  B ,  for use in situations where optimizers 
+  
+    might hold one of these in a register.   
+
+    Arguments   
+    =========   
+
+    A, B    (input) DOUBLE PRECISION   
+            The values A and B.   
+
+   ===================================================================== 
+*/
+/* >>Start of File<<   
+       System generated locals */
+    volatile doublereal ret_val;
+
+
+
+    ret_val = *a + *b;
+
+    return ret_val;
+
+/*     End of DLAMC3 */
+
+} /* dlamc3_ */
+
+
+#ifndef HAVE_CONFIG
+/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base)
+{
+/*  -- LAPACK auxiliary routine (version 2.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       October 31, 1992   
+
+
+    Purpose   
+    =======   
+
+    DLAMC4 is a service routine for DLAMC2.   
+
+    Arguments   
+    =========   
+
+    EMIN    (output) EMIN   
+            The minimum exponent before (gradual) underflow, computed by 
+  
+            setting A = START and dividing by BASE until the previous A   
+            can not be recovered.   
+
+    START   (input) DOUBLE PRECISION   
+            The starting point for determining EMIN.   
+
+    BASE    (input) INTEGER   
+            The base of the machine.   
+
+   ===================================================================== 
+*/
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1;
+    /* Local variables */
+    static doublereal zero, a;
+    static integer i;
+    static doublereal rbase, b1, b2, c1, c2, d1, d2;
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    static doublereal one;
+
+
+
+    a = *start;
+    one = 1.;
+    rbase = one / *base;
+    zero = 0.;
+    *emin = 1;
+    d__1 = a * rbase;
+    b1 = dlamc3_(&d__1, &zero);
+    c1 = a;
+    c2 = a;
+    d1 = a;
+    d2 = a;
+/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.   
+      $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
+L10:
+    if (c1 == a && c2 == a && d1 == a && d2 == a) {
+	--(*emin);
+	a = b1;
+	d__1 = a / *base;
+	b1 = dlamc3_(&d__1, &zero);
+	d__1 = b1 * *base;
+	c1 = dlamc3_(&d__1, &zero);
+	d1 = zero;
+	i__1 = *base;
+	for (i = 1; i <= *base; ++i) {
+	    d1 += b1;
+/* L20: */
+	}
+	d__1 = a * rbase;
+	b2 = dlamc3_(&d__1, &zero);
+	d__1 = b2 / rbase;
+	c2 = dlamc3_(&d__1, &zero);
+	d2 = zero;
+	i__1 = *base;
+	for (i = 1; i <= *base; ++i) {
+	    d2 += b2;
+/* L30: */
+	}
+	goto L10;
+    }
+/* +    END WHILE */
+
+    return 0;
+
+/*     End of DLAMC4 */
+
+} /* dlamc4_ */
+
+
+/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, 
+	logical *ieee, integer *emax, doublereal *rmax)
+{
+/*  -- LAPACK auxiliary routine (version 3.0) --   
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
+       Courant Institute, Argonne National Lab, and Rice University   
+       October 31, 1992   
+
+
+    Purpose   
+    =======   
+
+    DLAMC5 attempts to compute RMAX, the largest machine floating-point   
+    number, without overflow.  It assumes that EMAX + abs(EMIN) sum   
+    approximately to a power of 2.  It will fail on machines where this   
+    assumption does not hold, for example, the Cyber 205 (EMIN = -28625, 
+  
+    EMAX = 28718).  It will also fail if the value supplied for EMIN is   
+    too large (i.e. too close to zero), probably with overflow.   
+
+    Arguments   
+    =========   
+
+    BETA    (input) INTEGER   
+            The base of floating-point arithmetic.   
+
+    P       (input) INTEGER   
+            The number of base BETA digits in the mantissa of a   
+            floating-point value.   
+
+    EMIN    (input) INTEGER   
+            The minimum exponent before (gradual) underflow.   
+
+    IEEE    (input) LOGICAL   
+            A logical flag specifying whether or not the arithmetic   
+            system is thought to comply with the IEEE standard.   
+
+    EMAX    (output) INTEGER   
+            The largest exponent before overflow   
+
+    RMAX    (output) DOUBLE PRECISION   
+            The largest machine floating-point number.   
+
+   ===================================================================== 
+  
+
+
+       First compute LEXP and UEXP, two powers of 2 that bound   
+       abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
+       approximately to the bound that is closest to abs(EMIN).   
+       (EMAX is the exponent of the required number RMAX). */
+    /* Table of constant values */
+    static doublereal c_b5 = 0.;
+    
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1;
+    /* Local variables */
+    static integer lexp;
+    static doublereal oldy;
+    static integer uexp, i;
+    static doublereal y, z;
+    static integer nbits;
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    static doublereal recbas;
+    static integer exbits, expsum, try__;
+
+
+
+    lexp = 1;
+    exbits = 1;
+L10:
+    try__ = lexp << 1;
+    if (try__ <= -(*emin)) {
+	lexp = try__;
+	++exbits;
+	goto L10;
+    }
+    if (lexp == -(*emin)) {
+	uexp = lexp;
+    } else {
+	uexp = try__;
+	++exbits;
+    }
+
+/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater   
+       than or equal to EMIN. EXBITS is the number of bits needed to   
+       store the exponent. */
+
+    if (uexp + *emin > -lexp - *emin) {
+	expsum = lexp << 1;
+    } else {
+	expsum = uexp << 1;
+    }
+
+/*     EXPSUM is the exponent range, approximately equal to   
+       EMAX - EMIN + 1 . */
+
+    *emax = expsum + *emin - 1;
+    nbits = exbits + 1 + *p;
+
+/*     NBITS is the total number of bits needed to store a   
+       floating-point number. */
+
+    if (nbits % 2 == 1 && *beta == 2) {
+
+/*        Either there are an odd number of bits used to store a   
+          floating-point number, which is unlikely, or some bits are 
+  
+          not used in the representation of numbers, which is possible
+,   
+          (e.g. Cray machines) or the mantissa has an implicit bit,   
+          (e.g. IEEE machines, Dec Vax machines), which is perhaps the
+   
+          most likely. We have to assume the last alternative.   
+          If this is true, then we need to reduce EMAX by one because 
+  
+          there must be some way of representing zero in an implicit-b
+it   
+          system. On machines like Cray, we are reducing EMAX by one 
+  
+          unnecessarily. */
+
+	--(*emax);
+    }
+
+    if (*ieee) {
+
+/*        Assume we are on an IEEE machine which reserves one exponent
+   
+          for infinity and NaN. */
+
+	--(*emax);
+    }
+
+/*     Now create RMAX, the largest machine number, which should   
+       be equal to (1.0 - BETA**(-P)) * BETA**EMAX .   
+
+       First compute 1.0 - BETA**(-P), being careful that the   
+       result is less than 1.0 . */
+
+    recbas = 1. / *beta;
+    z = *beta - 1.;
+    y = 0.;
+    i__1 = *p;
+    for (i = 1; i <= *p; ++i) {
+	z *= recbas;
+	if (y < 1.) {
+	    oldy = y;
+	}
+	y = dlamc3_(&y, &z);
+/* L20: */
+    }
+    if (y >= 1.) {
+	y = oldy;
+    }
+
+/*     Now multiply by BETA**EMAX to get RMAX. */
+
+    i__1 = *emax;
+    for (i = 1; i <= *emax; ++i) {
+	d__1 = y * *beta;
+	y = dlamc3_(&d__1, &c_b5);
+/* L30: */
+    }
+
+    *rmax = y;
+    return 0;
+
+/*     End of DLAMC5 */
+
+} /* dlamc5_ */
+#endif
diff --git a/numpy/core/src/umath/lapack_lite/dlapack_lite.c b/numpy/core/src/umath/lapack_lite/dlapack_lite.c
new file mode 100644
index 000000000000..6a36fe6a8352
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/dlapack_lite.c
@@ -0,0 +1,100833 @@
+/*
+NOTE: This is generated code. Look in Misc/lapack_lite for information on
+      remaking this file.
+*/
+#include "f2c.h"
+
+#ifdef HAVE_CONFIG
+#include "config.h"
+#else
+extern doublereal dlamch_(char *);
+#define EPSILON dlamch_("Epsilon")
+#define SAFEMINIMUM dlamch_("Safe minimum")
+#define PRECISION dlamch_("Precision")
+#define BASE dlamch_("Base")
+#endif
+
+extern doublereal dlapy2_(doublereal *x, doublereal *y);
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static complex c_b55 = {0.f,0.f};
+static complex c_b56 = {1.f,0.f};
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static integer c__0 = 0;
+static integer c__8 = 8;
+static integer c__4 = 4;
+static integer c__65 = 65;
+static integer c__6 = 6;
+static integer c__9 = 9;
+static real c_b320 = 0.f;
+static real c_b1011 = 1.f;
+static integer c__15 = 15;
+static logical c_false = FALSE_;
+static real c_b1290 = -1.f;
+static real c_b2206 = .5f;
+static doublereal c_b2865 = 1.;
+static doublereal c_b2879 = 0.;
+static doublereal c_b2944 = -.125;
+static doublereal c_b3001 = -1.;
+static integer c__10 = 10;
+static integer c__11 = 11;
+static doublereal c_b5654 = 2.;
+static logical c_true = TRUE_;
+static real c_b9647 = 2.f;
+
+/* Subroutine */ int cgebak_(char *job, char *side, integer *n, integer *ilo,
+	integer *ihi, real *scale, integer *m, complex *v, integer *ldv,
+	integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    static integer i__, k;
+    static real s;
+    static integer ii;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cswap_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static logical leftv;
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *), xerbla_(char *, integer *);
+    static logical rightv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGEBAK forms the right or left eigenvectors of a complex general
+    matrix by backward transformation on the computed eigenvectors of the
+    balanced matrix output by CGEBAL.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the type of backward transformation required:
+            = 'N', do nothing, return immediately;
+            = 'P', do backward transformation for permutation only;
+            = 'S', do backward transformation for scaling only;
+            = 'B', do backward transformations for both permutation and
+                   scaling.
+            JOB must be the same as the argument JOB supplied to CGEBAL.
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  V contains right eigenvectors;
+            = 'L':  V contains left eigenvectors.
+
+    N       (input) INTEGER
+            The number of rows of the matrix V.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            The integers ILO and IHI determined by CGEBAL.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    SCALE   (input) REAL array, dimension (N)
+            Details of the permutation and scaling factors, as returned
+            by CGEBAL.
+
+    M       (input) INTEGER
+            The number of columns of the matrix V.  M >= 0.
+
+    V       (input/output) COMPLEX array, dimension (LDV,M)
+            On entry, the matrix of right or left eigenvectors to be
+            transformed, as returned by CHSEIN or CTREVC.
+            On exit, V is overwritten by the transformed eigenvectors.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V. LDV >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Decode and Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --scale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+
+    /* Function Body */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*ldv < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEBAK", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if ((lsame_(job, "S")) || (lsame_(job, "B"))) {
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = scale[i__];
+		csscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L10: */
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = 1.f / scale[i__];
+		csscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L20: */
+	    }
+	}
+
+    }
+
+/*
+       Backward permutation
+
+       For  I = ILO-1 step -1 until 1,
+                IHI+1 step 1 until N do --
+*/
+
+L30:
+    if ((lsame_(job, "P")) || (lsame_(job, "B"))) {
+	if (rightv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L40;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = scale[i__];
+		if (k == i__) {
+		    goto L40;
+		}
+		cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L50;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = scale[i__];
+		if (k == i__) {
+		    goto L50;
+		}
+		cswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L50:
+		;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CGEBAK */
+
+} /* cgebak_ */
+
+/* Subroutine */ int cgebal_(char *job, integer *n, complex *a, integer *lda,
+	integer *ilo, integer *ihi, real *scale, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double r_imag(complex *), c_abs(complex *);
+
+    /* Local variables */
+    static real c__, f, g;
+    static integer i__, j, k, l, m;
+    static real r__, s, ca, ra;
+    static integer ica, ira, iexc;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cswap_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static real sfmin1, sfmin2, sfmax1, sfmax2;
+    extern integer icamax_(integer *, complex *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *), xerbla_(char *, integer *);
+    static logical noconv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CGEBAL balances a general complex matrix A.  This involves, first,
+    permuting A by a similarity transformation to isolate eigenvalues
+    in the first 1 to ILO-1 and last IHI+1 to N elements on the
+    diagonal; and second, applying a diagonal similarity transformation
+    to rows and columns ILO to IHI to make the rows and columns as
+    close in norm as possible.  Both steps are optional.
+
+    Balancing may reduce the 1-norm of the matrix, and improve the
+    accuracy of the computed eigenvalues and/or eigenvectors.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the operations to be performed on A:
+            = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+                    for i = 1,...,N;
+            = 'P':  permute only;
+            = 'S':  scale only;
+            = 'B':  both permute and scale.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the input matrix A.
+            On exit,  A is overwritten by the balanced matrix.
+            If JOB = 'N', A is not referenced.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    ILO     (output) INTEGER
+    IHI     (output) INTEGER
+            ILO and IHI are set to integers such that on exit
+            A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+            If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+
+    SCALE   (output) REAL array, dimension (N)
+            Details of the permutations and scaling factors applied to
+            A.  If P(j) is the index of the row and column interchanged
+            with row and column j and D(j) is the scaling factor
+            applied to row and column j, then
+            SCALE(j) = P(j)    for j = 1,...,ILO-1
+                     = D(j)    for j = ILO,...,IHI
+                     = P(j)    for j = IHI+1,...,N.
+            The order in which the interchanges are made is N to IHI+1,
+            then 1 to ILO-1.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The permutations consist of row and column interchanges which put
+    the matrix in the form
+
+               ( T1   X   Y  )
+       P A P = (  0   B   Z  )
+               (  0   0   T2 )
+
+    where T1 and T2 are upper triangular matrices whose eigenvalues lie
+    along the diagonal.  The column indices ILO and IHI mark the starting
+    and ending columns of the submatrix B. Balancing consists of applying
+    a diagonal similarity transformation inv(D) * B * D to make the
+    1-norms of each row of B and its corresponding column nearly equal.
+    The output matrix is
+
+       ( T1     X*D          Y    )
+       (  0  inv(D)*B*D  inv(D)*Z ).
+       (  0      0           T2   )
+
+    Information about the permutations P and the diagonal matrix D is
+    returned in the vector SCALE.
+
+    This subroutine is based on the EISPACK routine CBAL.
+
+    Modified by Tzu-Yi Chen, Computer Science Division, University of
+      California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --scale;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEBAL", &i__1);
+	return 0;
+    }
+
+    k = 1;
+    l = *n;
+
+    if (*n == 0) {
+	goto L210;
+    }
+
+    if (lsame_(job, "N")) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scale[i__] = 1.f;
+/* L10: */
+	}
+	goto L210;
+    }
+
+    if (lsame_(job, "S")) {
+	goto L120;
+    }
+
+/*     Permutation to isolate eigenvalues if possible */
+
+    goto L50;
+
+/*     Row and column exchange. */
+
+L20:
+    scale[m] = (real) j;
+    if (j == m) {
+	goto L30;
+    }
+
+    cswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
+    i__1 = *n - k + 1;
+    cswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
+
+L30:
+    switch (iexc) {
+	case 1:  goto L40;
+	case 2:  goto L80;
+    }
+
+/*     Search for rows isolating an eigenvalue and push them down. */
+
+L40:
+    if (l == 1) {
+	goto L210;
+    }
+    --l;
+
+L50:
+    for (j = l; j >= 1; --j) {
+
+	i__1 = l;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (i__ == j) {
+		goto L60;
+	    }
+	    i__2 = j + i__ * a_dim1;
+	    if ((a[i__2].r != 0.f) || (r_imag(&a[j + i__ * a_dim1]) != 0.f)) {
+		goto L70;
+	    }
+L60:
+	    ;
+	}
+
+	m = l;
+	iexc = 1;
+	goto L20;
+L70:
+	;
+    }
+
+    goto L90;
+
+/*     Search for columns isolating an eigenvalue and push them left. */
+
+L80:
+    ++k;
+
+L90:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+
+	i__2 = l;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    if (i__ == j) {
+		goto L100;
+	    }
+	    i__3 = i__ + j * a_dim1;
+	    if ((a[i__3].r != 0.f) || (r_imag(&a[i__ + j * a_dim1]) != 0.f)) {
+		goto L110;
+	    }
+L100:
+	    ;
+	}
+
+	m = k;
+	iexc = 2;
+	goto L20;
+L110:
+	;
+    }
+
+L120:
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	scale[i__] = 1.f;
+/* L130: */
+    }
+
+    if (lsame_(job, "P")) {
+	goto L210;
+    }
+
+/*
+       Balance the submatrix in rows K to L.
+
+       Iterative loop for norm reduction
+*/
+
+    sfmin1 = slamch_("S") / slamch_("P");
+    sfmax1 = 1.f / sfmin1;
+    sfmin2 = sfmin1 * 8.f;
+    sfmax2 = 1.f / sfmin2;
+L140:
+    noconv = FALSE_;
+
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	c__ = 0.f;
+	r__ = 0.f;
+
+	i__2 = l;
+	for (j = k; j <= i__2; ++j) {
+	    if (j == i__) {
+		goto L150;
+	    }
+	    i__3 = j + i__ * a_dim1;
+	    c__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[j + i__
+		    * a_dim1]), dabs(r__2));
+	    i__3 = i__ + j * a_dim1;
+	    r__ += (r__1 = a[i__3].r, dabs(r__1)) + (r__2 = r_imag(&a[i__ + j
+		    * a_dim1]), dabs(r__2));
+L150:
+	    ;
+	}
+	ica = icamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
+	ca = c_abs(&a[ica + i__ * a_dim1]);
+	i__2 = *n - k + 1;
+	ira = icamax_(&i__2, &a[i__ + k * a_dim1], lda);
+	ra = c_abs(&a[i__ + (ira + k - 1) * a_dim1]);
+
+/*        Guard against zero C or R due to underflow. */
+
+	if ((c__ == 0.f) || (r__ == 0.f)) {
+	    goto L200;
+	}
+	g = r__ / 8.f;
+	f = 1.f;
+	s = c__ + r__;
+L160:
+/* Computing MAX */
+	r__1 = max(f,c__);
+/* Computing MIN */
+	r__2 = min(r__,g);
+	if (((c__ >= g) || (dmax(r__1,ca) >= sfmax2)) || (dmin(r__2,ra) <=
+		sfmin2)) {
+	    goto L170;
+	}
+	f *= 8.f;
+	c__ *= 8.f;
+	ca *= 8.f;
+	r__ /= 8.f;
+	g /= 8.f;
+	ra /= 8.f;
+	goto L160;
+
+L170:
+	g = c__ / 8.f;
+L180:
+/* Computing MIN */
+	r__1 = min(f,c__), r__1 = min(r__1,g);
+	if (((g < r__) || (dmax(r__,ra) >= sfmax2)) || (dmin(r__1,ca) <=
+		sfmin2)) {
+	    goto L190;
+	}
+	f /= 8.f;
+	c__ /= 8.f;
+	g /= 8.f;
+	ca /= 8.f;
+	r__ *= 8.f;
+	ra *= 8.f;
+	goto L180;
+
+/*        Now balance. */
+
+L190:
+	if (c__ + r__ >= s * .95f) {
+	    goto L200;
+	}
+	if (f < 1.f && scale[i__] < 1.f) {
+	    if (f * scale[i__] <= sfmin1) {
+		goto L200;
+	    }
+	}
+	if (f > 1.f && scale[i__] > 1.f) {
+	    if (scale[i__] >= sfmax1 / f) {
+		goto L200;
+	    }
+	}
+	g = 1.f / f;
+	scale[i__] *= f;
+	noconv = TRUE_;
+
+	i__2 = *n - k + 1;
+	csscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
+	csscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
+
+L200:
+	;
+    }
+
+    if (noconv) {
+	goto L140;
+    }
+
+L210:
+    *ilo = k;
+    *ihi = l;
+
+    return 0;
+
+/*     End of CGEBAL */
+
+} /* cgebal_ */
+
+/* Subroutine */ int cgebd2_(integer *m, integer *n, complex *a, integer *lda,
+	 real *d__, real *e, complex *tauq, complex *taup, complex *work,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__;
+    static complex alpha;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *),
+	    clarfg_(integer *, complex *, complex *, integer *, complex *),
+	    clacgv_(integer *, complex *, integer *), xerbla_(char *, integer
+	    *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGEBD2 reduces a complex general m by n matrix A to upper or lower
+    real bidiagonal form B by a unitary transformation: Q' * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the unitary matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the unitary matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) REAL array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) COMPLEX array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix Q. See Further Details.
+
+    TAUP    (output) COMPLEX array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix P. See Further Details.
+
+    WORK    (workspace) COMPLEX array, dimension (max(M,N))
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+    A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+    A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, v and u are complex vectors;
+    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("CGEBD2", &i__1);
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, &
+		    tauq[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*           Apply H(i)' to A(i:m,i+1:n) from the left */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    r_cnjg(&q__1, &tauq[i__]);
+	    clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1,
+		     &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__;
+	    a[i__2].r = d__[i__3], a[i__2].i = 0.f;
+
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector G(i) to annihilate
+                A(i,i+2:n)
+*/
+
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+			taup[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Apply G(i) to A(i+1:m,i+1:n) from the right */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		clarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1],
+			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &work[1]);
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		i__3 = i__;
+		a[i__2].r = e[i__3], a[i__2].i = 0.f;
+	    } else {
+		i__2 = i__;
+		taup[i__2].r = 0.f, taup[i__2].i = 0.f;
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+	    clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+		    taup[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*           Apply G(i) to A(i+1:m,i:n) from the right */
+
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+/* Computing MIN */
+	    i__4 = i__ + 1;
+	    clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[
+		    i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]);
+	    i__2 = *n - i__ + 1;
+	    clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__;
+	    a[i__2].r = d__[i__3], a[i__2].i = 0.f;
+
+	    if (i__ < *m) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:m,i)
+*/
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1,
+			 &tauq[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Apply H(i)' to A(i+1:m,i+1:n) from the left */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		r_cnjg(&q__1, &tauq[i__]);
+		clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &q__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &
+			work[1]);
+		i__2 = i__ + 1 + i__ * a_dim1;
+		i__3 = i__;
+		a[i__2].r = e[i__3], a[i__2].i = 0.f;
+	    } else {
+		i__2 = i__;
+		tauq[i__2].r = 0.f, tauq[i__2].i = 0.f;
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of CGEBD2 */
+
+} /* cgebd2_ */
+
+/* Subroutine */ int cgebrd_(integer *m, integer *n, complex *a, integer *lda,
+	 real *d__, real *e, complex *tauq, complex *taup, complex *work,
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    real r__1;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, j, nb, nx;
+    static real ws;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *);
+    static integer nbmin, iinfo, minmn;
+    extern /* Subroutine */ int cgebd2_(integer *, integer *, complex *,
+	    integer *, real *, real *, complex *, complex *, complex *,
+	    integer *), clabrd_(integer *, integer *, integer *, complex *,
+	    integer *, real *, real *, complex *, complex *, complex *,
+	    integer *, complex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwrkx, ldwrky, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CGEBRD reduces a general complex M-by-N matrix A to upper or lower
+    bidiagonal form B by a unitary transformation: Q**H * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the M-by-N general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the unitary matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the unitary matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) REAL array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) COMPLEX array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix Q. See Further Details.
+
+    TAUP    (output) COMPLEX array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix P. See Further Details.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,M,N).
+            For optimum performance LWORK >= (M+N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+    A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+    A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
+    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = ilaenv_(&c__1, "CGEBRD", " ", m, n, &c_n1, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = max(i__1,i__2);
+    lwkopt = (*m + *n) * nb;
+    r__1 = (real) lwkopt;
+    work[1].r = r__1, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = max(1,*m);
+	if (*lwork < max(i__1,*n) && ! lquery) {
+	    *info = -10;
+	}
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("CGEBRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    minmn = min(*m,*n);
+    if (minmn == 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    ws = (real) max(*m,*n);
+    ldwrkx = *m;
+    ldwrky = *n;
+
+    if (nb > 1 && nb < minmn) {
+
+/*
+          Set the crossover point NX.
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "CGEBRD", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+
+/*        Determine when to switch from blocked to unblocked code. */
+
+	if (nx < minmn) {
+	    ws = (real) ((*m + *n) * nb);
+	    if ((real) (*lwork) < ws) {
+
+/*
+                Not enough work space for the optimal NB, consider using
+                a smaller block size.
+*/
+
+		nbmin = ilaenv_(&c__2, "CGEBRD", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		if (*lwork >= (*m + *n) * nbmin) {
+		    nb = *lwork / (*m + *n);
+		} else {
+		    nb = 1;
+		    nx = minmn;
+		}
+	    }
+	}
+    } else {
+	nx = minmn;
+    }
+
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+
+/*
+          Reduce rows and columns i:i+ib-1 to bidiagonal form and return
+          the matrices X and Y which are needed to update the unreduced
+          part of the matrix
+*/
+
+	i__3 = *m - i__ + 1;
+	i__4 = *n - i__ + 1;
+	clabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
+		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx
+		* nb + 1], &ldwrky);
+
+/*
+          Update the trailing submatrix A(i+ib:m,i+ib:n), using
+          an update of the form  A := A - V*Y' - X*U'
+*/
+
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	q__1.r = -1.f, q__1.i = -0.f;
+	cgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, &
+		q__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb +
+		nb + 1], &ldwrky, &c_b56, &a[i__ + nb + (i__ + nb) * a_dim1],
+		lda);
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	q__1.r = -1.f, q__1.i = -0.f;
+	cgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &q__1, &
+		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
+		c_b56, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*        Copy diagonal and off-diagonal elements of B back into A */
+
+	if (*m >= *n) {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = d__[i__5], a[i__4].i = 0.f;
+		i__4 = j + (j + 1) * a_dim1;
+		i__5 = j;
+		a[i__4].r = e[i__5], a[i__4].i = 0.f;
+/* L10: */
+	    }
+	} else {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = d__[i__5], a[i__4].i = 0.f;
+		i__4 = j + 1 + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = e[i__5], a[i__4].i = 0.f;
+/* L20: */
+	    }
+	}
+/* L30: */
+    }
+
+/*     Use unblocked code to reduce the remainder of the matrix */
+
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    cgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
+	    tauq[i__], &taup[i__], &work[1], &iinfo);
+    work[1].r = ws, work[1].i = 0.f;
+    return 0;
+
+/*     End of CGEBRD */
+
+} /* cgebrd_ */
+
+/* Subroutine */ int cgeev_(char *jobvl, char *jobvr, integer *n, complex *a,
+	integer *lda, complex *w, complex *vl, integer *ldvl, complex *vr,
+	integer *ldvr, complex *work, integer *lwork, real *rwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3, i__4;
+    real r__1, r__2;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_imag(complex *);
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, k, ihi;
+    static real scl;
+    static integer ilo;
+    static real dum[1], eps;
+    static complex tmp;
+    static integer ibal;
+    static char side[1];
+    static integer maxb;
+    static real anrm;
+    static integer ierr, itau, iwrk, nout;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern doublereal scnrm2_(integer *, complex *, integer *);
+    extern /* Subroutine */ int cgebak_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, complex *, integer *, integer *), cgebal_(char *, integer *, complex *, integer *,
+	    integer *, integer *, real *, integer *), slabad_(real *,
+	    real *);
+    static logical scalea;
+    extern doublereal clange_(char *, integer *, integer *, complex *,
+	    integer *, real *);
+    static real cscale;
+    extern /* Subroutine */ int cgehrd_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *, integer *),
+	     clascl_(char *, integer *, integer *, real *, real *, integer *,
+	    integer *, complex *, integer *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *), clacpy_(char *, integer *, integer *, complex *, integer *,
+	    complex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical select[1];
+    static real bignum;
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int chseqr_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *), ctrevc_(char *,
+	    char *, logical *, integer *, complex *, integer *, complex *,
+	    integer *, complex *, integer *, integer *, integer *, complex *,
+	    real *, integer *), cunghr_(integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    integer *);
+    static integer minwrk, maxwrk;
+    static logical wantvl;
+    static real smlnum;
+    static integer hswork, irwork;
+    static logical lquery, wantvr;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CGEEV computes for an N-by-N complex nonsymmetric matrix A, the
+    eigenvalues and, optionally, the left and/or right eigenvectors.
+
+    The right eigenvector v(j) of A satisfies
+                     A * v(j) = lambda(j) * v(j)
+    where lambda(j) is its eigenvalue.
+    The left eigenvector u(j) of A satisfies
+                  u(j)**H * A = lambda(j) * u(j)**H
+    where u(j)**H denotes the conjugate transpose of u(j).
+
+    The computed eigenvectors are normalized to have Euclidean norm
+    equal to 1 and largest component real.
+
+    Arguments
+    =========
+
+    JOBVL   (input) CHARACTER*1
+            = 'N': left eigenvectors of A are not computed;
+            = 'V': left eigenvectors of are computed.
+
+    JOBVR   (input) CHARACTER*1
+            = 'N': right eigenvectors of A are not computed;
+            = 'V': right eigenvectors of A are computed.
+
+    N       (input) INTEGER
+            The order of the matrix A. N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the N-by-N matrix A.
+            On exit, A has been overwritten.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    W       (output) COMPLEX array, dimension (N)
+            W contains the computed eigenvalues.
+
+    VL      (output) COMPLEX array, dimension (LDVL,N)
+            If JOBVL = 'V', the left eigenvectors u(j) are stored one
+            after another in the columns of VL, in the same order
+            as their eigenvalues.
+            If JOBVL = 'N', VL is not referenced.
+            u(j) = VL(:,j), the j-th column of VL.
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= 1; if
+            JOBVL = 'V', LDVL >= N.
+
+    VR      (output) COMPLEX array, dimension (LDVR,N)
+            If JOBVR = 'V', the right eigenvectors v(j) are stored one
+            after another in the columns of VR, in the same order
+            as their eigenvalues.
+            If JOBVR = 'N', VR is not referenced.
+            v(j) = VR(:,j), the j-th column of VR.
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= 1; if
+            JOBVR = 'V', LDVR >= N.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,2*N).
+            For good performance, LWORK must generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace) REAL array, dimension (2*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = i, the QR algorithm failed to compute all the
+                  eigenvalues, and no eigenvectors have been computed;
+                  elements and i+1:N of W contain eigenvalues which have
+                  converged.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --w;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+    --rwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -1;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) {
+	*info = -8;
+    } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) {
+	*info = -10;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         CWorkspace refers to complex workspace, and RWorkspace to real
+         workspace. NB refers to the optimal block size for the
+         immediately following subroutine, as returned by ILAENV.
+         HSWORK refers to the workspace preferred by CHSEQR, as
+         calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+         the worst case.)
+*/
+
+    minwrk = 1;
+    if (*info == 0 && ((*lwork >= 1) || (lquery))) {
+	maxwrk = *n + *n * ilaenv_(&c__1, "CGEHRD", " ", n, &c__1, n, &c__0, (
+		ftnlen)6, (ftnlen)1);
+	if (! wantvl && ! wantvr) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = (*n) << (1);
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "CHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "EN", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+	    maxwrk = max(maxwrk,hswork);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = (*n) << (1);
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "CUNGHR",
+		    " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "CHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "CHSEQR", "SV", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = max(maxwrk,hswork), i__2 = (*n) << (1);
+	    maxwrk = max(i__1,i__2);
+	}
+	work[1].r = (real) maxwrk, work[1].i = 0.f;
+    }
+    if (*lwork < minwrk && ! lquery) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEEV ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = clange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	clascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*
+       Balance the matrix
+       (CWorkspace: none)
+       (RWorkspace: need N)
+*/
+
+    ibal = 1;
+    cgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr);
+
+/*
+       Reduce to upper Hessenberg form
+       (CWorkspace: need 2*N, prefer N+N*NB)
+       (RWorkspace: none)
+*/
+
+    itau = 1;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    cgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvl) {
+
+/*
+          Want left eigenvectors
+          Copy Householder vectors to VL
+*/
+
+	*(unsigned char *)side = 'L';
+	clacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*
+          Generate unitary matrix in VL
+          (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+          (RWorkspace: none)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	cunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VL
+          (CWorkspace: need 1, prefer HSWORK (see comments) )
+          (RWorkspace: none)
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[
+		vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*
+             Want left and right eigenvectors
+             Copy Schur vectors to VR
+*/
+
+	    *(unsigned char *)side = 'B';
+	    clacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*
+          Want right eigenvectors
+          Copy Householder vectors to VR
+*/
+
+	*(unsigned char *)side = 'R';
+	clacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*
+          Generate unitary matrix in VR
+          (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+          (RWorkspace: none)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	cunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VR
+          (CWorkspace: need 1, prefer HSWORK (see comments) )
+          (RWorkspace: none)
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	chseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*
+          Compute eigenvalues only
+          (CWorkspace: need 1, prefer HSWORK (see comments) )
+          (RWorkspace: none)
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	chseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO > 0 from CHSEQR, then quit */
+
+    if (*info > 0) {
+	goto L50;
+    }
+
+    if ((wantvl) || (wantvr)) {
+
+/*
+          Compute left and/or right eigenvectors
+          (CWorkspace: need 2*N)
+          (RWorkspace: need 2*N)
+*/
+
+	irwork = ibal + *n;
+	ctrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl,
+		 &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork],
+		&ierr);
+    }
+
+    if (wantvl) {
+
+/*
+          Undo balancing of left eigenvectors
+          (CWorkspace: none)
+          (RWorkspace: need N)
+*/
+
+	cgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset],
+		ldvl, &ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scl = 1.f / scnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+	    csscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		i__3 = k + i__ * vl_dim1;
+/* Computing 2nd power */
+		r__1 = vl[i__3].r;
+/* Computing 2nd power */
+		r__2 = r_imag(&vl[k + i__ * vl_dim1]);
+		rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2;
+/* L10: */
+	    }
+	    k = isamax_(n, &rwork[irwork], &c__1);
+	    r_cnjg(&q__2, &vl[k + i__ * vl_dim1]);
+	    r__1 = sqrt(rwork[irwork + k - 1]);
+	    q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1;
+	    tmp.r = q__1.r, tmp.i = q__1.i;
+	    cscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1);
+	    i__2 = k + i__ * vl_dim1;
+	    i__3 = k + i__ * vl_dim1;
+	    r__1 = vl[i__3].r;
+	    q__1.r = r__1, q__1.i = 0.f;
+	    vl[i__2].r = q__1.r, vl[i__2].i = q__1.i;
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*
+          Undo balancing of right eigenvectors
+          (CWorkspace: none)
+          (RWorkspace: need N)
+*/
+
+	cgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset],
+		ldvr, &ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scl = 1.f / scnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+	    csscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		i__3 = k + i__ * vr_dim1;
+/* Computing 2nd power */
+		r__1 = vr[i__3].r;
+/* Computing 2nd power */
+		r__2 = r_imag(&vr[k + i__ * vr_dim1]);
+		rwork[irwork + k - 1] = r__1 * r__1 + r__2 * r__2;
+/* L30: */
+	    }
+	    k = isamax_(n, &rwork[irwork], &c__1);
+	    r_cnjg(&q__2, &vr[k + i__ * vr_dim1]);
+	    r__1 = sqrt(rwork[irwork + k - 1]);
+	    q__1.r = q__2.r / r__1, q__1.i = q__2.i / r__1;
+	    tmp.r = q__1.r, tmp.i = q__1.i;
+	    cscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1);
+	    i__2 = k + i__ * vr_dim1;
+	    i__3 = k + i__ * vr_dim1;
+	    r__1 = vr[i__3].r;
+	    q__1.r = r__1, q__1.i = 0.f;
+	    vr[i__2].r = q__1.r, vr[i__2].i = q__1.i;
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = max(i__3,1);
+	clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1]
+		, &i__2, &ierr);
+	if (*info > 0) {
+	    i__1 = ilo - 1;
+	    clascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n,
+		     &ierr);
+	}
+    }
+
+    work[1].r = (real) maxwrk, work[1].i = 0.f;
+    return 0;
+
+/*     End of CGEEV */
+
+} /* cgeev_ */
+
+/* Subroutine */ int cgehd2_(integer *n, integer *ilo, integer *ihi, complex *
+	a, integer *lda, complex *tau, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__;
+    static complex alpha;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *),
+	    clarfg_(integer *, complex *, complex *, integer *, complex *),
+	    xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGEHD2 reduces a complex general matrix A to upper Hessenberg form H
+    by a unitary similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to CGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= max(1,N).
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the n by n general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the unitary matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) COMPLEX array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) COMPLEX array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEHD2", &i__1);
+	return 0;
+    }
+
+    i__1 = *ihi - 1;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+
+/*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */
+
+	i__2 = i__ + 1 + i__ * a_dim1;
+	alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	i__2 = *ihi - i__;
+/* Computing MIN */
+	i__3 = i__ + 2;
+	clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[
+		i__]);
+	i__2 = i__ + 1 + i__ * a_dim1;
+	a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*        Apply H(i) to A(1:ihi,i+1:ihi) from the right */
+
+	i__2 = *ihi - i__;
+	clarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]);
+
+/*        Apply H(i)' to A(i+1:ihi,i+1:n) from the left */
+
+	i__2 = *ihi - i__;
+	i__3 = *n - i__;
+	r_cnjg(&q__1, &tau[i__]);
+	clarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &q__1,
+		 &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]);
+
+	i__2 = i__ + 1 + i__ * a_dim1;
+	a[i__2].r = alpha.r, a[i__2].i = alpha.i;
+/* L10: */
+    }
+
+    return 0;
+
+/*     End of CGEHD2 */
+
+} /* cgehd2_ */
+
+/* Subroutine */ int cgehrd_(integer *n, integer *ilo, integer *ihi, complex *
+	a, integer *lda, complex *tau, complex *work, integer *lwork, integer
+	*info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__;
+    static complex t[4160]	/* was [65][64] */;
+    static integer ib;
+    static complex ei;
+    static integer nb, nh, nx, iws;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int cgehd2_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *), clarfb_(
+	    char *, char *, char *, char *, integer *, integer *, integer *,
+	    complex *, integer *, complex *, integer *, complex *, integer *,
+	    complex *, integer *), clahrd_(
+	    integer *, integer *, integer *, complex *, integer *, complex *,
+	    complex *, integer *, complex *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CGEHRD reduces a complex general matrix A to upper Hessenberg form H
+    by a unitary similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to CGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the N-by-N general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the unitary matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) COMPLEX array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+            zero.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MIN */
+    i__1 = 64, i__2 = ilaenv_(&c__1, "CGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = min(i__1,i__2);
+    lwkopt = *n * nb;
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEHRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	tau[i__2].r = 0.f, tau[i__2].i = 0.f;
+/* L10: */
+    }
+    i__1 = *n - 1;
+    for (i__ = max(1,*ihi); i__ <= i__1; ++i__) {
+	i__2 = i__;
+	tau[i__2].r = 0.f, tau[i__2].i = 0.f;
+/* L20: */
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    iws = 1;
+    if (nb > 1 && nb < nh) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "CGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    iws = *n * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code.
+
+   Computing MAX
+*/
+		i__1 = 2, i__2 = ilaenv_(&c__2, "CGEHRD", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+		if (*lwork >= *n * nbmin) {
+		    nb = *lwork / *n;
+		} else {
+		    nb = 1;
+		}
+	    }
+	}
+    }
+    ldwork = *n;
+
+    if ((nb < nbmin) || (nb >= nh)) {
+
+/*        Use unblocked code below */
+
+	i__ = *ilo;
+
+    } else {
+
+/*        Use blocked code */
+
+	i__1 = *ihi - 1 - nx;
+	i__2 = nb;
+	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - i__;
+	    ib = min(i__3,i__4);
+
+/*
+             Reduce columns i:i+ib-1 to Hessenberg form, returning the
+             matrices V and T of the block reflector H = I - V*T*V'
+             which performs the reduction, and also the matrix Y = A*V*T
+*/
+
+	    clahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
+		    c__65, &work[1], &ldwork);
+
+/*
+             Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+             right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+             to 1.
+*/
+
+	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
+	    ei.r = a[i__3].r, ei.i = a[i__3].i;
+	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
+	    a[i__3].r = 1.f, a[i__3].i = 0.f;
+	    i__3 = *ihi - i__ - ib + 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, &
+		    q__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda,
+		     &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda);
+	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
+	    a[i__3].r = ei.r, a[i__3].i = ei.i;
+
+/*
+             Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+             left
+*/
+
+	    i__3 = *ihi - i__;
+	    i__4 = *n - i__ - ib + 1;
+	    clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", &
+		    i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &
+		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
+		    ldwork);
+/* L30: */
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+
+    cgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    work[1].r = (real) iws, work[1].i = 0.f;
+
+    return 0;
+
+/*     End of CGEHRD */
+
+} /* cgehrd_ */
+
+/* Subroutine */ int cgelq2_(integer *m, integer *n, complex *a, integer *lda,
+	 complex *tau, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, k;
+    static complex alpha;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *),
+	    clarfg_(integer *, complex *, complex *, integer *, complex *),
+	    clacgv_(integer *, complex *, integer *), xerbla_(char *, integer
+	    *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGELQ2 computes an LQ factorization of a complex m by n matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m by min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) COMPLEX array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+    A(i,i+1:n), and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGELQ2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
+
+	i__2 = *n - i__ + 1;
+	clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	i__2 = i__ + i__ * a_dim1;
+	alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	i__2 = *n - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__]
+		);
+	if (i__ < *m) {
+
+/*           Apply H(i) to A(i+1:m,i:n) from the right */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1.f, a[i__2].i = 0.f;
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+	    clarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
+		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	}
+	i__2 = i__ + i__ * a_dim1;
+	a[i__2].r = alpha.r, a[i__2].i = alpha.i;
+	i__2 = *n - i__ + 1;
+	clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+/* L10: */
+    }
+    return 0;
+
+/*     End of CGELQ2 */
+
+} /* cgelq2_ */
+
+/* Subroutine */ int cgelqf_(integer *m, integer *n, complex *a, integer *lda,
+	 complex *tau, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int cgelq2_(integer *, integer *, complex *,
+	    integer *, complex *, complex *, integer *), clarfb_(char *, char
+	    *, char *, char *, integer *, integer *, integer *, complex *,
+	    integer *, complex *, integer *, complex *, integer *, complex *,
+	    integer *), clarft_(char *, char *
+	    , integer *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CGELQF computes an LQ factorization of a complex M-by-N matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+    A(i,i+1:n), and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *m * nb;
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGELQF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "CGELQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "CGELQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the LQ factorization of the current block
+             A(i:i+ib-1,i:n)
+*/
+
+	    i__3 = *n - i__ + 1;
+	    cgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *n - i__ + 1;
+		clarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i+ib:m,i:n) from the right */
+
+		i__3 = *m - i__ - ib + 1;
+		i__4 = *n - i__ + 1;
+		clarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3,
+			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib +
+			1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	cgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1].r = (real) iws, work[1].i = 0.f;
+    return 0;
+
+/*     End of CGELQF */
+
+} /* cgelqf_ */
+
+/* Subroutine */ int cgelsd_(integer *m, integer *n, integer *nrhs, complex *
+	a, integer *lda, complex *b, integer *ldb, real *s, real *rcond,
+	integer *rank, complex *work, integer *lwork, real *rwork, integer *
+	iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+    real r__1;
+    complex q__1;
+
+    /* Local variables */
+    static integer ie, il, mm;
+    static real eps, anrm, bnrm;
+    static integer itau, iascl, ibscl;
+    static real sfmin;
+    static integer minmn, maxmn, itaup, itauq, mnthr, nwork;
+    extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *,
+	    integer *, real *, real *, complex *, complex *, complex *,
+	    integer *, integer *), slabad_(real *, real *);
+    extern doublereal clange_(char *, integer *, integer *, complex *,
+	    integer *, real *);
+    extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *,
+	    integer *, complex *, complex *, integer *, integer *), clalsd_(
+	    char *, integer *, integer *, integer *, real *, real *, complex *
+	    , integer *, real *, integer *, complex *, real *, integer *,
+	    integer *), clascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, complex *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer *,
+	    complex *, complex *, integer *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex
+	    *, integer *, complex *, integer *), claset_(char *,
+	    integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *), slaset_(
+	    char *, integer *, integer *, real *, real *, real *, integer *), cunmlq_(char *, char *, integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *, complex *,
+	    integer *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *);
+    static integer minwrk, maxwrk;
+    static real smlnum;
+    static logical lquery;
+    static integer nrwork, smlsiz;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    CGELSD computes the minimum-norm solution to a real linear least
+    squares problem:
+        minimize 2-norm(| b - A*x |)
+    using the singular value decomposition (SVD) of A. A is an M-by-N
+    matrix which may be rank-deficient.
+
+    Several right hand side vectors b and solution vectors x can be
+    handled in a single call; they are stored as the columns of the
+    M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+    matrix X.
+
+    The problem is solved in three steps:
+    (1) Reduce the coefficient matrix A to bidiagonal form with
+        Householder tranformations, reducing the original problem
+        into a "bidiagonal least squares problem" (BLS)
+    (2) Solve the BLS using a divide and conquer approach.
+    (3) Apply back all the Householder tranformations to solve
+        the original least squares problem.
+
+    The effective rank of A is determined by treating as zero those
+    singular values which are less than RCOND times the largest singular
+    value.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A. N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrices B and X. NRHS >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, A has been destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,M).
+
+    B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+            On entry, the M-by-NRHS right hand side matrix B.
+            On exit, B is overwritten by the N-by-NRHS solution matrix X.
+            If m >= n and RANK = n, the residual sum-of-squares for
+            the solution in the i-th column is given by the sum of
+            squares of elements n+1:m in that column.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M,N).
+
+    S       (output) REAL array, dimension (min(M,N))
+            The singular values of A in decreasing order.
+            The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+
+    RCOND   (input) REAL
+            RCOND is used to determine the effective rank of A.
+            Singular values S(i) <= RCOND*S(1) are treated as zero.
+            If RCOND < 0, machine precision is used instead.
+
+    RANK    (output) INTEGER
+            The effective rank of A, i.e., the number of singular values
+            which are greater than RCOND*S(1).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK must be at least 1.
+            The exact minimum amount of workspace needed depends on M,
+            N and NRHS. As long as LWORK is at least
+                2 * N + N * NRHS
+            if M is greater than or equal to N or
+                2 * M + M * NRHS
+            if M is less than N, the code will execute correctly.
+            For good performance, LWORK should generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+
+    RWORK   (workspace) REAL array, dimension at least
+               10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS +
+               (SMLSIZ+1)**2
+            if M is greater than or equal to N or
+               10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS +
+               (SMLSIZ+1)**2
+            if M is less than N, the code will execute correctly.
+            SMLSIZ is returned by ILAENV and is equal to the maximum
+            size of the subproblems at the bottom of the computation
+            tree (usually about 25), and
+               NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+
+    IWORK   (workspace) INTEGER array, dimension (LIWORK)
+            LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+            where MINMN = MIN( M,N ).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value.
+            > 0:  the algorithm for computing the SVD failed to converge;
+                  if INFO = i, i off-diagonal elements of an intermediate
+                  bidiagonal form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input arguments.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --s;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    maxmn = max(*m,*n);
+    mnthr = ilaenv_(&c__6, "CGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*ldb < max(1,maxmn)) {
+	*info = -7;
+    }
+
+    smlsiz = ilaenv_(&c__9, "CGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Compute workspace.
+       (Note: Comments in the code beginning "Workspace:" describe the
+       minimal amount of workspace needed at that point in the code,
+       as well as the preferred amount for good performance.
+       NB refers to the optimal block size for the immediately
+       following subroutine, as returned by ILAENV.)
+*/
+
+    minwrk = 1;
+    if (*info == 0) {
+	maxwrk = 0;
+	mm = *m;
+	if (*m >= *n && *m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, &
+		    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "CUNMQR", "LC", m,
+		    nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
+	    maxwrk = max(i__1,i__2);
+	}
+	if (*m >= *n) {
+
+/*
+             Path 1 - overdetermined or exactly determined.
+
+   Computing MAX
+*/
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + (mm + *n) * ilaenv_(&c__1,
+		    "CGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1)
+		    ;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + *nrhs * ilaenv_(&c__1,
+		    "CUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)
+		    3);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1,
+		    "CUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * *nrhs;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ((*n) << (1)) + mm, i__2 = ((*n) << (1)) + *n * *nrhs;
+	    minwrk = max(i__1,i__2);
+	}
+	if (*n > *m) {
+	    if (*n >= mnthr) {
+
+/*
+                Path 2a - underdetermined, with many more columns
+                than rows.
+*/
+
+		maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &c_n1,
+			&c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1))
+			* ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs *
+			ilaenv_(&c__1, "CUNMBR", "QLC", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) *
+			ilaenv_(&c__1, "CUNMLQ", "LC", n, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+		maxwrk = max(i__1,i__2);
+		if (*nrhs > 1) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+		    maxwrk = max(i__1,i__2);
+		} else {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1));
+		    maxwrk = max(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *m * *nrhs;
+		maxwrk = max(i__1,i__2);
+	    } else {
+
+/*              Path 2 - underdetermined. */
+
+		maxwrk = ((*m) << (1)) + (*n + *m) * ilaenv_(&c__1, "CGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = ((*m) << (1)) + *nrhs * ilaenv_(&c__1,
+			"CUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, (
+			ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			"CUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (
+			ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * *nrhs;
+		maxwrk = max(i__1,i__2);
+	    }
+/* Computing MAX */
+	    i__1 = ((*m) << (1)) + *n, i__2 = ((*m) << (1)) + *m * *nrhs;
+	    minwrk = max(i__1,i__2);
+	}
+	minwrk = min(minwrk,maxwrk);
+	r__1 = (real) maxwrk;
+	q__1.r = r__1, q__1.i = 0.f;
+	work[1].r = q__1.r, work[1].i = q__1.i;
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGELSD", &i__1);
+	return 0;
+    } else if (lquery) {
+	goto L10;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*m == 0) || (*n == 0)) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters. */
+
+    eps = slamch_("P");
+    sfmin = slamch_("S");
+    smlnum = sfmin / eps;
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Scale A if max entry outside range [SMLNUM,BIGNUM]. */
+
+    anrm = clange_("M", m, n, &a[a_offset], lda, &rwork[1]);
+    iascl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 2;
+    } else if (anrm == 0.f) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = max(*m,*n);
+	claset_("F", &i__1, nrhs, &c_b55, &c_b55, &b[b_offset], ldb);
+	slaset_("F", &minmn, &c__1, &c_b320, &c_b320, &s[1], &c__1)
+		;
+	*rank = 0;
+	goto L10;
+    }
+
+/*     Scale B if max entry outside range [SMLNUM,BIGNUM]. */
+
+    bnrm = clange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]);
+    ibscl = 0;
+    if (bnrm > 0.f && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	clascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	clascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     If M < N make sure B(M+1:N,:) = 0 */
+
+    if (*m < *n) {
+	i__1 = *n - *m;
+	claset_("F", &i__1, nrhs, &c_b55, &c_b55, &b[*m + 1 + b_dim1], ldb);
+    }
+
+/*     Overdetermined case. */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined. */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns */
+
+	    mm = *n;
+	    itau = 1;
+	    nwork = itau + *n;
+
+/*
+             Compute A=Q*R.
+             (RWorkspace: need N)
+             (CWorkspace: need N, prefer N*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+
+/*
+             Multiply B by transpose(Q).
+             (RWorkspace: need N)
+             (CWorkspace: need NRHS, prefer NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+/*           Zero out below R. */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2],
+			lda);
+	    }
+	}
+
+	itauq = 1;
+	itaup = itauq + *n;
+	nwork = itaup + *n;
+	ie = 1;
+	nrwork = ie + *n;
+
+/*
+          Bidiagonalize R in A.
+          (RWorkspace: need N)
+          (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	cgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], &
+		work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of R.
+          (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	cunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq],
+		&b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	clalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb,
+		rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info);
+	if (*info != 0) {
+	    goto L10;
+	}
+
+/*        Multiply B by right bidiagonalizing vectors of R. */
+
+	i__1 = *lwork - nwork + 1;
+	cunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
+		b[b_offset], ldb, &work[nwork], &i__1, info);
+
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 =
+		max(i__1,*nrhs), i__2 = *n - *m * 3;
+	if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2))
+		 {
+
+/*
+          Path 2a - underdetermined, with many more columns than rows
+          and sufficient workspace for an efficient algorithm.
+*/
+
+	    ldwork = *m;
+/*
+   Computing MAX
+   Computing MAX
+*/
+	    i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 =
+		     max(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * *
+		    lda + *m + *m * *nrhs;
+	    if (*lwork >= max(i__1,i__2)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    nwork = *m + 1;
+
+/*
+          Compute A=L*Q.
+          (CWorkspace: need 2*M, prefer M+M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+	    il = nwork;
+
+/*        Copy L to WORK(IL), zeroing out above its diagonal. */
+
+	    clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__1 = *m - 1;
+	    i__2 = *m - 1;
+	    claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwork], &
+		    ldwork);
+	    itauq = il + ldwork * *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+	    ie = 1;
+	    nrwork = ie + *m;
+
+/*
+          Bidiagonalize L in WORK(IL).
+          (RWorkspace: need M)
+          (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq],
+		     &work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of L.
+          (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    clalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1],
+		     info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of L. */
+
+	    i__1 = *lwork - nwork + 1;
+	    cunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
+		    itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Zero out below first M rows of B. */
+
+	    i__1 = *n - *m;
+	    claset_("F", &i__1, nrhs, &c_b55, &c_b55, &b[*m + 1 + b_dim1],
+		    ldb);
+	    nwork = itau + *m;
+
+/*
+          Multiply transpose(Q) by B.
+          (CWorkspace: need NRHS, prefer NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases. */
+
+	    itauq = 1;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+	    ie = 1;
+	    nrwork = ie + *m;
+
+/*
+          Bidiagonalize A.
+          (RWorkspace: need M)
+          (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors.
+          (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    clalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1],
+		     info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of A. */
+
+	    i__1 = *lwork - nwork + 1;
+	    cunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+	}
+    }
+
+/*     Undo scaling. */
+
+    if (iascl == 1) {
+	clascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	clascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	clascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	clascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L10:
+    r__1 = (real) maxwrk;
+    q__1.r = r__1, q__1.i = 0.f;
+    work[1].r = q__1.r, work[1].i = q__1.i;
+    return 0;
+
+/*     End of CGELSD */
+
+} /* cgelsd_ */
+
+/* Subroutine */ int cgeqr2_(integer *m, integer *n, complex *a, integer *lda,
+	 complex *tau, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, k;
+    static complex alpha;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *),
+	    clarfg_(integer *, complex *, complex *, integer *, complex *),
+	    xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGEQR2 computes a QR factorization of a complex m by n matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(m,n) by n upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) COMPLEX array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEQR2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	clarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1]
+		, &c__1, &tau[i__]);
+	if (i__ < *n) {
+
+/*           Apply H(i)' to A(i:m,i+1:n) from the left */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1.f, a[i__2].i = 0.f;
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    r_cnjg(&q__1, &tau[i__]);
+	    clarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &q__1,
+		     &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = alpha.r, a[i__2].i = alpha.i;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of CGEQR2 */
+
+} /* cgeqr2_ */
+
+/* Subroutine */ int cgeqrf_(integer *m, integer *n, complex *a, integer *lda,
+	 complex *tau, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int cgeqr2_(integer *, integer *, complex *,
+	    integer *, complex *, complex *, integer *), clarfb_(char *, char
+	    *, char *, char *, integer *, integer *, integer *, complex *,
+	    integer *, complex *, integer *, complex *, integer *, complex *,
+	    integer *), clarft_(char *, char *
+	    , integer *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CGEQRF computes a QR factorization of a complex M-by-N matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of min(m,n) elementary reflectors (see Further
+            Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "CGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *n * nb;
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGEQRF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "CGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "CGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the QR factorization of the current block
+             A(i:m,i:i+ib-1)
+*/
+
+	    i__3 = *m - i__ + 1;
+	    cgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *m - i__ + 1;
+		clarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		clarfb_("Left", "Conjugate transpose", "Forward", "Columnwise"
+			, &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &
+			work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda,
+			&work[ib + 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	cgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1].r = (real) iws, work[1].i = 0.f;
+    return 0;
+
+/*     End of CGEQRF */
+
+} /* cgeqrf_ */
+
+/* Subroutine */ int cgesdd_(char *jobz, integer *m, integer *n, complex *a,
+	integer *lda, real *s, complex *u, integer *ldu, complex *vt, integer
+	*ldvt, complex *work, integer *lwork, real *rwork, integer *iwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2, i__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, ie, il, ir, iu, blk;
+    static real dum[1], eps;
+    static integer iru, ivt, iscl;
+    static real anrm;
+    static integer idum[1], ierr, itau, irvt;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    static integer chunk, minmn, wrkbl, itaup, itauq;
+    static logical wntqa;
+    static integer nwork;
+    extern /* Subroutine */ int clacp2_(char *, integer *, integer *, real *,
+	    integer *, complex *, integer *);
+    static logical wntqn, wntqo, wntqs;
+    static integer mnthr1, mnthr2;
+    extern /* Subroutine */ int cgebrd_(integer *, integer *, complex *,
+	    integer *, real *, real *, complex *, complex *, complex *,
+	    integer *, integer *);
+    extern doublereal clange_(char *, integer *, integer *, complex *,
+	    integer *, real *);
+    extern /* Subroutine */ int cgelqf_(integer *, integer *, complex *,
+	    integer *, complex *, complex *, integer *, integer *), clacrm_(
+	    integer *, integer *, complex *, integer *, real *, integer *,
+	    complex *, integer *, real *), clarcm_(integer *, integer *, real
+	    *, integer *, complex *, integer *, complex *, integer *, real *),
+	     clascl_(char *, integer *, integer *, real *, real *, integer *,
+	    integer *, complex *, integer *, integer *), sbdsdc_(char
+	    *, char *, integer *, real *, real *, real *, integer *, real *,
+	    integer *, real *, integer *, real *, integer *, integer *), cgeqrf_(integer *, integer *, complex *, integer
+	    *, complex *, complex *, integer *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex
+	    *, integer *, complex *, integer *), claset_(char *,
+	    integer *, integer *, complex *, complex *, complex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int cungbr_(char *, integer *, integer *, integer
+	    *, complex *, integer *, complex *, complex *, integer *, integer
+	    *);
+    static real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), cunmbr_(char *, char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *), cunglq_(
+	    integer *, integer *, integer *, complex *, integer *, complex *,
+	    complex *, integer *, integer *);
+    static integer ldwrkl;
+    extern /* Subroutine */ int cungqr_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *, integer *);
+    static integer ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt;
+    static real smlnum;
+    static logical wntqas, lquery;
+    static integer nrwork;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    CGESDD computes the singular value decomposition (SVD) of a complex
+    M-by-N matrix A, optionally computing the left and/or right singular
+    vectors, by using divide-and-conquer method. The SVD is written
+
+         A = U * SIGMA * conjugate-transpose(V)
+
+    where SIGMA is an M-by-N matrix which is zero except for its
+    min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
+    V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
+    are the singular values of A; they are real and non-negative, and
+    are returned in descending order.  The first min(m,n) columns of
+    U and V are the left and right singular vectors of A.
+
+    Note that the routine returns VT = V**H, not V.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            Specifies options for computing all or part of the matrix U:
+            = 'A':  all M columns of U and all N rows of V**H are
+                    returned in the arrays U and VT;
+            = 'S':  the first min(M,N) columns of U and the first
+                    min(M,N) rows of V**H are returned in the arrays U
+                    and VT;
+            = 'O':  If M >= N, the first N columns of U are overwritten
+                    on the array A and all rows of V**H are returned in
+                    the array VT;
+                    otherwise, all columns of U are returned in the
+                    array U and the first M rows of V**H are overwritten
+                    in the array VT;
+            = 'N':  no columns of U or rows of V**H are computed.
+
+    M       (input) INTEGER
+            The number of rows of the input matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the input matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit,
+            if JOBZ = 'O',  A is overwritten with the first N columns
+                            of U (the left singular vectors, stored
+                            columnwise) if M >= N;
+                            A is overwritten with the first M rows
+                            of V**H (the right singular vectors, stored
+                            rowwise) otherwise.
+            if JOBZ .ne. 'O', the contents of A are destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    S       (output) REAL array, dimension (min(M,N))
+            The singular values of A, sorted so that S(i) >= S(i+1).
+
+    U       (output) COMPLEX array, dimension (LDU,UCOL)
+            UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+            UCOL = min(M,N) if JOBZ = 'S'.
+            If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+            unitary matrix U;
+            if JOBZ = 'S', U contains the first min(M,N) columns of U
+            (the left singular vectors, stored columnwise);
+            if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= 1; if
+            JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+
+    VT      (output) COMPLEX array, dimension (LDVT,N)
+            If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+            N-by-N unitary matrix V**H;
+            if JOBZ = 'S', VT contains the first min(M,N) rows of
+            V**H (the right singular vectors, stored rowwise);
+            if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.  LDVT >= 1; if
+            JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+            if JOBZ = 'S', LDVT >= min(M,N).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= 1.
+            if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N).
+            if JOBZ = 'O',
+                  LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+            if JOBZ = 'S' or 'A',
+                  LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+            For good performance, LWORK should generally be larger.
+            If LWORK < 0 but other input arguments are legal, WORK(1)
+            returns the optimal LWORK.
+
+    RWORK   (workspace) REAL array, dimension (LRWORK)
+            If JOBZ = 'N', LRWORK >= 7*min(M,N).
+            Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N)
+
+    IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The updating process of SBDSDC did not converge.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    mnthr1 = (integer) (minmn * 17.f / 9.f);
+    mnthr2 = (integer) (minmn * 5.f / 3.f);
+    wntqa = lsame_(jobz, "A");
+    wntqs = lsame_(jobz, "S");
+    wntqas = (wntqa) || (wntqs);
+    wntqo = lsame_(jobz, "O");
+    wntqn = lsame_(jobz, "N");
+    minwrk = 1;
+    maxwrk = 1;
+    lquery = *lwork == -1;
+
+    if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && *
+	    ldu < *m)) {
+	*info = -8;
+    } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt <
+	    minmn)) || (wntqo && *m >= *n && *ldvt < *n)) {
+	*info = -10;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         CWorkspace refers to complex workspace, and RWorkspace to
+         real workspace. NB refers to the optimal block size for the
+         immediately following subroutine, as returned by ILAENV.)
+*/
+
+    if (*info == 0 && *m > 0 && *n > 0) {
+	if (*m >= *n) {
+
+/*
+             There is no complex work space needed for bidiagonal SVD
+             The real work space needed for bidiagonal SVD is BDSPAC,
+             BDSPAC = 3*N*N + 4*N
+*/
+
+	    if (*m >= mnthr1) {
+		if (wntqn) {
+
+/*                 Path 1 (M much larger than N, JOBZ='N') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl;
+		    minwrk = *n * 3;
+		} else if (wntqo) {
+
+/*                 Path 2 (M much larger than N, JOBZ='O') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *n + *n * *n + wrkbl;
+		    minwrk = ((*n) << (1)) * *n + *n * 3;
+		} else if (wntqs) {
+
+/*                 Path 3 (M much larger than N, JOBZ='S') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "CUNGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *n * *n + wrkbl;
+		    minwrk = *n * *n + *n * 3;
+		} else if (wntqa) {
+
+/*                 Path 4 (M much larger than N, JOBZ='A') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "CGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "CUNGQR",
+			    " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *n * *n + wrkbl;
+		    minwrk = *n * *n + ((*n) << (1)) + *m;
+		}
+	    } else if (*m >= mnthr2) {
+
+/*              Path 5 (M much larger than N, but not as much as MNTHR1) */
+
+		maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*n) << (1)) + *m;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *n * *n;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 6 (M at least N, but not much larger) */
+
+		maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*n) << (1)) + *m;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *n * *n;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		}
+	    }
+	} else {
+
+/*
+             There is no complex work space needed for bidiagonal SVD
+             The real work space needed for bidiagonal SVD is BDSPAC,
+             BDSPAC = 3*M*M + 4*M
+*/
+
+	    if (*n >= mnthr1) {
+		if (wntqn) {
+
+/*                 Path 1t (N much larger than M, JOBZ='N') */
+
+		    maxwrk = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3;
+		} else if (wntqo) {
+
+/*                 Path 2t (N much larger than M, JOBZ='O') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *n + *m * *m + wrkbl;
+		    minwrk = ((*m) << (1)) * *m + *m * 3;
+		} else if (wntqs) {
+
+/*                 Path 3t (N much larger than M, JOBZ='S') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "CUNGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *m + wrkbl;
+		    minwrk = *m * *m + *m * 3;
+		} else if (wntqa) {
+
+/*                 Path 4t (N much larger than M, JOBZ='A') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "CGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "CUNGLQ",
+			    " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "CGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *m + wrkbl;
+		    minwrk = *m * *m + ((*m) << (1)) + *n;
+		}
+	    } else if (*n >= mnthr2) {
+
+/*              Path 5t (N much larger than M, but not as much as MNTHR1) */
+
+		maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*m) << (1)) + *n;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *m * *m;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 6t (N greater than M, but not much larger) */
+
+		maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "CGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*m) << (1)) + *n;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *m * *m;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1,
+			    "CUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "CUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		}
+	    }
+	}
+	maxwrk = max(maxwrk,minwrk);
+	work[1].r = (real) maxwrk, work[1].i = 0.f;
+    }
+
+    if (*lwork < minwrk && ! lquery) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGESDD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	if (*lwork >= 1) {
+	    work[1].r = 1.f, work[1].i = 0.f;
+	}
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = sqrt(slamch_("S")) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = clange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+	iscl = 1;
+	clascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	clascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*
+          A has at least as many rows as columns. If A has sufficiently
+          more rows than columns, first reduce using the QR
+          decomposition (if sufficient workspace available)
+*/
+
+	if (*m >= mnthr1) {
+
+	    if (wntqn) {
+
+/*
+                Path 1 (M much larger than N, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: need 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out below R */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		claset_("L", &i__1, &i__2, &c_b55, &c_b55, &a[a_dim1 + 2],
+			lda);
+		ie = 1;
+		itauq = 1;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nrwork = ie + *n;
+
+/*
+                Perform bidiagonal SVD, compute singular values only
+                (CWorkspace: 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2 (M much larger than N, JOBZ='O')
+                N left singular vectors to be overwritten on A and
+                N right singular vectors to be computed in VT
+*/
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		ir = iu + ldwrku * *n;
+		if (*lwork >= *m * *n + *n * *n + *n * 3) {
+
+/*                 WORK(IR) is M by N */
+
+		    ldwrkr = *m;
+		} else {
+		    ldwrkr = (*lwork - *n * *n - *n * 3) / *n;
+		}
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy R to WORK( IR ), zeroing out below it */
+
+		clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		claset_("L", &i__1, &i__2, &c_b55, &c_b55, &work[ir + 1], &
+			ldwrkr);
+
+/*
+                Generate Q in A
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in WORK(IR)
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of R in WORK(IRU) and computing right singular vectors
+                of R in WORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *n;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by the left singular vectors of R
+                (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by the right singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IU), storing result in WORK(IR) and copying to A
+                (CWorkspace: need 2*N*N, prefer N*N+M*N)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *m;
+		i__2 = ldwrkr;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = min(i__3,ldwrkr);
+		    cgemm_("N", "N", &chunk, n, n, &c_b56, &a[i__ + a_dim1],
+			    lda, &work[iu], &ldwrku, &c_b55, &work[ir], &
+			    ldwrkr);
+		    clacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ +
+			    a_dim1], lda);
+/* L10: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Path 3 (M much larger than N, JOBZ='S')
+                N left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		ir = 1;
+
+/*              WORK(IR) is N by N */
+
+		ldwrkr = *n;
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		clacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		claset_("L", &i__2, &i__1, &c_b55, &c_b55, &work[ir + 1], &
+			ldwrkr);
+
+/*
+                Generate Q in A
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in WORK(IR)
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *n;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IR), storing result in U
+                (CWorkspace: need N*N)
+                (RWorkspace: 0)
+*/
+
+		clacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr);
+		cgemm_("N", "N", m, n, n, &c_b56, &a[a_offset], lda, &work[ir]
+			, &ldwrkr, &c_b55, &u[u_offset], ldu);
+
+	    } else if (wntqa) {
+
+/*
+                Path 4 (M much larger than N, JOBZ='A')
+                M left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		itau = iu + ldwrku * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R, copying result to U
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Generate Q in U
+                (CWorkspace: need N+M, prefer N+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+
+/*              Produce R in A, zeroing out below it */
+
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		claset_("L", &i__2, &i__1, &c_b55, &c_b55, &a[a_dim1 + 2],
+			lda);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+		iru = ie + *n;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by left singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of R
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in U by left singular vectors of R in
+                WORK(IU), storing result in A
+                (CWorkspace: need N*N)
+                (RWorkspace: 0)
+*/
+
+		cgemm_("N", "N", m, n, n, &c_b56, &u[u_offset], ldu, &work[iu]
+			, &ldwrku, &c_b55, &a[a_offset], lda);
+
+/*              Copy left singular vectors of A from A to U */
+
+		clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+	    }
+
+	} else if (*m >= mnthr2) {
+
+/*
+             MNTHR2 <= M < MNTHR1
+
+             Path 5 (M much larger than N, but not as much as MNTHR1)
+             Reduce to bidiagonal form without QR decomposition, use
+             CUNGBR and matrix multiplication to compute singular vectors
+*/
+
+	    ie = 1;
+	    nrwork = ie + *n;
+	    itauq = 1;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+             (RWorkspace: need N)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		iu = nwork;
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__2, &ierr);
+
+/*
+                Generate Q in A
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
+			nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *n * 3) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		} else {
+
+/*                 WORK(IU) is LDWRKU by N */
+
+		    ldwrku = (*lwork - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in WORK(IU), copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu]
+			, &ldwrku, &rwork[nrwork]);
+		clacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt);
+
+/*
+                Multiply Q in A by real matrix RWORK(IRU), storing the
+                result in WORK(IU), copying to A
+                (CWorkspace: need N*N, prefer M*N)
+                (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*/
+
+		nrwork = irvt;
+		i__2 = *m;
+		i__1 = ldwrku;
+		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			i__1) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = min(i__3,ldwrku);
+		    clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n,
+			    &work[iu], &ldwrku, &rwork[nrwork]);
+		    clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ +
+			    a_dim1], lda);
+/* L20: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__1, &ierr);
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: need 0)
+                (Rworkspace: need N*N+2*M*N)
+*/
+
+		nrwork = irvt;
+		clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+	    } else {
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__1, &ierr);
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		clarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		clacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		nrwork = irvt;
+		clacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		clacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+	    }
+
+	} else {
+
+/*
+             M .LT. MNTHR2
+
+             Path 6 (M at least N, but not much larger)
+             Reduce to bidiagonal form without QR decomposition
+             Use CUNMBR to compute singular vectors
+*/
+
+	    ie = 1;
+	    nrwork = ie + *n;
+	    itauq = 1;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+             (RWorkspace: need N)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, &ierr);
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		iu = nwork;
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		if (*lwork >= *m * *n + *n * 3) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		} else {
+
+/*                 WORK( IU ) is LDWRKU by N */
+
+		    ldwrku = (*lwork - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: need 0)
+*/
+
+		clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+		if (*lwork >= *m * *n + *n * 3) {
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by left singular vectors of A, copying
+                to A
+                (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB)
+                (Rworkspace: need 0)
+*/
+
+		    claset_("F", m, n, &c_b55, &c_b55, &work[iu], &ldwrku);
+		    clacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku);
+		    i__1 = *lwork - nwork + 1;
+		    cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			    itauq], &work[iu], &ldwrku, &work[nwork], &i__1, &
+			    ierr);
+		    clacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate Q in A
+                   (Cworkspace: need 2*N, prefer N+N*NB)
+                   (Rworkspace: need 0)
+*/
+
+		    i__1 = *lwork - nwork + 1;
+		    cungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[nwork], &i__1, &ierr);
+
+/*
+                   Multiply Q in A by real matrix RWORK(IRU), storing the
+                   result in WORK(IU), copying to A
+                   (CWorkspace: need N*N, prefer M*N)
+                   (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*/
+
+		    nrwork = irvt;
+		    i__1 = *m;
+		    i__2 = ldwrku;
+		    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			     i__2) {
+/* Computing MIN */
+			i__3 = *m - i__ + 1;
+			chunk = min(i__3,ldwrku);
+			clacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru],
+				 n, &work[iu], &ldwrku, &rwork[nrwork]);
+			clacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ +
+				a_dim1], lda);
+/* L30: */
+		    }
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		claset_("F", m, n, &c_b55, &c_b55, &u[u_offset], ldu);
+		clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+	    } else {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		sbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*              Set the right corner of U to identity matrix */
+
+		claset_("F", m, m, &c_b55, &c_b55, &u[u_offset], ldu);
+		i__2 = *m - *n;
+		i__1 = *m - *n;
+		claset_("F", &i__2, &i__1, &c_b55, &c_b56, &u[*n + 1 + (*n +
+			1) * u_dim1], ldu);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+	    }
+
+	}
+
+    } else {
+
+/*
+          A has more columns than rows. If A has sufficiently more
+          columns than rows, first reduce using the LQ decomposition
+          (if sufficient workspace available)
+*/
+
+	if (*n >= mnthr1) {
+
+	    if (wntqn) {
+
+/*
+                Path 1t (N much larger than M, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Zero out above L */
+
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		claset_("U", &i__2, &i__1, &c_b55, &c_b55, &a[((a_dim1) << (1)
+			) + 1], lda);
+		ie = 1;
+		itauq = 1;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+		nrwork = ie + *m;
+
+/*
+                Perform bidiagonal SVD, compute singular values only
+                (CWorkspace: 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		sbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2t (N much larger than M, JOBZ='O')
+                M right singular vectors to be overwritten on A and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+		ldwkvt = *m;
+
+/*              WORK(IVT) is M by M */
+
+		il = ivt + ldwkvt * *m;
+		if (*lwork >= *m * *n + *m * *m + *m * 3) {
+
+/*                 WORK(IL) M by N */
+
+		    ldwrkl = *m;
+		    chunk = *n;
+		} else {
+
+/*                 WORK(IL) is M by CHUNK */
+
+		    ldwrkl = *m;
+		    chunk = (*lwork - *m * *m - *m * 3) / *m;
+		}
+		itau = il + ldwrkl * chunk;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy L to WORK(IL), zeroing about above it */
+
+		clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		claset_("U", &i__2, &i__1, &c_b55, &c_b55, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*
+                Generate Q in A
+                (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IL)
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *m;
+		irvt = iru + *m * *m;
+		nrwork = irvt + *m * *m;
+		sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by the left singular vectors of L
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+                Overwrite WORK(IVT) by the right singular vectors of L
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IL) by Q
+                in A, storing result in WORK(IL) and copying to A
+                (CWorkspace: need 2*M*M, prefer M*M+M*N))
+                (RWorkspace: 0)
+*/
+
+		i__2 = *n;
+		i__1 = chunk;
+		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			i__1) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = min(i__3,chunk);
+		    cgemm_("N", "N", m, &blk, m, &c_b56, &work[ivt], m, &a[
+			    i__ * a_dim1 + 1], lda, &c_b55, &work[il], &
+			    ldwrkl);
+		    clacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1
+			    + 1], lda);
+/* L40: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+               Path 3t (N much larger than M, JOBZ='S')
+               M right singular vectors to be computed in VT and
+               M left singular vectors to be computed in U
+*/
+
+		il = 1;
+
+/*              WORK(IL) is M by M */
+
+		ldwrkl = *m;
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		clacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		claset_("U", &i__1, &i__2, &c_b55, &c_b55, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*
+                Generate Q in A
+                (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IL)
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *m;
+		irvt = iru + *m * *m;
+		nrwork = irvt + *m * *m;
+		sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of L
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by left singular vectors of L
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Copy VT to WORK(IL), multiply right singular vectors of L
+                in WORK(IL) by Q in A, storing result in VT
+                (CWorkspace: need M*M)
+                (RWorkspace: 0)
+*/
+
+		clacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl);
+		cgemm_("N", "N", m, n, m, &c_b56, &work[il], &ldwrkl, &a[
+			a_offset], lda, &c_b55, &vt[vt_offset], ldvt);
+
+	    } else if (wntqa) {
+
+/*
+                Path 9t (N much larger than M, JOBZ='A')
+                N right singular vectors to be computed in VT and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+
+		ldwkvt = *m;
+		itau = ivt + ldwkvt * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q, copying result to VT
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+		clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Generate Q in VT
+                (CWorkspace: need M+N, prefer M+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
+			nwork], &i__1, &ierr);
+
+/*              Produce L in A, zeroing out above it */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		claset_("U", &i__1, &i__2, &c_b55, &c_b55, &a[((a_dim1) << (1)
+			) + 1], lda);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *m;
+		irvt = iru + *m * *m;
+		nrwork = irvt + *m * *m;
+		sbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of L
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+                Overwrite WORK(IVT) by right singular vectors of L
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IVT) by
+                Q in VT, storing result in A
+                (CWorkspace: need M*M)
+                (RWorkspace: 0)
+*/
+
+		cgemm_("N", "N", m, n, m, &c_b56, &work[ivt], &ldwkvt, &vt[
+			vt_offset], ldvt, &c_b55, &a[a_offset], lda);
+
+/*              Copy right singular vectors of A from A to VT */
+
+		clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+	    }
+
+	} else if (*n >= mnthr2) {
+
+/*
+             MNTHR2 <= N < MNTHR1
+
+             Path 5t (N much larger than M, but not as much as MNTHR1)
+             Reduce to bidiagonal form without QR decomposition, use
+             CUNGBR and matrix multiplication to compute singular vectors
+*/
+
+
+	    ie = 1;
+	    nrwork = ie + *m;
+	    itauq = 1;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+             (RWorkspace: M)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, &ierr);
+
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		ivt = nwork;
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__1, &ierr);
+
+/*
+                Generate P**H in A
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
+			nwork], &i__1, &ierr);
+
+		ldwkvt = *m;
+		if (*lwork >= *m * *n + *m * 3) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    nwork = ivt + ldwkvt * *n;
+		    chunk = *n;
+		} else {
+
+/*                 WORK( IVT ) is M by CHUNK */
+
+		    chunk = (*lwork - *m * 3) / *m;
+		    nwork = ivt + ldwkvt * chunk;
+		}
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRVT)
+                storing the result in WORK(IVT), copying to U
+                (Cworkspace: need 0)
+                (Rworkspace: need 2*M*M)
+*/
+
+		clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], &
+			ldwkvt, &rwork[nrwork]);
+		clacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu);
+
+/*
+                Multiply RWORK(IRVT) by P**H in A, storing the
+                result in WORK(IVT), copying to A
+                (CWorkspace: need M*M, prefer M*N)
+                (Rworkspace: need 2*M*M, prefer 2*M*N)
+*/
+
+		nrwork = iru;
+		i__1 = *n;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = min(i__3,chunk);
+		    clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1],
+			    lda, &work[ivt], &ldwkvt, &rwork[nrwork]);
+		    clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ *
+			    a_dim1 + 1], lda);
+/* L50: */
+		}
+	    } else if (wntqs) {
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__2, &ierr);
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: need 0)
+                (Rworkspace: need 3*M*M)
+*/
+
+		clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need M*M+2*M*N)
+*/
+
+		nrwork = iru;
+		clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+	    } else {
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__2, &ierr);
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		clacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		cungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: need 0)
+                (Rworkspace: need 3*M*M)
+*/
+
+		clacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		clacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need M*M+2*M*N)
+*/
+
+		clarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		clacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+	    }
+
+	} else {
+
+/*
+             N .LT. MNTHR2
+
+             Path 6t (N greater than M, but not much larger)
+             Reduce to bidiagonal form without LQ decomposition
+             Use CUNMBR to compute singular vectors
+*/
+
+	    ie = 1;
+	    nrwork = ie + *m;
+	    itauq = 1;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+             (RWorkspace: M)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    cgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		sbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		ldwkvt = *m;
+		ivt = nwork;
+		if (*lwork >= *m * *n + *m * 3) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    claset_("F", m, n, &c_b55, &c_b55, &work[ivt], &ldwkvt);
+		    nwork = ivt + ldwkvt * *n;
+		} else {
+
+/*                 WORK( IVT ) is M by CHUNK */
+
+		    chunk = (*lwork - *m * 3) / *m;
+		    nwork = ivt + ldwkvt * chunk;
+		}
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: need 0)
+*/
+
+		clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *m * 3) {
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+                Overwrite WORK(IVT) by right singular vectors of A,
+                copying to A
+                (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB)
+                (Rworkspace: need 0)
+*/
+
+		    clacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt);
+		    i__2 = *lwork - nwork + 1;
+		    cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[
+			    itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2,
+			    &ierr);
+		    clacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate P**H in A
+                   (Cworkspace: need 2*M, prefer M+M*NB)
+                   (Rworkspace: need 0)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    cungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[nwork], &i__2, &ierr);
+
+/*
+                   Multiply Q in A by real matrix RWORK(IRU), storing the
+                   result in WORK(IU), copying to A
+                   (CWorkspace: need M*M, prefer M*N)
+                   (Rworkspace: need 3*M*M, prefer M*M+2*M*N)
+*/
+
+		    nrwork = iru;
+		    i__2 = *n;
+		    i__1 = chunk;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *n - i__ + 1;
+			blk = min(i__3,chunk);
+			clarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1]
+				, lda, &work[ivt], &ldwkvt, &rwork[nrwork]);
+			clacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ *
+				a_dim1 + 1], lda);
+/* L60: */
+		    }
+		}
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: M*M)
+*/
+
+		clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: M*M)
+*/
+
+		claset_("F", m, n, &c_b55, &c_b55, &vt[vt_offset], ldvt);
+		clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+
+		sbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: M*M)
+*/
+
+		clacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*              Set the right corner of VT to identity matrix */
+
+		i__1 = *n - *m;
+		i__2 = *n - *m;
+		claset_("F", &i__1, &i__2, &c_b55, &c_b56, &vt[*m + 1 + (*m +
+			1) * vt_dim1], ldvt);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+                (RWorkspace: M*M)
+*/
+
+		claset_("F", n, n, &c_b55, &c_b55, &vt[vt_offset], ldvt);
+		clacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		cunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1].r = (real) maxwrk, work[1].i = 0.f;
+
+    return 0;
+
+/*     End of CGESDD */
+
+} /* cgesdd_ */
+
+/* Subroutine */ int cgesv_(integer *n, integer *nrhs, complex *a, integer *
+	lda, integer *ipiv, complex *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int cgetrf_(integer *, integer *, complex *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *), cgetrs_(char *, integer *, integer *, complex *, integer
+	    *, integer *, complex *, integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    CGESV computes the solution to a complex system of linear equations
+       A * X = B,
+    where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+
+    The LU decomposition with partial pivoting and row interchanges is
+    used to factor A as
+       A = P * L * U,
+    where P is a permutation matrix, L is unit lower triangular, and U is
+    upper triangular.  The factored form of A is then used to solve the
+    system of equations A * X = B.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of linear equations, i.e., the order of the
+            matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the N-by-N coefficient matrix A.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (output) INTEGER array, dimension (N)
+            The pivot indices that define the permutation matrix P;
+            row i of the matrix was interchanged with row IPIV(i).
+
+    B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+            On entry, the N-by-NRHS matrix of right hand side matrix B.
+            On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+                  has been completed, but the factor U is exactly
+                  singular, so the solution could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*nrhs < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGESV ", &i__1);
+	return 0;
+    }
+
+/*     Compute the LU factorization of A. */
+
+    cgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
+    if (*info == 0) {
+
+/*        Solve the system A*X = B, overwriting B with X. */
+
+	cgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
+		b_offset], ldb, info);
+    }
+    return 0;
+
+/*     End of CGESV */
+
+} /* cgesv_ */
+
+/* Subroutine */ int cgetf2_(integer *m, integer *n, complex *a, integer *lda,
+	 integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Builtin functions */
+    void c_div(complex *, complex *, complex *);
+
+    /* Local variables */
+    static integer j, jp;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *), cgeru_(integer *, integer *, complex *, complex *,
+	    integer *, complex *, integer *, complex *, integer *), cswap_(
+	    integer *, complex *, integer *, complex *, integer *);
+    extern integer icamax_(integer *, complex *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGETF2 computes an LU factorization of a general m-by-n matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the m by n matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+                 has been completed, but the factor U is exactly
+                 singular, and division by zero will occur if it is used
+                 to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGETF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = min(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Find pivot and test for singularity. */
+
+	i__2 = *m - j + 1;
+	jp = j - 1 + icamax_(&i__2, &a[j + j * a_dim1], &c__1);
+	ipiv[j] = jp;
+	i__2 = jp + j * a_dim1;
+	if ((a[i__2].r != 0.f) || (a[i__2].i != 0.f)) {
+
+/*           Apply the interchange to columns 1:N. */
+
+	    if (jp != j) {
+		cswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+	    }
+
+/*           Compute elements J+1:M of J-th column. */
+
+	    if (j < *m) {
+		i__2 = *m - j;
+		c_div(&q__1, &c_b56, &a[j + j * a_dim1]);
+		cscal_(&i__2, &q__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+
+	} else if (*info == 0) {
+
+	    *info = j;
+	}
+
+	if (j < min(*m,*n)) {
+
+/*           Update trailing submatrix. */
+
+	    i__2 = *m - j;
+	    i__3 = *n - j;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgeru_(&i__2, &i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &a[j +
+		    (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda)
+		    ;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of CGETF2 */
+
+} /* cgetf2_ */
+
+/* Subroutine */ int cgetrf_(integer *m, integer *n, complex *a, integer *lda,
+	 integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, j, jb, nb;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *);
+    static integer iinfo;
+    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *), cgetf2_(integer *,
+	    integer *, complex *, integer *, integer *, integer *), xerbla_(
+	    char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int claswp_(integer *, complex *, integer *,
+	    integer *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGETRF computes an LU factorization of a general M-by-N matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the M-by-N matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+                  has been completed, but the factor U is exactly
+                  singular, and division by zero will occur if it is used
+                  to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGETRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "CGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    if ((nb <= 1) || (nb >= min(*m,*n))) {
+
+/*        Use unblocked code. */
+
+	cgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code. */
+
+	i__1 = min(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = min(*m,*n) - j + 1;
+	    jb = min(i__3,nb);
+
+/*
+             Factor diagonal and subdiagonal blocks and test for exact
+             singularity.
+*/
+
+	    i__3 = *m - j + 1;
+	    cgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+
+/*           Adjust INFO and the pivot indices. */
+
+	    if (*info == 0 && iinfo > 0) {
+		*info = iinfo + j - 1;
+	    }
+/* Computing MIN */
+	    i__4 = *m, i__5 = j + jb - 1;
+	    i__3 = min(i__4,i__5);
+	    for (i__ = j; i__ <= i__3; ++i__) {
+		ipiv[i__] = j - 1 + ipiv[i__];
+/* L10: */
+	    }
+
+/*           Apply interchanges to columns 1:J-1. */
+
+	    i__3 = j - 1;
+	    i__4 = j + jb - 1;
+	    claswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+
+	    if (j + jb <= *n) {
+
+/*              Apply interchanges to columns J+JB:N. */
+
+		i__3 = *n - j - jb + 1;
+		i__4 = j + jb - 1;
+		claswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
+			ipiv[1], &c__1);
+
+/*              Compute block row of U. */
+
+		i__3 = *n - j - jb + 1;
+		ctrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
+			c_b56, &a[j + j * a_dim1], lda, &a[j + (j + jb) *
+			a_dim1], lda);
+		if (j + jb <= *m) {
+
+/*                 Update trailing submatrix. */
+
+		    i__3 = *m - j - jb + 1;
+		    i__4 = *n - j - jb + 1;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "No transpose", &i__3, &i__4, &jb,
+			    &q__1, &a[j + jb + j * a_dim1], lda, &a[j + (j +
+			    jb) * a_dim1], lda, &c_b56, &a[j + jb + (j + jb) *
+			     a_dim1], lda);
+		}
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of CGETRF */
+
+} /* cgetrf_ */
+
+/* Subroutine */ int cgetrs_(char *trans, integer *n, integer *nrhs, complex *
+	a, integer *lda, integer *ipiv, complex *b, integer *ldb, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *), xerbla_(char *,
+	    integer *), claswp_(integer *, complex *, integer *,
+	    integer *, integer *, integer *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CGETRS solves a system of linear equations
+       A * X = B,  A**T * X = B,  or  A**H * X = B
+    with a general N-by-N matrix A using the LU factorization computed
+    by CGETRF.
+
+    Arguments
+    =========
+
+    TRANS   (input) CHARACTER*1
+            Specifies the form of the system of equations:
+            = 'N':  A * X = B     (No transpose)
+            = 'T':  A**T * X = B  (Transpose)
+            = 'C':  A**H * X = B  (Conjugate transpose)
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The factors L and U from the factorization A = P*L*U
+            as computed by CGETRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (input) INTEGER array, dimension (N)
+            The pivot indices from CGETRF; for 1<=i<=N, row i of the
+            matrix was interchanged with row IPIV(i).
+
+    B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CGETRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (notran) {
+
+/*
+          Solve A * X = B.
+
+          Apply row interchanges to the right hand sides.
+*/
+
+	claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
+
+/*        Solve L*X = B, overwriting B with X. */
+
+	ctrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b56, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A**T * X = B  or A**H * X = B.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	ctrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b56, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	ctrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b56, &a[a_offset],
+		lda, &b[b_offset], ldb);
+
+/*        Apply row interchanges to the solution vectors. */
+
+	claswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
+    }
+
+    return 0;
+
+/*     End of CGETRS */
+
+} /* cgetrs_ */
+
+/* Subroutine */ int cheevd_(char *jobz, char *uplo, integer *n, complex *a,
+	integer *lda, real *w, complex *work, integer *lwork, real *rwork,
+	integer *lrwork, integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real eps;
+    static integer inde;
+    static real anrm;
+    static integer imax;
+    static real rmin, rmax;
+    static integer lopt;
+    static real sigma;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static integer lwmin, liopt;
+    static logical lower;
+    static integer llrwk, lropt;
+    static logical wantz;
+    static integer indwk2, llwrk2;
+    extern doublereal clanhe_(char *, char *, integer *, complex *, integer *,
+	     real *);
+    static integer iscale;
+    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, complex *, integer *, integer *), cstedc_(char *, integer *, real *, real *, complex *,
+	    integer *, complex *, integer *, real *, integer *, integer *,
+	    integer *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int chetrd_(char *, integer *, complex *, integer
+	    *, real *, real *, complex *, complex *, integer *, integer *), clacpy_(char *, integer *, integer *, complex *, integer
+	    *, complex *, integer *);
+    static real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real bignum;
+    static integer indtau, indrwk, indwrk, liwmin;
+    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
+    static integer lrwmin;
+    extern /* Subroutine */ int cunmtr_(char *, char *, char *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *);
+    static integer llwork;
+    static real smlnum;
+    static logical lquery;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CHEEVD computes all eigenvalues and, optionally, eigenvectors of a
+    complex Hermitian matrix A.  If eigenvectors are desired, it uses a
+    divide and conquer algorithm.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only;
+            = 'V':  Compute eigenvalues and eigenvectors.
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA, N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of A contains the
+            upper triangular part of the matrix A.  If UPLO = 'L',
+            the leading N-by-N lower triangular part of A contains
+            the lower triangular part of the matrix A.
+            On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+            orthonormal eigenvectors of the matrix A.
+            If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+            or the upper triangle (if UPLO='U') of A, including the
+            diagonal, is destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    W       (output) REAL array, dimension (N)
+            If INFO = 0, the eigenvalues in ascending order.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.
+            If N <= 1,                LWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LWORK must be at least N + 1.
+            If JOBZ  = 'V' and N > 1, LWORK must be at least 2*N + N**2.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace/output) REAL array,
+                                           dimension (LRWORK)
+            On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+
+    LRWORK  (input) INTEGER
+            The dimension of the array RWORK.
+            If N <= 1,                LRWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LRWORK must be at least N.
+            If JOBZ  = 'V' and N > 1, LRWORK must be at least
+                           1 + 5*N + 2*N**2.
+
+            If LRWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the RWORK array,
+            returns this value as the first entry of the RWORK array, and
+            no error message related to LRWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If N <= 1,                LIWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+            If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the algorithm failed to converge; i
+                  off-diagonal elements of an intermediate tridiagonal
+                  form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --w;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    wantz = lsame_(jobz, "V");
+    lower = lsame_(uplo, "L");
+    lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1);
+
+    *info = 0;
+    if (*n <= 1) {
+	lwmin = 1;
+	lrwmin = 1;
+	liwmin = 1;
+	lopt = lwmin;
+	lropt = lrwmin;
+	liopt = liwmin;
+    } else {
+	if (wantz) {
+	    lwmin = ((*n) << (1)) + *n * *n;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lrwmin = *n * 5 + 1 + ((i__1 * i__1) << (1));
+	    liwmin = *n * 5 + 3;
+	} else {
+	    lwmin = *n + 1;
+	    lrwmin = *n;
+	    liwmin = 1;
+	}
+	lopt = lwmin;
+	lropt = lrwmin;
+	liopt = liwmin;
+    }
+    if (! ((wantz) || (lsame_(jobz, "N")))) {
+	*info = -1;
+    } else if (! ((lower) || (lsame_(uplo, "U")))) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*lrwork < lrwmin && ! lquery) {
+	*info = -10;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	work[1].r = (real) lopt, work[1].i = 0.f;
+	rwork[1] = (real) lropt;
+	iwork[1] = liopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CHEEVD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	i__1 = a_dim1 + 1;
+	w[1] = a[i__1].r;
+	if (wantz) {
+	    i__1 = a_dim1 + 1;
+	    a[i__1].r = 1.f, a[i__1].i = 0.f;
+	}
+	return 0;
+    }
+
+/*     Get machine constants. */
+
+    safmin = slamch_("Safe minimum");
+    eps = slamch_("Precision");
+    smlnum = safmin / eps;
+    bignum = 1.f / smlnum;
+    rmin = sqrt(smlnum);
+    rmax = sqrt(bignum);
+
+/*     Scale matrix to allowable range, if necessary. */
+
+    anrm = clanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
+    iscale = 0;
+    if (anrm > 0.f && anrm < rmin) {
+	iscale = 1;
+	sigma = rmin / anrm;
+    } else if (anrm > rmax) {
+	iscale = 1;
+	sigma = rmax / anrm;
+    }
+    if (iscale == 1) {
+	clascl_(uplo, &c__0, &c__0, &c_b1011, &sigma, n, n, &a[a_offset], lda,
+		 info);
+    }
+
+/*     Call CHETRD to reduce Hermitian matrix to tridiagonal form. */
+
+    inde = 1;
+    indtau = 1;
+    indwrk = indtau + *n;
+    indrwk = inde + *n;
+    indwk2 = indwrk + *n * *n;
+    llwork = *lwork - indwrk + 1;
+    llwrk2 = *lwork - indwk2 + 1;
+    llrwk = *lrwork - indrwk + 1;
+    chetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], &
+	    work[indwrk], &llwork, &iinfo);
+/* Computing MAX */
+    i__1 = indwrk;
+    r__1 = (real) lopt, r__2 = (real) (*n) + work[i__1].r;
+    lopt = dmax(r__1,r__2);
+
+/*
+       For eigenvalues only, call SSTERF.  For eigenvectors, first call
+       CSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+       tridiagonal matrix, then call CUNMTR to multiply it to the
+       Householder transformations represented as Householder vectors in
+       A.
+*/
+
+    if (! wantz) {
+	ssterf_(n, &w[1], &rwork[inde], info);
+    } else {
+	cstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2],
+		&llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
+	cunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
+		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
+	clacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
+/*
+   Computing MAX
+   Computing 2nd power
+*/
+	i__3 = *n;
+	i__4 = indwk2;
+	i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r;
+	lopt = max(i__1,i__2);
+    }
+
+/*     If matrix was scaled, then rescale eigenvalues appropriately. */
+
+    if (iscale == 1) {
+	if (*info == 0) {
+	    imax = *n;
+	} else {
+	    imax = *info - 1;
+	}
+	r__1 = 1.f / sigma;
+	sscal_(&imax, &r__1, &w[1], &c__1);
+    }
+
+    work[1].r = (real) lopt, work[1].i = 0.f;
+    rwork[1] = (real) lropt;
+    iwork[1] = liopt;
+
+    return 0;
+
+/*     End of CHEEVD */
+
+} /* cheevd_ */
+
+/* Subroutine */ int chetd2_(char *uplo, integer *n, complex *a, integer *lda,
+	 real *d__, real *e, complex *tau, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Local variables */
+    static integer i__;
+    static complex taui;
+    extern /* Subroutine */ int cher2_(char *, integer *, complex *, complex *
+	    , integer *, complex *, integer *, complex *, integer *);
+    static complex alpha;
+    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer
+	    *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int chemv_(char *, integer *, complex *, complex *
+	    , integer *, complex *, integer *, complex *, complex *, integer *
+	    ), caxpy_(integer *, complex *, complex *, integer *,
+	    complex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *,
+	    integer *, complex *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    CHETD2 reduces a complex Hermitian matrix A to real symmetric
+    tridiagonal form T by a unitary similarity transformation:
+    Q' * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            Hermitian matrix A is stored:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the unitary
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the unitary matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) REAL array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) COMPLEX array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CHETD2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Reduce the upper triangle of A */
+
+	i__1 = *n + *n * a_dim1;
+	i__2 = *n + *n * a_dim1;
+	r__1 = a[i__2].r;
+	a[i__1].r = r__1, a[i__1].i = 0.f;
+	for (i__ = *n - 1; i__ >= 1; --i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(1:i-1,i+1)
+*/
+
+	    i__1 = i__ + (i__ + 1) * a_dim1;
+	    alpha.r = a[i__1].r, alpha.i = a[i__1].i;
+	    clarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui);
+	    i__1 = i__;
+	    e[i__1] = alpha.r;
+
+	    if ((taui.r != 0.f) || (taui.i != 0.f)) {
+
+/*              Apply H(i) from both sides to A(1:i,1:i) */
+
+		i__1 = i__ + (i__ + 1) * a_dim1;
+		a[i__1].r = 1.f, a[i__1].i = 0.f;
+
+/*              Compute  x := tau * A * v  storing x in TAU(1:i) */
+
+		chemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) *
+			a_dim1 + 1], &c__1, &c_b55, &tau[1], &c__1)
+			;
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		q__3.r = -.5f, q__3.i = -0.f;
+		q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r *
+			taui.i + q__3.i * taui.r;
+		cdotc_(&q__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1]
+			, &c__1);
+		q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r *
+			q__4.i + q__2.i * q__4.r;
+		alpha.r = q__1.r, alpha.i = q__1.i;
+		caxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
+			1], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		q__1.r = -1.f, q__1.i = -0.f;
+		cher2_(uplo, &i__, &q__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, &
+			tau[1], &c__1, &a[a_offset], lda);
+
+	    } else {
+		i__1 = i__ + i__ * a_dim1;
+		i__2 = i__ + i__ * a_dim1;
+		r__1 = a[i__2].r;
+		a[i__1].r = r__1, a[i__1].i = 0.f;
+	    }
+	    i__1 = i__ + (i__ + 1) * a_dim1;
+	    i__2 = i__;
+	    a[i__1].r = e[i__2], a[i__1].i = 0.f;
+	    i__1 = i__ + 1;
+	    i__2 = i__ + 1 + (i__ + 1) * a_dim1;
+	    d__[i__1] = a[i__2].r;
+	    i__1 = i__;
+	    tau[i__1].r = taui.r, tau[i__1].i = taui.i;
+/* L10: */
+	}
+	i__1 = a_dim1 + 1;
+	d__[1] = a[i__1].r;
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__1 = a_dim1 + 1;
+	i__2 = a_dim1 + 1;
+	r__1 = a[i__2].r;
+	a[i__1].r = r__1, a[i__1].i = 0.f;
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(i+2:n,i)
+*/
+
+	    i__2 = i__ + 1 + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *n - i__;
+/* Computing MIN */
+	    i__3 = i__ + 2;
+	    clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &
+		    taui);
+	    i__2 = i__;
+	    e[i__2] = alpha.r;
+
+	    if ((taui.r != 0.f) || (taui.i != 0.f)) {
+
+/*              Apply H(i) from both sides to A(i+1:n,i+1:n) */
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */
+
+		i__2 = *n - i__;
+		chemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b55, &tau[
+			i__], &c__1);
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		q__3.r = -.5f, q__3.i = -0.f;
+		q__2.r = q__3.r * taui.r - q__3.i * taui.i, q__2.i = q__3.r *
+			taui.i + q__3.i * taui.r;
+		i__2 = *n - i__;
+		cdotc_(&q__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ *
+			a_dim1], &c__1);
+		q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r *
+			q__4.i + q__2.i * q__4.r;
+		alpha.r = q__1.r, alpha.i = q__1.i;
+		i__2 = *n - i__;
+		caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+			i__], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		i__2 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cher2_(uplo, &i__2, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1,
+			&tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda);
+
+	    } else {
+		i__2 = i__ + 1 + (i__ + 1) * a_dim1;
+		i__3 = i__ + 1 + (i__ + 1) * a_dim1;
+		r__1 = a[i__3].r;
+		a[i__2].r = r__1, a[i__2].i = 0.f;
+	    }
+	    i__2 = i__ + 1 + i__ * a_dim1;
+	    i__3 = i__;
+	    a[i__2].r = e[i__3], a[i__2].i = 0.f;
+	    i__2 = i__;
+	    i__3 = i__ + i__ * a_dim1;
+	    d__[i__2] = a[i__3].r;
+	    i__2 = i__;
+	    tau[i__2].r = taui.r, tau[i__2].i = taui.i;
+/* L20: */
+	}
+	i__1 = *n;
+	i__2 = *n + *n * a_dim1;
+	d__[i__1] = a[i__2].r;
+    }
+
+    return 0;
+
+/*     End of CHETD2 */
+
+} /* chetd2_ */
+
+/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda,
+	 real *d__, real *e, complex *tau, complex *work, integer *lwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, j, nb, kk, nx, iws;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    static logical upper;
+    extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer
+	    *, real *, real *, complex *, integer *), cher2k_(char *,
+	    char *, integer *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, real *, complex *, integer *), clatrd_(char *, integer *, integer *, complex *, integer
+	    *, real *, complex *, complex *, integer *), xerbla_(char
+	    *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CHETRD reduces a complex Hermitian matrix A to real symmetric
+    tridiagonal form T by a unitary similarity transformation:
+    Q**H * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the unitary
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the unitary matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) REAL array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) COMPLEX array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= 1.
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+
+/*        Determine the block size. */
+
+	nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
+		 (ftnlen)1);
+	lwkopt = *n * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CHETRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nx = *n;
+    iws = 1;
+    if (nb > 1 && nb < *n) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, &
+		c_n1, (ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *n) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code by setting NX = N.
+
+   Computing MAX
+*/
+		i__1 = *lwork / ldwork;
+		nb = max(i__1,1);
+		nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		if (nb < nbmin) {
+		    nx = *n;
+		}
+	    }
+	} else {
+	    nx = *n;
+	}
+    } else {
+	nb = 1;
+    }
+
+    if (upper) {
+
+/*
+          Reduce the upper triangle of A.
+          Columns 1:kk are handled by the unblocked method.
+*/
+
+	kk = *n - (*n - nx + nb - 1) / nb * nb;
+	i__1 = kk + 1;
+	i__2 = -nb;
+	for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+		i__2) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = i__ + nb - 1;
+	    clatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
+		    work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(1:i-1,1:i-1), using an
+             update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ * a_dim1
+		    + 1], lda, &work[1], &ldwork, &c_b1011, &a[a_offset], lda);
+
+/*
+             Copy superdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j - 1 + j * a_dim1;
+		i__5 = j - 1;
+		a[i__4].r = e[i__5], a[i__4].i = 0.f;
+		i__4 = j;
+		i__5 = j + j * a_dim1;
+		d__[i__4] = a[i__5].r;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	chetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__2 = *n - nx;
+	i__1 = nb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = *n - i__ + 1;
+	    clatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], &
+		    tau[i__], &work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(i+nb:n,i+nb:n), using
+             an update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = *n - i__ - nb + 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &a[i__ + nb +
+		    i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1011, &a[
+		    i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*
+             Copy subdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j + 1 + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = e[i__5], a[i__4].i = 0.f;
+		i__4 = j;
+		i__5 = j + j * a_dim1;
+		d__[i__4] = a[i__5].r;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	i__1 = *n - i__ + 1;
+	chetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__],
+		&tau[i__], &iinfo);
+    }
+
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CHETRD */
+
+} /* chetrd_ */
+
+/* Subroutine */ int chseqr_(char *job, char *compz, integer *n, integer *ilo,
+	 integer *ihi, complex *h__, integer *ldh, complex *w, complex *z__,
+	integer *ldz, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2],
+	    i__5, i__6;
+    real r__1, r__2, r__3, r__4;
+    complex q__1;
+    char ch__1[2];
+
+    /* Builtin functions */
+    double r_imag(complex *);
+    void r_cnjg(complex *, complex *);
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__, j, k, l;
+    static complex s[225]	/* was [15][15] */, v[16];
+    static integer i1, i2, ii, nh, nr, ns, nv;
+    static complex vv[16];
+    static integer itn;
+    static complex tau;
+    static integer its;
+    static real ulp, tst1;
+    static integer maxb, ierr;
+    static real unfl;
+    static complex temp;
+    static real ovfl;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), ccopy_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static integer itemp;
+    static real rtemp;
+    static logical initz, wantt, wantz;
+    static real rwork[1];
+    extern doublereal slapy2_(real *, real *);
+    extern /* Subroutine */ int slabad_(real *, real *), clarfg_(integer *,
+	    complex *, complex *, integer *, complex *);
+    extern integer icamax_(integer *, complex *, integer *);
+    extern doublereal slamch_(char *), clanhs_(char *, integer *,
+	    complex *, integer *, real *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *), clahqr_(logical *, logical *, integer *, integer *, integer *,
+	     complex *, integer *, complex *, integer *, integer *, complex *,
+	     integer *, integer *), clacpy_(char *, integer *, integer *,
+	    complex *, integer *, complex *, integer *), claset_(char
+	    *, integer *, integer *, complex *, complex *, complex *, integer
+	    *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int clarfx_(char *, integer *, integer *, complex
+	    *, complex *, complex *, integer *, complex *);
+    static real smlnum;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CHSEQR computes the eigenvalues of a complex upper Hessenberg
+    matrix H, and, optionally, the matrices T and Z from the Schur
+    decomposition H = Z T Z**H, where T is an upper triangular matrix
+    (the Schur form), and Z is the unitary matrix of Schur vectors.
+
+    Optionally Z may be postmultiplied into an input unitary matrix Q,
+    so that this routine can give the Schur factorization of a matrix A
+    which has been reduced to the Hessenberg form H by the unitary
+    matrix Q:  A = Q*H*Q**H = (QZ)*T*(QZ)**H.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            = 'E': compute eigenvalues only;
+            = 'S': compute eigenvalues and the Schur form T.
+
+    COMPZ   (input) CHARACTER*1
+            = 'N': no Schur vectors are computed;
+            = 'I': Z is initialized to the unit matrix and the matrix Z
+                   of Schur vectors of H is returned;
+            = 'V': Z must contain an unitary matrix Q on entry, and
+                   the product Q*Z is returned.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to CGEBAL, and then passed to CGEHRD
+            when the matrix output by CGEBAL is reduced to Hessenberg
+            form. Otherwise ILO and IHI should be set to 1 and N
+            respectively.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    H       (input/output) COMPLEX array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if JOB = 'S', H contains the upper triangular matrix
+            T from the Schur decomposition (the Schur form). If
+            JOB = 'E', the contents of H are unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    W       (output) COMPLEX array, dimension (N)
+            The computed eigenvalues. If JOB = 'S', the eigenvalues are
+            stored in the same order as on the diagonal of the Schur form
+            returned in H, with W(i) = H(i,i).
+
+    Z       (input/output) COMPLEX array, dimension (LDZ,N)
+            If COMPZ = 'N': Z is not referenced.
+            If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+            contains the unitary matrix Z of the Schur vectors of H.
+            If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+            which is assumed to be equal to the unit matrix except for
+            the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+            Normally Q is the unitary matrix generated by CUNGHR after
+            the call to CGEHRD which formed the Hessenberg matrix H.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.
+            LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, CHSEQR failed to compute all the
+                  eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+                  elements 1:ilo-1 and i+1:n of W contain those
+                  eigenvalues which have been successfully computed.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --w;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    wantt = lsame_(job, "S");
+    initz = lsame_(compz, "I");
+    wantz = (initz) || (lsame_(compz, "V"));
+
+    *info = 0;
+    i__1 = max(1,*n);
+    work[1].r = (real) i__1, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (! lsame_(job, "E") && ! wantt) {
+	*info = -1;
+    } else if (! lsame_(compz, "N") && ! wantz) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*ldh < max(1,*n)) {
+	*info = -7;
+    } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) {
+	*info = -10;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CHSEQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Initialize Z, if necessary */
+
+    if (initz) {
+	claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz);
+    }
+
+/*     Store the eigenvalues isolated by CGEBAL. */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__ + i__ * h_dim1;
+	w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i;
+/* L10: */
+    }
+    i__1 = *n;
+    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__ + i__ * h_dim1;
+	w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i;
+/* L20: */
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	i__1 = *ilo;
+	i__2 = *ilo + *ilo * h_dim1;
+	w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
+	return 0;
+    }
+
+/*
+       Set rows and columns ILO to IHI to zero below the first
+       subdiagonal.
+*/
+
+    i__1 = *ihi - 2;
+    for (j = *ilo; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = j + 2; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * h_dim1;
+	    h__[i__3].r = 0.f, h__[i__3].i = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    nh = *ihi - *ilo + 1;
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are re-set inside the main loop.
+*/
+
+    if (wantt) {
+	i1 = 1;
+	i2 = *n;
+    } else {
+	i1 = *ilo;
+	i2 = *ihi;
+    }
+
+/*     Ensure that the subdiagonal elements are real. */
+
+    i__1 = *ihi;
+    for (i__ = *ilo + 1; i__ <= i__1; ++i__) {
+	i__2 = i__ + (i__ - 1) * h_dim1;
+	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
+	if (r_imag(&temp) != 0.f) {
+	    r__1 = temp.r;
+	    r__2 = r_imag(&temp);
+	    rtemp = slapy2_(&r__1, &r__2);
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    h__[i__2].r = rtemp, h__[i__2].i = 0.f;
+	    q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp;
+	    temp.r = q__1.r, temp.i = q__1.i;
+	    if (i2 > i__) {
+		i__2 = i2 - i__;
+		r_cnjg(&q__1, &temp);
+		cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh);
+	    }
+	    i__2 = i__ - i1;
+	    cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1);
+	    if (i__ < *ihi) {
+		i__2 = i__ + 1 + i__ * h_dim1;
+		i__3 = i__ + 1 + i__ * h_dim1;
+		q__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, q__1.i =
+			 temp.r * h__[i__3].i + temp.i * h__[i__3].r;
+		h__[i__2].r = q__1.r, h__[i__2].i = q__1.i;
+	    }
+	    if (wantz) {
+		cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1);
+	    }
+	}
+/* L50: */
+    }
+
+/*
+       Determine the order of the multi-shift QR algorithm to be used.
+
+   Writing concatenation
+*/
+    i__4[0] = 1, a__1[0] = job;
+    i__4[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2);
+    ns = ilaenv_(&c__4, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+/* Writing concatenation */
+    i__4[0] = 1, a__1[0] = job;
+    i__4[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2);
+    maxb = ilaenv_(&c__8, "CHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (((ns <= 1) || (ns > nh)) || (maxb >= nh)) {
+
+/*        Use the standard double-shift algorithm */
+
+	clahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo,
+		ihi, &z__[z_offset], ldz, info);
+	return 0;
+    }
+    maxb = max(2,maxb);
+/* Computing MIN */
+    i__1 = min(ns,maxb);
+    ns = min(i__1,15);
+
+/*
+       Now 1 < NS <= MAXB < NH.
+
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    unfl = slamch_("Safe minimum");
+    ovfl = 1.f / unfl;
+    slabad_(&unfl, &ovfl);
+    ulp = slamch_("Precision");
+    smlnum = unfl * (nh / ulp);
+
+/*     ITN is the total number of multiple-shift QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of at most MAXB. Each iteration of the loop
+       works with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L60:
+    if (i__ < *ilo) {
+	goto L180;
+    }
+
+/*
+       Perform multiple-shift QR iterations on rows and columns ILO to I
+       until a submatrix of order at most MAXB splits off at the bottom
+       because a subdiagonal element has become negligible.
+*/
+
+    l = *ilo;
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    i__3 = k - 1 + (k - 1) * h_dim1;
+	    i__5 = k + k * h_dim1;
+	    tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k -
+		    1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__5]
+		    .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]),
+		    dabs(r__4)));
+	    if (tst1 == 0.f) {
+		i__3 = i__ - l + 1;
+		tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork);
+	    }
+	    i__3 = k + (k - 1) * h_dim1;
+/* Computing MAX */
+	    r__2 = ulp * tst1;
+	    if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) {
+		goto L80;
+	    }
+/* L70: */
+	}
+L80:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible. */
+
+	    i__2 = l + (l - 1) * h_dim1;
+	    h__[i__2].r = 0.f, h__[i__2].i = 0.f;
+	}
+
+/*        Exit from loop if a submatrix of order <= MAXB has split off. */
+
+	if (l >= i__ - maxb + 1) {
+	    goto L170;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! wantt) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 20) || (its == 30)) {
+
+/*           Exceptional shifts. */
+
+	    i__2 = i__;
+	    for (ii = i__ - ns + 1; ii <= i__2; ++ii) {
+		i__3 = ii;
+		i__5 = ii + (ii - 1) * h_dim1;
+		i__6 = ii + ii * h_dim1;
+		r__3 = ((r__1 = h__[i__5].r, dabs(r__1)) + (r__2 = h__[i__6]
+			.r, dabs(r__2))) * 1.5f;
+		w[i__3].r = r__3, w[i__3].i = 0.f;
+/* L90: */
+	    }
+	} else {
+
+/*           Use eigenvalues of trailing submatrix of order NS as shifts. */
+
+	    clacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) *
+		    h_dim1], ldh, s, &c__15);
+	    clahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ -
+		    ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr);
+	    if (ierr > 0) {
+
+/*
+                If CLAHQR failed to compute all NS eigenvalues, use the
+                unconverged diagonal elements as the remaining shifts.
+*/
+
+		i__2 = ierr;
+		for (ii = 1; ii <= i__2; ++ii) {
+		    i__3 = i__ - ns + ii;
+		    i__5 = ii + ii * 15 - 16;
+		    w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i;
+/* L100: */
+		}
+	    }
+	}
+
+/*
+          Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+          where G is the Hessenberg submatrix H(L:I,L:I) and w is
+          the vector of shifts (stored in W). The result is
+          stored in the local array V.
+*/
+
+	v[0].r = 1.f, v[0].i = 0.f;
+	i__2 = ns + 1;
+	for (ii = 2; ii <= i__2; ++ii) {
+	    i__3 = ii - 1;
+	    v[i__3].r = 0.f, v[i__3].i = 0.f;
+/* L110: */
+	}
+	nv = 1;
+	i__2 = i__;
+	for (j = i__ - ns + 1; j <= i__2; ++j) {
+	    i__3 = nv + 1;
+	    ccopy_(&i__3, v, &c__1, vv, &c__1);
+	    i__3 = nv + 1;
+	    i__5 = j;
+	    q__1.r = -w[i__5].r, q__1.i = -w[i__5].i;
+	    cgemv_("No transpose", &i__3, &nv, &c_b56, &h__[l + l * h_dim1],
+		    ldh, vv, &c__1, &q__1, v, &c__1);
+	    ++nv;
+
+/*
+             Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+             reset it to the unit vector.
+*/
+
+	    itemp = icamax_(&nv, v, &c__1);
+	    i__3 = itemp - 1;
+	    rtemp = (r__1 = v[i__3].r, dabs(r__1)) + (r__2 = r_imag(&v[itemp
+		    - 1]), dabs(r__2));
+	    if (rtemp == 0.f) {
+		v[0].r = 1.f, v[0].i = 0.f;
+		i__3 = nv;
+		for (ii = 2; ii <= i__3; ++ii) {
+		    i__5 = ii - 1;
+		    v[i__5].r = 0.f, v[i__5].i = 0.f;
+/* L120: */
+		}
+	    } else {
+		rtemp = dmax(rtemp,smlnum);
+		r__1 = 1.f / rtemp;
+		csscal_(&nv, &r__1, v, &c__1);
+	    }
+/* L130: */
+	}
+
+/*        Multiple-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = l; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix. NR is the order of G.
+
+   Computing MIN
+*/
+	    i__3 = ns + 1, i__5 = i__ - k + 1;
+	    nr = min(i__3,i__5);
+	    if (k > l) {
+		ccopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    clarfg_(&nr, v, &v[1], &c__1, &tau);
+	    if (k > l) {
+		i__3 = k + (k - 1) * h_dim1;
+		h__[i__3].r = v[0].r, h__[i__3].i = v[0].i;
+		i__3 = i__;
+		for (ii = k + 1; ii <= i__3; ++ii) {
+		    i__5 = ii + (k - 1) * h_dim1;
+		    h__[i__5].r = 0.f, h__[i__5].i = 0.f;
+/* L140: */
+		}
+	    }
+	    v[0].r = 1.f, v[0].i = 0.f;
+
+/*
+             Apply G' from the left to transform the rows of the matrix
+             in columns K to I2.
+*/
+
+	    i__3 = i2 - k + 1;
+	    r_cnjg(&q__1, &tau);
+	    clarfx_("Left", &nr, &i__3, v, &q__1, &h__[k + k * h_dim1], ldh, &
+		    work[1]);
+
+/*
+             Apply G from the right to transform the columns of the
+             matrix in rows I1 to min(K+NR,I).
+
+   Computing MIN
+*/
+	    i__5 = k + nr;
+	    i__3 = min(i__5,i__) - i1 + 1;
+	    clarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh,
+		    &work[1]);
+
+	    if (wantz) {
+
+/*              Accumulate transformations in the matrix Z */
+
+		clarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1],
+			ldz, &work[1]);
+	    }
+/* L150: */
+	}
+
+/*        Ensure that H(I,I-1) is real. */
+
+	i__2 = i__ + (i__ - 1) * h_dim1;
+	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
+	if (r_imag(&temp) != 0.f) {
+	    r__1 = temp.r;
+	    r__2 = r_imag(&temp);
+	    rtemp = slapy2_(&r__1, &r__2);
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    h__[i__2].r = rtemp, h__[i__2].i = 0.f;
+	    q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp;
+	    temp.r = q__1.r, temp.i = q__1.i;
+	    if (i2 > i__) {
+		i__2 = i2 - i__;
+		r_cnjg(&q__1, &temp);
+		cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh);
+	    }
+	    i__2 = i__ - i1;
+	    cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1);
+	    if (wantz) {
+		cscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1);
+	    }
+	}
+
+/* L160: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L170:
+
+/*
+       A submatrix of order <= MAXB in rows and columns L to I has split
+       off. Use the double-shift QR algorithm to handle it.
+*/
+
+    clahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi,
+	     &z__[z_offset], ldz, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with a new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L60;
+
+L180:
+    i__1 = max(1,*n);
+    work[1].r = (real) i__1, work[1].i = 0.f;
+    return 0;
+
+/*     End of CHSEQR */
+
+} /* chseqr_ */
+
+/* Subroutine */ int clabrd_(integer *m, integer *n, integer *nb, complex *a,
+	integer *lda, real *d__, real *e, complex *tauq, complex *taup,
+	complex *x, integer *ldx, complex *y, integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__;
+    static complex alpha;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *), cgemv_(char *, integer *, integer *, complex *,
+	    complex *, integer *, complex *, integer *, complex *, complex *,
+	    integer *), clarfg_(integer *, complex *, complex *,
+	    integer *, complex *), clacgv_(integer *, complex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLABRD reduces the first NB rows and columns of a complex general
+    m by n matrix A to upper or lower real bidiagonal form by a unitary
+    transformation Q' * A * P, and returns the matrices X and Y which
+    are needed to apply the transformation to the unreduced part of A.
+
+    If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+    bidiagonal form.
+
+    This is an auxiliary routine called by CGEBRD
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.
+
+    NB      (input) INTEGER
+            The number of leading rows and columns of A to be reduced.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit, the first NB rows and columns of the matrix are
+            overwritten; the rest of the array is unchanged.
+            If m >= n, elements on and below the diagonal in the first NB
+              columns, with the array TAUQ, represent the unitary
+              matrix Q as a product of elementary reflectors; and
+              elements above the diagonal in the first NB rows, with the
+              array TAUP, represent the unitary matrix P as a product
+              of elementary reflectors.
+            If m < n, elements below the diagonal in the first NB
+              columns, with the array TAUQ, represent the unitary
+              matrix Q as a product of elementary reflectors, and
+              elements on and above the diagonal in the first NB rows,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) REAL array, dimension (NB)
+            The diagonal elements of the first NB rows and columns of
+            the reduced matrix.  D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (NB)
+            The off-diagonal elements of the first NB rows and columns of
+            the reduced matrix.
+
+    TAUQ    (output) COMPLEX array dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix Q. See Further Details.
+
+    TAUP    (output) COMPLEX array, dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix P. See Further Details.
+
+    X       (output) COMPLEX array, dimension (LDX,NB)
+            The m-by-nb matrix X required to update the unreduced part
+            of A.
+
+    LDX     (input) INTEGER
+            The leading dimension of the array X. LDX >= max(1,M).
+
+    Y       (output) COMPLEX array, dimension (LDY,NB)
+            The n-by-nb matrix Y required to update the unreduced part
+            of A.
+
+    LDY     (output) INTEGER
+            The leading dimension of the array Y. LDY >= max(1,N).
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+       Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors.
+
+    If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+    A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The elements of the vectors v and u together form the m-by-nb matrix
+    V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+    the transformation to the unreduced part of the matrix, using a block
+    update of the form:  A := A - V*Y' - X*U'.
+
+    The contents of A on exit are illustrated by the following examples
+    with nb = 2:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+      (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+      (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )
+
+    where a denotes an element of the original matrix which is unchanged,
+    vi denotes an element of the vector defining H(i), and ui an element
+    of the vector defining G(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:m,i) */
+
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &y[i__ + y_dim1], ldy);
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda,
+		     &y[i__ + y_dim1], ldy, &c_b56, &a[i__ + i__ * a_dim1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &y[i__ + y_dim1], ldy);
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + x_dim1], ldx,
+		     &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[i__ + i__ *
+		    a_dim1], &c__1);
+
+/*           Generate reflection Q(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, &
+		    tauq[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    if (i__ < *n) {
+		i__2 = i__ + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ + (
+			i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], &
+			c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ +
+			a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b55, &
+			y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[
+			i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &x[i__ +
+			x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b55, &
+			y[i__ * y_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ +
+			1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *n - i__;
+		cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+
+/*              Update A(i,i+1:n) */
+
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		clacgv_(&i__, &a[i__ + a_dim1], lda);
+		i__2 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__, &q__1, &y[i__ + 1 +
+			y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b56, &a[i__ +
+			(i__ + 1) * a_dim1], lda);
+		clacgv_(&i__, &a[i__ + a_dim1], lda);
+		i__2 = i__ - 1;
+		clacgv_(&i__2, &x[i__ + x_dim1], ldx);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[(i__ +
+			1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56,
+			&a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ - 1;
+		clacgv_(&i__2, &x[i__ + x_dim1], ldx);
+
+/*              Generate reflection P(i) to annihilate A(i,i+2:n) */
+
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+			taup[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + (
+			i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1],
+			 lda, &c_b55, &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__;
+		cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &y[i__ + 1
+			+ y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b55, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__, &q__1, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b55, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i,i:n) */
+
+	    i__2 = *n - i__ + 1;
+	    clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + y_dim1], ldy,
+		     &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1],
+		    lda);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &x[i__ + x_dim1], ldx);
+	    i__2 = i__ - 1;
+	    i__3 = *n - i__ + 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &a[i__ *
+		    a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b56, &a[i__ +
+		    i__ * a_dim1], lda);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &x[i__ + x_dim1], ldx);
+
+/*           Generate reflection P(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    clarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+		    taup[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    if (i__ < *m) {
+		i__2 = i__ + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__ + 1;
+		cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ + 1 + i__
+			* a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__ + 1;
+		i__3 = i__ - 1;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &y[i__ +
+			y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[
+			i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__ + 1;
+		cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[i__ * a_dim1
+			+ 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b55, &x[
+			i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b56, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		cscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__ + 1;
+		clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+
+/*              Update A(i+1:m,i) */
+
+		i__2 = i__ - 1;
+		clacgv_(&i__2, &y[i__ + y_dim1], ldy);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 +
+			a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b56, &a[i__ +
+			1 + i__ * a_dim1], &c__1);
+		i__2 = i__ - 1;
+		clacgv_(&i__2, &y[i__ + y_dim1], ldy);
+		i__2 = *m - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__, &q__1, &x[i__ + 1 +
+			x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b56, &a[
+			i__ + 1 + i__ * a_dim1], &c__1);
+
+/*              Generate reflection Q(i) to annihilate A(i+2:m,i) */
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		clarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1,
+			 &tauq[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ +
+			1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ *
+			a_dim1], &c__1, &c_b55, &y[i__ + 1 + i__ * y_dim1], &
+			c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ +
+			1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b55, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b56, &y[
+			i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__;
+		cgemv_("Conjugate transpose", &i__2, &i__, &c_b56, &x[i__ + 1
+			+ x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b55, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("Conjugate transpose", &i__, &i__2, &q__1, &a[(i__ + 1)
+			 * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b56, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *n - i__;
+		cscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+	    } else {
+		i__2 = *n - i__ + 1;
+		clacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of CLABRD */
+
+} /* clabrd_ */
+
+/* Subroutine */ int clacgv_(integer *n, complex *x, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, ioff;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLACGV conjugates a complex vector of length N.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The length of the vector X.  N >= 0.
+
+    X       (input/output) COMPLEX array, dimension
+                           (1+(N-1)*abs(INCX))
+            On entry, the vector of length N to be conjugated.
+            On exit, X is overwritten with conjg(X).
+
+    INCX    (input) INTEGER
+            The spacing between successive elements of X.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*incx == 1) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__;
+	    r_cnjg(&q__1, &x[i__]);
+	    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+/* L10: */
+	}
+    } else {
+	ioff = 1;
+	if (*incx < 0) {
+	    ioff = 1 - (*n - 1) * *incx;
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = ioff;
+	    r_cnjg(&q__1, &x[ioff]);
+	    x[i__2].r = q__1.r, x[i__2].i = q__1.i;
+	    ioff += *incx;
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of CLACGV */
+
+} /* clacgv_ */
+
+/* Subroutine */ int clacp2_(char *uplo, integer *m, integer *n, real *a,
+	integer *lda, complex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLACP2 copies all or part of a real two-dimensional matrix A to a
+    complex matrix B.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be copied to B.
+            = 'U':      Upper triangular part
+            = 'L':      Lower triangular part
+            Otherwise:  All of the matrix A
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+            is accessed; if UPLO = 'L', only the lower trapezium is
+            accessed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (output) COMPLEX array, dimension (LDB,N)
+            On exit, B = A in the locations specified by UPLO.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4], b[i__3].i = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4], b[i__3].i = 0.f;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4], b[i__3].i = 0.f;
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLACP2 */
+
+} /* clacp2_ */
+
+/* Subroutine */ int clacpy_(char *uplo, integer *m, integer *n, complex *a,
+	integer *lda, complex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    CLACPY copies all or part of a two-dimensional matrix A to another
+    matrix B.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be copied to B.
+            = 'U':      Upper triangular part
+            = 'L':      Lower triangular part
+            Otherwise:  All of the matrix A
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+            is accessed; if UPLO = 'L', only the lower trapezium is
+            accessed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (output) COMPLEX array, dimension (LDB,N)
+            On exit, B = A in the locations specified by UPLO.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i;
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLACPY */
+
+} /* clacpy_ */
+
+/* Subroutine */ int clacrm_(integer *m, integer *n, complex *a, integer *lda,
+	 real *b, integer *ldb, complex *c__, integer *ldc, real *rwork)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5;
+    real r__1;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLACRM performs a very simple matrix-matrix multiplication:
+             C := A * B,
+    where A is M by N and complex; B is N by N and real;
+    C is M by N and complex.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A and of the matrix C.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns and rows of the matrix B and
+            the number of columns of the matrix C.
+            N >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA, N)
+            A contains the M by N matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >=max(1,M).
+
+    B       (input) REAL array, dimension (LDB, N)
+            B contains the N by N matrix B.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B. LDB >=max(1,N).
+
+    C       (input) COMPLEX array, dimension (LDC, N)
+            C contains the M by N matrix C.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >=max(1,N).
+
+    RWORK   (workspace) REAL array, dimension (2*M*N)
+
+    =====================================================================
+
+
+       Quick return if possible.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --rwork;
+
+    /* Function Body */
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    rwork[(j - 1) * *m + i__] = a[i__3].r;
+/* L10: */
+	}
+/* L20: */
+    }
+
+    l = *m * *n + 1;
+    sgemm_("N", "N", m, n, n, &c_b1011, &rwork[1], m, &b[b_offset], ldb, &
+	    c_b320, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = l + (j - 1) * *m + i__ - 1;
+	    c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    rwork[(j - 1) * *m + i__] = r_imag(&a[i__ + j * a_dim1]);
+/* L50: */
+	}
+/* L60: */
+    }
+    sgemm_("N", "N", m, n, n, &c_b1011, &rwork[1], m, &b[b_offset], ldb, &
+	    c_b320, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = i__ + j * c_dim1;
+	    r__1 = c__[i__4].r;
+	    i__5 = l + (j - 1) * *m + i__ - 1;
+	    q__1.r = r__1, q__1.i = rwork[i__5];
+	    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L70: */
+	}
+/* L80: */
+    }
+
+    return 0;
+
+/*     End of CLACRM */
+
+} /* clacrm_ */
+
+/* Complex */ VOID cladiv_(complex * ret_val, complex *x, complex *y)
+{
+    /* System generated locals */
+    real r__1, r__2, r__3, r__4;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static real zi, zr;
+    extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real *
+	    , real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLADIV := X / Y, where X and Y are complex.  The computation of X / Y
+    will not overflow on an intermediary step unless the results
+    overflows.
+
+    Arguments
+    =========
+
+    X       (input) COMPLEX
+    Y       (input) COMPLEX
+            The complex scalars X and Y.
+
+    =====================================================================
+*/
+
+
+    r__1 = x->r;
+    r__2 = r_imag(x);
+    r__3 = y->r;
+    r__4 = r_imag(y);
+    sladiv_(&r__1, &r__2, &r__3, &r__4, &zr, &zi);
+    q__1.r = zr, q__1.i = zi;
+     ret_val->r = q__1.r,  ret_val->i = q__1.i;
+
+    return ;
+
+/*     End of CLADIV */
+
+} /* cladiv_ */
+
+/* Subroutine */ int claed0_(integer *qsiz, integer *n, real *d__, real *e,
+	complex *q, integer *ldq, complex *qstore, integer *ldqs, real *rwork,
+	 integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
+    real r__1;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2;
+    static real temp;
+    static integer curr, iperm;
+    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static integer indxq, iwrem;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    static integer iqptr;
+    extern /* Subroutine */ int claed7_(integer *, integer *, integer *,
+	    integer *, integer *, integer *, real *, complex *, integer *,
+	    real *, integer *, real *, integer *, integer *, integer *,
+	    integer *, integer *, real *, complex *, real *, integer *,
+	    integer *);
+    static integer tlvls;
+    extern /* Subroutine */ int clacrm_(integer *, integer *, complex *,
+	    integer *, real *, integer *, complex *, integer *, real *);
+    static integer igivcl;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer igivnm, submat, curprb, subpbs, igivpt, curlvl, matsiz,
+	    iprmpt, smlsiz;
+    extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *,
+	    real *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    Using the divide and conquer method, CLAED0 computes all eigenvalues
+    of a symmetric tridiagonal matrix which is one diagonal block of
+    those from reducing a dense or band Hermitian matrix and
+    corresponding eigenvectors of the dense or band matrix.
+
+    Arguments
+    =========
+
+    QSIZ   (input) INTEGER
+           The dimension of the unitary matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, the diagonal elements of the tridiagonal matrix.
+           On exit, the eigenvalues in ascending order.
+
+    E      (input/output) REAL array, dimension (N-1)
+           On entry, the off-diagonal elements of the tridiagonal matrix.
+           On exit, E has been destroyed.
+
+    Q      (input/output) COMPLEX array, dimension (LDQ,N)
+           On entry, Q must contain an QSIZ x N matrix whose columns
+           unitarily orthonormal. It is a part of the unitary matrix
+           that reduces the full dense Hermitian matrix to a
+           (reducible) symmetric tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    IWORK  (workspace) INTEGER array,
+           the dimension of IWORK must be at least
+                        6 + 6*N + 5*N*lg N
+                        ( lg( N ) = smallest integer k
+                                    such that 2^k >= N )
+
+    RWORK  (workspace) REAL array,
+                                 dimension (1 + 3*N + 2*N*lg N + 3*N**2)
+                          ( lg( N ) = smallest integer k
+                                      such that 2^k >= N )
+
+    QSTORE (workspace) COMPLEX array, dimension (LDQS, N)
+           Used to store parts of
+           the eigenvector matrix when the updating matrix multiplies
+           take place.
+
+    LDQS   (input) INTEGER
+           The leading dimension of the array QSTORE.
+           LDQS >= max(1,N).
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    =====================================================================
+
+    Warning:      N could be as big as QSIZ!
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    qstore_dim1 = *ldqs;
+    qstore_offset = 1 + qstore_dim1;
+    qstore -= qstore_offset;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+/*
+       IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN
+          INFO = -1
+       ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) )
+      $        THEN
+*/
+    if (*qsiz < max(0,*n)) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*ldq < max(1,*n)) {
+	*info = -6;
+    } else if (*ldqs < max(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLAED0", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "CLAED0", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Determine the size and placement of the submatrices, and save in
+       the leading elements of IWORK.
+*/
+
+    iwork[1] = *n;
+    subpbs = 1;
+    tlvls = 0;
+L10:
+    if (iwork[subpbs] > smlsiz) {
+	for (j = subpbs; j >= 1; --j) {
+	    iwork[j * 2] = (iwork[j] + 1) / 2;
+	    iwork[((j) << (1)) - 1] = iwork[j] / 2;
+/* L20: */
+	}
+	++tlvls;
+	subpbs <<= 1;
+	goto L10;
+    }
+    i__1 = subpbs;
+    for (j = 2; j <= i__1; ++j) {
+	iwork[j] += iwork[j - 1];
+/* L30: */
+    }
+
+/*
+       Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+       using rank-1 modifications (cuts).
+*/
+
+    spm1 = subpbs - 1;
+    i__1 = spm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	submat = iwork[i__] + 1;
+	smm1 = submat - 1;
+	d__[smm1] -= (r__1 = e[smm1], dabs(r__1));
+	d__[submat] -= (r__1 = e[smm1], dabs(r__1));
+/* L40: */
+    }
+
+    indxq = ((*n) << (2)) + 3;
+
+/*
+       Set up workspaces for eigenvalues only/accumulate new vectors
+       routine
+*/
+
+    temp = log((real) (*n)) / log(2.f);
+    lgn = (integer) temp;
+    if (pow_ii(&c__2, &lgn) < *n) {
+	++lgn;
+    }
+    if (pow_ii(&c__2, &lgn) < *n) {
+	++lgn;
+    }
+    iprmpt = indxq + *n + 1;
+    iperm = iprmpt + *n * lgn;
+    iqptr = iperm + *n * lgn;
+    igivpt = iqptr + *n + 2;
+    igivcl = igivpt + *n * lgn;
+
+    igivnm = 1;
+    iq = igivnm + ((*n) << (1)) * lgn;
+/* Computing 2nd power */
+    i__1 = *n;
+    iwrem = iq + i__1 * i__1 + 1;
+/*     Initialize pointers */
+    i__1 = subpbs;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+	iwork[iprmpt + i__] = 1;
+	iwork[igivpt + i__] = 1;
+/* L50: */
+    }
+    iwork[iqptr] = 1;
+
+/*
+       Solve each submatrix eigenproblem at the bottom of the divide and
+       conquer tree.
+*/
+
+    curr = 0;
+    i__1 = spm1;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+	if (i__ == 0) {
+	    submat = 1;
+	    matsiz = iwork[1];
+	} else {
+	    submat = iwork[i__] + 1;
+	    matsiz = iwork[i__ + 1] - iwork[i__];
+	}
+	ll = iq - 1 + iwork[iqptr + curr];
+	ssteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, &
+		rwork[1], info);
+	clacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], &
+		matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem]
+		);
+/* Computing 2nd power */
+	i__2 = matsiz;
+	iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
+	++curr;
+	if (*info > 0) {
+	    *info = submat * (*n + 1) + submat + matsiz - 1;
+	    return 0;
+	}
+	k = 1;
+	i__2 = iwork[i__ + 1];
+	for (j = submat; j <= i__2; ++j) {
+	    iwork[indxq + j] = k;
+	    ++k;
+/* L60: */
+	}
+/* L70: */
+    }
+
+/*
+       Successively merge eigensystems of adjacent submatrices
+       into eigensystem for the corresponding larger matrix.
+
+       while ( SUBPBS > 1 )
+*/
+
+    curlvl = 1;
+L80:
+    if (subpbs > 1) {
+	spm2 = subpbs - 2;
+	i__1 = spm2;
+	for (i__ = 0; i__ <= i__1; i__ += 2) {
+	    if (i__ == 0) {
+		submat = 1;
+		matsiz = iwork[2];
+		msd2 = iwork[1];
+		curprb = 0;
+	    } else {
+		submat = iwork[i__] + 1;
+		matsiz = iwork[i__ + 2] - iwork[i__];
+		msd2 = matsiz / 2;
+		++curprb;
+	    }
+
+/*
+       Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+       into an eigensystem of size MATSIZ.  CLAED7 handles the case
+       when the eigenvectors of a full or band Hermitian matrix (which
+       was reduced to tridiagonal form) are desired.
+
+       I am free to use Q as a valuable working space until Loop 150.
+*/
+
+	    claed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[
+		    submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[
+		    submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], &
+		    iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[
+		    igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat *
+		    q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info);
+	    if (*info > 0) {
+		*info = submat * (*n + 1) + submat + matsiz - 1;
+		return 0;
+	    }
+	    iwork[i__ / 2 + 1] = iwork[i__ + 2];
+/* L90: */
+	}
+	subpbs /= 2;
+	++curlvl;
+	goto L80;
+    }
+
+/*
+       end while
+
+       Re-merge the eigenvalues/vectors which were deflated at the final
+       merge step.
+*/
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	j = iwork[indxq + i__];
+	rwork[i__] = d__[j];
+	ccopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1]
+		, &c__1);
+/* L100: */
+    }
+    scopy_(n, &rwork[1], &c__1, &d__[1], &c__1);
+
+    return 0;
+
+/*     End of CLAED0 */
+
+} /* claed0_ */
+
+/* Subroutine */ int claed7_(integer *n, integer *cutpnt, integer *qsiz,
+	integer *tlvls, integer *curlvl, integer *curpbm, real *d__, complex *
+	q, integer *ldq, real *rho, integer *indxq, real *qstore, integer *
+	qptr, integer *prmptr, integer *perm, integer *givptr, integer *
+	givcol, real *givnum, complex *work, real *rwork, integer *iwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr,
+	    indxc, indxp;
+    extern /* Subroutine */ int claed8_(integer *, integer *, integer *,
+	    complex *, integer *, real *, real *, integer *, real *, real *,
+	    complex *, integer *, real *, integer *, integer *, integer *,
+	    integer *, integer *, integer *, real *, integer *), slaed9_(
+	    integer *, integer *, integer *, integer *, real *, real *,
+	    integer *, real *, real *, real *, real *, integer *, integer *),
+	    slaeda_(integer *, integer *, integer *, integer *, integer *,
+	    integer *, integer *, integer *, real *, real *, integer *, real *
+	    , real *, integer *);
+    static integer idlmda;
+    extern /* Subroutine */ int clacrm_(integer *, integer *, complex *,
+	    integer *, real *, integer *, complex *, integer *, real *),
+	    xerbla_(char *, integer *), slamrg_(integer *, integer *,
+	    real *, integer *, integer *, integer *);
+    static integer coltyp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLAED7 computes the updated eigensystem of a diagonal
+    matrix after modification by a rank-one symmetric matrix. This
+    routine is used only for the eigenproblem which requires all
+    eigenvalues and optionally eigenvectors of a dense or banded
+    Hermitian matrix that has been reduced to tridiagonal form.
+
+      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+
+      where Z = Q'u, u is a vector of length N with ones in the
+      CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+
+       The eigenvectors of the original matrix are stored in Q, and the
+       eigenvalues are in D.  The algorithm consists of three stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple eigenvalues or if there is a zero in
+          the Z vector.  For each such occurence the dimension of the
+          secular equation problem is reduced by one.  This stage is
+          performed by the routine SLAED2.
+
+          The second stage consists of calculating the updated
+          eigenvalues. This is done by finding the roots of the secular
+          equation via the routine SLAED4 (as called by SLAED3).
+          This routine also calculates the eigenvectors of the current
+          problem.
+
+          The final stage consists of computing the updated eigenvectors
+          directly using the updated eigenvalues.  The eigenvectors for
+          the current problem are multiplied with the eigenvectors from
+          the overall problem.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    CUTPNT (input) INTEGER
+           Contains the location of the last eigenvalue in the leading
+           sub-matrix.  min(1,N) <= CUTPNT <= N.
+
+    QSIZ   (input) INTEGER
+           The dimension of the unitary matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N.
+
+    TLVLS  (input) INTEGER
+           The total number of merging levels in the overall divide and
+           conquer tree.
+
+    CURLVL (input) INTEGER
+           The current level in the overall merge routine,
+           0 <= curlvl <= tlvls.
+
+    CURPBM (input) INTEGER
+           The current problem in the current level in the overall
+           merge routine (counting from upper left to lower right).
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, the eigenvalues of the rank-1-perturbed matrix.
+           On exit, the eigenvalues of the repaired matrix.
+
+    Q      (input/output) COMPLEX array, dimension (LDQ,N)
+           On entry, the eigenvectors of the rank-1-perturbed matrix.
+           On exit, the eigenvectors of the repaired tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    RHO    (input) REAL
+           Contains the subdiagonal element used to create the rank-1
+           modification.
+
+    INDXQ  (output) INTEGER array, dimension (N)
+           This contains the permutation which will reintegrate the
+           subproblem just solved back into sorted order,
+           ie. D( INDXQ( I = 1, N ) ) will be in ascending order.
+
+    IWORK  (workspace) INTEGER array, dimension (4*N)
+
+    RWORK  (workspace) REAL array,
+                                   dimension (3*N+2*QSIZ*N)
+
+    WORK   (workspace) COMPLEX array, dimension (QSIZ*N)
+
+    QSTORE (input/output) REAL array, dimension (N**2+1)
+           Stores eigenvectors of submatrices encountered during
+           divide and conquer, packed together. QPTR points to
+           beginning of the submatrices.
+
+    QPTR   (input/output) INTEGER array, dimension (N+2)
+           List of indices pointing to beginning of submatrices stored
+           in QSTORE. The submatrices are numbered starting at the
+           bottom left of the divide and conquer tree, from left to
+           right and bottom to top.
+
+    PRMPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in PERM a
+           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+           indicates the size of the permutation and also the size of
+           the full, non-deflated problem.
+
+    PERM   (input) INTEGER array, dimension (N lg N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in GIVCOL a
+           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+           indicates the number of Givens rotations.
+
+    GIVCOL (input) INTEGER array, dimension (2, N lg N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (input) REAL array, dimension (2, N lg N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --qstore;
+    --qptr;
+    --prmptr;
+    --perm;
+    --givptr;
+    givcol -= 3;
+    givnum -= 3;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+/*
+       IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+          INFO = -1
+       ELSE IF( N.LT.0 ) THEN
+*/
+    if (*n < 0) {
+	*info = -1;
+    } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) {
+	*info = -2;
+    } else if (*qsiz < *n) {
+	*info = -3;
+    } else if (*ldq < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLAED7", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in SLAED2 and SLAED3.
+*/
+
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq = iw + *n;
+
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+
+/*
+       Form the z-vector which consists of the last row of Q_1 and the
+       first row of Q_2.
+*/
+
+    ptr = pow_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = *tlvls - i__;
+	ptr += pow_ii(&c__2, &i__2);
+/* L10: */
+    }
+    curr = ptr + *curpbm;
+    slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
+	    givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[
+	    iz + *n], info);
+
+/*
+       When solving the final problem, we no longer need the stored data,
+       so we will overwrite the data from this level onto the previously
+       used storage space.
+*/
+
+    if (*curlvl == *tlvls) {
+	qptr[curr] = 1;
+	prmptr[curr] = 1;
+	givptr[curr] = 1;
+    }
+
+/*     Sort and Deflate eigenvalues. */
+
+    claed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz],
+	    &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[
+	    indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[
+	    ((givptr[curr]) << (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1]
+	    , info);
+    prmptr[curr + 1] = prmptr[curr] + *n;
+    givptr[curr + 1] += givptr[curr];
+
+/*     Solve Secular Equation. */
+
+    if (k != 0) {
+	slaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda]
+		, &rwork[iw], &qstore[qptr[curr]], &k, info);
+	clacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[
+		q_offset], ldq, &rwork[iq]);
+/* Computing 2nd power */
+	i__1 = k;
+	qptr[curr + 1] = qptr[curr] + i__1 * i__1;
+	if (*info != 0) {
+	    return 0;
+	}
+
+/*     Prepare the INDXQ sorting premutation. */
+
+	n1 = k;
+	n2 = *n - k;
+	ind1 = 1;
+	ind2 = *n;
+	slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+	qptr[curr + 1] = qptr[curr];
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    indxq[i__] = i__;
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLAED7 */
+
+} /* claed7_ */
+
+/* Subroutine */ int claed8_(integer *k, integer *n, integer *qsiz, complex *
+	q, integer *ldq, real *d__, real *rho, integer *cutpnt, real *z__,
+	real *dlamda, complex *q2, integer *ldq2, real *w, integer *indxp,
+	integer *indx, integer *indxq, integer *perm, integer *givptr,
+	integer *givcol, real *givnum, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real c__;
+    static integer i__, j;
+    static real s, t;
+    static integer k2, n1, n2, jp, n1p1;
+    static real eps, tau, tol;
+    static integer jlam, imax, jmax;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    ccopy_(integer *, complex *, integer *, complex *, integer *),
+	    csrot_(integer *, complex *, integer *, complex *, integer *,
+	    real *, real *), scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex
+	    *, integer *, complex *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
+	    *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLAED8 merges the two sets of eigenvalues together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    eigenvalues are close together or if there is a tiny element in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    Arguments
+    =========
+
+    K      (output) INTEGER
+           Contains the number of non-deflated eigenvalues.
+           This is the order of the related secular equation.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    QSIZ   (input) INTEGER
+           The dimension of the unitary matrix used to reduce
+           the dense or band matrix to tridiagonal form.
+           QSIZ >= N if ICOMPQ = 1.
+
+    Q      (input/output) COMPLEX array, dimension (LDQ,N)
+           On entry, Q contains the eigenvectors of the partially solved
+           system which has been previously updated in matrix
+           multiplies with other partially solved eigensystems.
+           On exit, Q contains the trailing (N-K) updated eigenvectors
+           (those which were deflated) in its last N-K columns.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max( 1, N ).
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, D contains the eigenvalues of the two submatrices to
+           be combined.  On exit, D contains the trailing (N-K) updated
+           eigenvalues (those which were deflated) sorted into increasing
+           order.
+
+    RHO    (input/output) REAL
+           Contains the off diagonal element associated with the rank-1
+           cut which originally split the two submatrices which are now
+           being recombined. RHO is modified during the computation to
+           the value required by SLAED3.
+
+    CUTPNT (input) INTEGER
+           Contains the location of the last eigenvalue in the leading
+           sub-matrix.  MIN(1,N) <= CUTPNT <= N.
+
+    Z      (input) REAL array, dimension (N)
+           On input this vector contains the updating vector (the last
+           row of the first sub-eigenvector matrix and the first row of
+           the second sub-eigenvector matrix).  The contents of Z are
+           destroyed during the updating process.
+
+    DLAMDA (output) REAL array, dimension (N)
+           Contains a copy of the first K eigenvalues which will be used
+           by SLAED3 to form the secular equation.
+
+    Q2     (output) COMPLEX array, dimension (LDQ2,N)
+           If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+           Contains a copy of the first K eigenvectors which will be used
+           by SLAED7 in a matrix multiply (SGEMM) to update the new
+           eigenvectors.
+
+    LDQ2   (input) INTEGER
+           The leading dimension of the array Q2.  LDQ2 >= max( 1, N ).
+
+    W      (output) REAL array, dimension (N)
+           This will hold the first k values of the final
+           deflation-altered z-vector and will be passed to SLAED3.
+
+    INDXP  (workspace) INTEGER array, dimension (N)
+           This will contain the permutation used to place deflated
+           values of D at the end of the array. On output INDXP(1:K)
+           points to the nondeflated D-values and INDXP(K+1:N)
+           points to the deflated eigenvalues.
+
+    INDX   (workspace) INTEGER array, dimension (N)
+           This will contain the permutation used to sort the contents of
+           D into ascending order.
+
+    INDXQ  (input) INTEGER array, dimension (N)
+           This contains the permutation which separately sorts the two
+           sub-problems in D into ascending order.  Note that elements in
+           the second half of this permutation must first have CUTPNT
+           added to their values in order to be accurate.
+
+    PERM   (output) INTEGER array, dimension (N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (output) INTEGER
+           Contains the number of Givens rotations which took place in
+           this subproblem.
+
+    GIVCOL (output) INTEGER array, dimension (2, N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (output) REAL array, dimension (2, N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --d__;
+    --z__;
+    --dlamda;
+    q2_dim1 = *ldq2;
+    q2_offset = 1 + q2_dim1;
+    q2 -= q2_offset;
+    --w;
+    --indxp;
+    --indx;
+    --indxq;
+    --perm;
+    givcol -= 3;
+    givnum -= 3;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -2;
+    } else if (*qsiz < *n) {
+	*info = -3;
+    } else if (*ldq < max(1,*n)) {
+	*info = -5;
+    } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) {
+	*info = -8;
+    } else if (*ldq2 < max(1,*n)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLAED8", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    n1 = *cutpnt;
+    n2 = *n - n1;
+    n1p1 = n1 + 1;
+
+    if (*rho < 0.f) {
+	sscal_(&n2, &c_b1290, &z__[n1p1], &c__1);
+    }
+
+/*     Normalize z so that norm(z) = 1 */
+
+    t = 1.f / sqrt(2.f);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	indx[j] = j;
+/* L10: */
+    }
+    sscal_(n, &t, &z__[1], &c__1);
+    *rho = (r__1 = *rho * 2.f, dabs(r__1));
+
+/*     Sort the eigenvalues into increasing order */
+
+    i__1 = *n;
+    for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
+	indxq[i__] += *cutpnt;
+/* L20: */
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = d__[indxq[i__]];
+	w[i__] = z__[indxq[i__]];
+/* L30: */
+    }
+    i__ = 1;
+    j = *cutpnt + 1;
+    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__[i__] = dlamda[indx[i__]];
+	z__[i__] = w[indx[i__]];
+/* L40: */
+    }
+
+/*     Calculate the allowable deflation tolerance */
+
+    imax = isamax_(n, &z__[1], &c__1);
+    jmax = isamax_(n, &d__[1], &c__1);
+    eps = slamch_("Epsilon");
+    tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1));
+
+/*
+       If the rank-1 modifier is small enough, no more needs to be done
+       -- except to reorganize Q so that its columns correspond with the
+       elements in D.
+*/
+
+    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
+	*k = 0;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    perm[j] = indxq[indx[j]];
+	    ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
+		    , &c__1);
+/* L50: */
+	}
+	clacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
+	return 0;
+    }
+
+/*
+       If there are multiple eigenvalues then the problem deflates.  Here
+       the number of equal eigenvalues are found.  As each equal
+       eigenvalue is found, an elementary reflector is computed to rotate
+       the corresponding eigensubspace so that the corresponding
+       components of Z are zero in this new basis.
+*/
+
+    *k = 0;
+    *givptr = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    indxp[k2] = j;
+	    if (j == *n) {
+		goto L100;
+	    }
+	} else {
+	    jlam = j;
+	    goto L70;
+	}
+/* L60: */
+    }
+L70:
+    ++j;
+    if (j > *n) {
+	goto L90;
+    }
+    if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	indxp[k2] = j;
+    } else {
+
+/*        Check if eigenvalues are close enough to allow deflation. */
+
+	s = z__[jlam];
+	c__ = z__[j];
+
+/*
+          Find sqrt(a**2+b**2) without overflow or
+          destructive underflow.
+*/
+
+	tau = slapy2_(&c__, &s);
+	t = d__[j] - d__[jlam];
+	c__ /= tau;
+	s = -s / tau;
+	if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    z__[j] = tau;
+	    z__[jlam] = 0.f;
+
+/*           Record the appropriate Givens rotation */
+
+	    ++(*givptr);
+	    givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]];
+	    givcol[((*givptr) << (1)) + 2] = indxq[indx[j]];
+	    givnum[((*givptr) << (1)) + 1] = c__;
+	    givnum[((*givptr) << (1)) + 2] = s;
+	    csrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[
+		    indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
+	    t = d__[jlam] * c__ * c__ + d__[j] * s * s;
+	    d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
+	    d__[jlam] = t;
+	    --k2;
+	    i__ = 1;
+L80:
+	    if (k2 + i__ <= *n) {
+		if (d__[jlam] < d__[indxp[k2 + i__]]) {
+		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+		    indxp[k2 + i__] = jlam;
+		    ++i__;
+		    goto L80;
+		} else {
+		    indxp[k2 + i__ - 1] = jlam;
+		}
+	    } else {
+		indxp[k2 + i__ - 1] = jlam;
+	    }
+	    jlam = j;
+	} else {
+	    ++(*k);
+	    w[*k] = z__[jlam];
+	    dlamda[*k] = d__[jlam];
+	    indxp[*k] = jlam;
+	    jlam = j;
+	}
+    }
+    goto L70;
+L90:
+
+/*     Record the last eigenvalue. */
+
+    ++(*k);
+    w[*k] = z__[jlam];
+    dlamda[*k] = d__[jlam];
+    indxp[*k] = jlam;
+
+L100:
+
+/*
+       Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+       and Q2 respectively.  The eigenvalues/vectors which were not
+       deflated go into the first K slots of DLAMDA and Q2 respectively,
+       while those which were deflated go into the last N - K slots.
+*/
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	jp = indxp[j];
+	dlamda[j] = d__[jp];
+	perm[j] = indxq[indx[jp]];
+	ccopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &
+		c__1);
+/* L110: */
+    }
+
+/*
+       The deflated eigenvalues and their corresponding vectors go back
+       into the last N - K slots of D and Q respectively.
+*/
+
+    if (*k < *n) {
+	i__1 = *n - *k;
+	scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	i__1 = *n - *k;
+	clacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k +
+		1) * q_dim1 + 1], ldq);
+    }
+
+    return 0;
+
+/*     End of CLAED8 */
+
+} /* claed8_ */
+
+/* Subroutine */ int clahqr_(logical *wantt, logical *wantz, integer *n,
+	integer *ilo, integer *ihi, complex *h__, integer *ldh, complex *w,
+	integer *iloz, integer *ihiz, complex *z__, integer *ldz, integer *
+	info)
+{
+    /* System generated locals */
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
+    real r__1, r__2, r__3, r__4, r__5, r__6;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+    void c_sqrt(complex *, complex *), r_cnjg(complex *, complex *);
+    double c_abs(complex *);
+
+    /* Local variables */
+    static integer i__, j, k, l, m;
+    static real s;
+    static complex t, u, v[2], x, y;
+    static integer i1, i2;
+    static complex t1;
+    static real t2;
+    static complex v2;
+    static real h10;
+    static complex h11;
+    static real h21;
+    static complex h22;
+    static integer nh, nz;
+    static complex h11s;
+    static integer itn, its;
+    static real ulp;
+    static complex sum;
+    static real tst1;
+    static complex temp;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *), ccopy_(integer *, complex *, integer *, complex *,
+	    integer *);
+    static real rtemp, rwork[1];
+    extern /* Subroutine */ int clarfg_(integer *, complex *, complex *,
+	    integer *, complex *);
+    extern /* Complex */ VOID cladiv_(complex *, complex *, complex *);
+    extern doublereal slamch_(char *), clanhs_(char *, integer *,
+	    complex *, integer *, real *);
+    static real smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLAHQR is an auxiliary routine called by CHSEQR to update the
+    eigenvalues and Schur decomposition already computed by CHSEQR, by
+    dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+
+    Arguments
+    =========
+
+    WANTT   (input) LOGICAL
+            = .TRUE. : the full Schur form T is required;
+            = .FALSE.: only eigenvalues are required.
+
+    WANTZ   (input) LOGICAL
+            = .TRUE. : the matrix of Schur vectors Z is required;
+            = .FALSE.: Schur vectors are not required.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper triangular in rows and
+            columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1).
+            CLAHQR works primarily with the Hessenberg submatrix in rows
+            and columns ILO to IHI, but applies transformations to all of
+            H if WANTT is .TRUE..
+            1 <= ILO <= max(1,IHI); IHI <= N.
+
+    H       (input/output) COMPLEX array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if WANTT is .TRUE., H is upper triangular in rows
+            and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+            standard form. If WANTT is .FALSE., the contents of H are
+            unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    W       (output) COMPLEX array, dimension (N)
+            The computed eigenvalues ILO to IHI are stored in the
+            corresponding elements of W. If WANTT is .TRUE., the
+            eigenvalues are stored in the same order as on the diagonal
+            of the Schur form returned in H, with W(i) = H(i,i).
+
+    ILOZ    (input) INTEGER
+    IHIZ    (input) INTEGER
+            Specify the rows of Z to which transformations must be
+            applied if WANTZ is .TRUE..
+            1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+
+    Z       (input/output) COMPLEX array, dimension (LDZ,N)
+            If WANTZ is .TRUE., on entry Z must contain the current
+            matrix Z of transformations accumulated by CHSEQR, and on
+            exit Z has been updated; transformations are applied only to
+            the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+            If WANTZ is .FALSE., Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z. LDZ >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            > 0: if INFO = i, CLAHQR failed to compute all the
+                 eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1)
+                 iterations; elements i+1:ihi of W contain those
+                 eigenvalues which have been successfully computed.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --w;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	i__1 = *ilo;
+	i__2 = *ilo + *ilo * h_dim1;
+	w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
+	return 0;
+    }
+
+    nh = *ihi - *ilo + 1;
+    nz = *ihiz - *iloz + 1;
+
+/*
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    ulp = slamch_("Precision");
+    smlnum = slamch_("Safe minimum") / ulp;
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are set inside the main loop.
+*/
+
+    if (*wantt) {
+	i1 = 1;
+	i2 = *n;
+    }
+
+/*     ITN is the total number of QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of 1. Each iteration of the loop works
+       with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L10:
+    if (i__ < *ilo) {
+	goto L130;
+    }
+
+/*
+       Perform QR iterations on rows and columns ILO to I until a
+       submatrix of order 1 splits off at the bottom because a
+       subdiagonal element has become negligible.
+*/
+
+    l = *ilo;
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    i__3 = k - 1 + (k - 1) * h_dim1;
+	    i__4 = k + k * h_dim1;
+	    tst1 = (r__1 = h__[i__3].r, dabs(r__1)) + (r__2 = r_imag(&h__[k -
+		    1 + (k - 1) * h_dim1]), dabs(r__2)) + ((r__3 = h__[i__4]
+		    .r, dabs(r__3)) + (r__4 = r_imag(&h__[k + k * h_dim1]),
+		    dabs(r__4)));
+	    if (tst1 == 0.f) {
+		i__3 = i__ - l + 1;
+		tst1 = clanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork);
+	    }
+	    i__3 = k + (k - 1) * h_dim1;
+/* Computing MAX */
+	    r__2 = ulp * tst1;
+	    if ((r__1 = h__[i__3].r, dabs(r__1)) <= dmax(r__2,smlnum)) {
+		goto L30;
+	    }
+/* L20: */
+	}
+L30:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible */
+
+	    i__2 = l + (l - 1) * h_dim1;
+	    h__[i__2].r = 0.f, h__[i__2].i = 0.f;
+	}
+
+/*        Exit from loop if a submatrix of order 1 has split off. */
+
+	if (l >= i__) {
+	    goto L120;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! (*wantt)) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 10) || (its == 20)) {
+
+/*           Exceptional shift. */
+
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    s = (r__1 = h__[i__2].r, dabs(r__1)) * .75f;
+	    i__2 = i__ + i__ * h_dim1;
+	    q__1.r = s + h__[i__2].r, q__1.i = h__[i__2].i;
+	    t.r = q__1.r, t.i = q__1.i;
+	} else {
+
+/*           Wilkinson's shift. */
+
+	    i__2 = i__ + i__ * h_dim1;
+	    t.r = h__[i__2].r, t.i = h__[i__2].i;
+	    i__2 = i__ - 1 + i__ * h_dim1;
+	    i__3 = i__ + (i__ - 1) * h_dim1;
+	    r__1 = h__[i__3].r;
+	    q__1.r = r__1 * h__[i__2].r, q__1.i = r__1 * h__[i__2].i;
+	    u.r = q__1.r, u.i = q__1.i;
+	    if ((u.r != 0.f) || (u.i != 0.f)) {
+		i__2 = i__ - 1 + (i__ - 1) * h_dim1;
+		q__2.r = h__[i__2].r - t.r, q__2.i = h__[i__2].i - t.i;
+		q__1.r = q__2.r * .5f, q__1.i = q__2.i * .5f;
+		x.r = q__1.r, x.i = q__1.i;
+		q__3.r = x.r * x.r - x.i * x.i, q__3.i = x.r * x.i + x.i *
+			x.r;
+		q__2.r = q__3.r + u.r, q__2.i = q__3.i + u.i;
+		c_sqrt(&q__1, &q__2);
+		y.r = q__1.r, y.i = q__1.i;
+		if (x.r * y.r + r_imag(&x) * r_imag(&y) < 0.f) {
+		    q__1.r = -y.r, q__1.i = -y.i;
+		    y.r = q__1.r, y.i = q__1.i;
+		}
+		q__3.r = x.r + y.r, q__3.i = x.i + y.i;
+		cladiv_(&q__2, &u, &q__3);
+		q__1.r = t.r - q__2.r, q__1.i = t.i - q__2.i;
+		t.r = q__1.r, t.i = q__1.i;
+	    }
+	}
+
+/*        Look for two consecutive small subdiagonal elements. */
+
+	i__2 = l + 1;
+	for (m = i__ - 1; m >= i__2; --m) {
+
+/*
+             Determine the effect of starting the single-shift QR
+             iteration at row M, and see if this would make H(M,M-1)
+             negligible.
+*/
+
+	    i__3 = m + m * h_dim1;
+	    h11.r = h__[i__3].r, h11.i = h__[i__3].i;
+	    i__3 = m + 1 + (m + 1) * h_dim1;
+	    h22.r = h__[i__3].r, h22.i = h__[i__3].i;
+	    q__1.r = h11.r - t.r, q__1.i = h11.i - t.i;
+	    h11s.r = q__1.r, h11s.i = q__1.i;
+	    i__3 = m + 1 + m * h_dim1;
+	    h21 = h__[i__3].r;
+	    s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(
+		    r__2)) + dabs(h21);
+	    q__1.r = h11s.r / s, q__1.i = h11s.i / s;
+	    h11s.r = q__1.r, h11s.i = q__1.i;
+	    h21 /= s;
+	    v[0].r = h11s.r, v[0].i = h11s.i;
+	    v[1].r = h21, v[1].i = 0.f;
+	    i__3 = m + (m - 1) * h_dim1;
+	    h10 = h__[i__3].r;
+	    tst1 = ((r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(
+		    r__2))) * ((r__3 = h11.r, dabs(r__3)) + (r__4 = r_imag(&
+		    h11), dabs(r__4)) + ((r__5 = h22.r, dabs(r__5)) + (r__6 =
+		    r_imag(&h22), dabs(r__6))));
+	    if ((r__1 = h10 * h21, dabs(r__1)) <= ulp * tst1) {
+		goto L50;
+	    }
+/* L40: */
+	}
+	i__2 = l + l * h_dim1;
+	h11.r = h__[i__2].r, h11.i = h__[i__2].i;
+	i__2 = l + 1 + (l + 1) * h_dim1;
+	h22.r = h__[i__2].r, h22.i = h__[i__2].i;
+	q__1.r = h11.r - t.r, q__1.i = h11.i - t.i;
+	h11s.r = q__1.r, h11s.i = q__1.i;
+	i__2 = l + 1 + l * h_dim1;
+	h21 = h__[i__2].r;
+	s = (r__1 = h11s.r, dabs(r__1)) + (r__2 = r_imag(&h11s), dabs(r__2))
+		+ dabs(h21);
+	q__1.r = h11s.r / s, q__1.i = h11s.i / s;
+	h11s.r = q__1.r, h11s.i = q__1.i;
+	h21 /= s;
+	v[0].r = h11s.r, v[0].i = h11s.i;
+	v[1].r = h21, v[1].i = 0.f;
+L50:
+
+/*        Single-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = m; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix.
+
+             V(2) is always real before the call to CLARFG, and hence
+             after the call T2 ( = T1*V(2) ) is also real.
+*/
+
+	    if (k > m) {
+		ccopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    clarfg_(&c__2, v, &v[1], &c__1, &t1);
+	    if (k > m) {
+		i__3 = k + (k - 1) * h_dim1;
+		h__[i__3].r = v[0].r, h__[i__3].i = v[0].i;
+		i__3 = k + 1 + (k - 1) * h_dim1;
+		h__[i__3].r = 0.f, h__[i__3].i = 0.f;
+	    }
+	    v2.r = v[1].r, v2.i = v[1].i;
+	    q__1.r = t1.r * v2.r - t1.i * v2.i, q__1.i = t1.r * v2.i + t1.i *
+		    v2.r;
+	    t2 = q__1.r;
+
+/*
+             Apply G from the left to transform the rows of the matrix
+             in columns K to I2.
+*/
+
+	    i__3 = i2;
+	    for (j = k; j <= i__3; ++j) {
+		r_cnjg(&q__3, &t1);
+		i__4 = k + j * h_dim1;
+		q__2.r = q__3.r * h__[i__4].r - q__3.i * h__[i__4].i, q__2.i =
+			 q__3.r * h__[i__4].i + q__3.i * h__[i__4].r;
+		i__5 = k + 1 + j * h_dim1;
+		q__4.r = t2 * h__[i__5].r, q__4.i = t2 * h__[i__5].i;
+		q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
+		sum.r = q__1.r, sum.i = q__1.i;
+		i__4 = k + j * h_dim1;
+		i__5 = k + j * h_dim1;
+		q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i;
+		h__[i__4].r = q__1.r, h__[i__4].i = q__1.i;
+		i__4 = k + 1 + j * h_dim1;
+		i__5 = k + 1 + j * h_dim1;
+		q__2.r = sum.r * v2.r - sum.i * v2.i, q__2.i = sum.r * v2.i +
+			sum.i * v2.r;
+		q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i;
+		h__[i__4].r = q__1.r, h__[i__4].i = q__1.i;
+/* L60: */
+	    }
+
+/*
+             Apply G from the right to transform the columns of the
+             matrix in rows I1 to min(K+2,I).
+
+   Computing MIN
+*/
+	    i__4 = k + 2;
+	    i__3 = min(i__4,i__);
+	    for (j = i1; j <= i__3; ++j) {
+		i__4 = j + k * h_dim1;
+		q__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, q__2.i =
+			t1.r * h__[i__4].i + t1.i * h__[i__4].r;
+		i__5 = j + (k + 1) * h_dim1;
+		q__3.r = t2 * h__[i__5].r, q__3.i = t2 * h__[i__5].i;
+		q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+		sum.r = q__1.r, sum.i = q__1.i;
+		i__4 = j + k * h_dim1;
+		i__5 = j + k * h_dim1;
+		q__1.r = h__[i__5].r - sum.r, q__1.i = h__[i__5].i - sum.i;
+		h__[i__4].r = q__1.r, h__[i__4].i = q__1.i;
+		i__4 = j + (k + 1) * h_dim1;
+		i__5 = j + (k + 1) * h_dim1;
+		r_cnjg(&q__3, &v2);
+		q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r *
+			q__3.i + sum.i * q__3.r;
+		q__1.r = h__[i__5].r - q__2.r, q__1.i = h__[i__5].i - q__2.i;
+		h__[i__4].r = q__1.r, h__[i__4].i = q__1.i;
+/* L70: */
+	    }
+
+	    if (*wantz) {
+
+/*              Accumulate transformations in the matrix Z */
+
+		i__3 = *ihiz;
+		for (j = *iloz; j <= i__3; ++j) {
+		    i__4 = j + k * z_dim1;
+		    q__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, q__2.i =
+			     t1.r * z__[i__4].i + t1.i * z__[i__4].r;
+		    i__5 = j + (k + 1) * z_dim1;
+		    q__3.r = t2 * z__[i__5].r, q__3.i = t2 * z__[i__5].i;
+		    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+		    sum.r = q__1.r, sum.i = q__1.i;
+		    i__4 = j + k * z_dim1;
+		    i__5 = j + k * z_dim1;
+		    q__1.r = z__[i__5].r - sum.r, q__1.i = z__[i__5].i -
+			    sum.i;
+		    z__[i__4].r = q__1.r, z__[i__4].i = q__1.i;
+		    i__4 = j + (k + 1) * z_dim1;
+		    i__5 = j + (k + 1) * z_dim1;
+		    r_cnjg(&q__3, &v2);
+		    q__2.r = sum.r * q__3.r - sum.i * q__3.i, q__2.i = sum.r *
+			     q__3.i + sum.i * q__3.r;
+		    q__1.r = z__[i__5].r - q__2.r, q__1.i = z__[i__5].i -
+			    q__2.i;
+		    z__[i__4].r = q__1.r, z__[i__4].i = q__1.i;
+/* L80: */
+		}
+	    }
+
+	    if (k == m && m > l) {
+
+/*
+                If the QR step was started at row M > L because two
+                consecutive small subdiagonals were found, then extra
+                scaling must be performed to ensure that H(M,M-1) remains
+                real.
+*/
+
+		q__1.r = 1.f - t1.r, q__1.i = 0.f - t1.i;
+		temp.r = q__1.r, temp.i = q__1.i;
+		r__1 = c_abs(&temp);
+		q__1.r = temp.r / r__1, q__1.i = temp.i / r__1;
+		temp.r = q__1.r, temp.i = q__1.i;
+		i__3 = m + 1 + m * h_dim1;
+		i__4 = m + 1 + m * h_dim1;
+		r_cnjg(&q__2, &temp);
+		q__1.r = h__[i__4].r * q__2.r - h__[i__4].i * q__2.i, q__1.i =
+			 h__[i__4].r * q__2.i + h__[i__4].i * q__2.r;
+		h__[i__3].r = q__1.r, h__[i__3].i = q__1.i;
+		if (m + 2 <= i__) {
+		    i__3 = m + 2 + (m + 1) * h_dim1;
+		    i__4 = m + 2 + (m + 1) * h_dim1;
+		    q__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i,
+			    q__1.i = h__[i__4].r * temp.i + h__[i__4].i *
+			    temp.r;
+		    h__[i__3].r = q__1.r, h__[i__3].i = q__1.i;
+		}
+		i__3 = i__;
+		for (j = m; j <= i__3; ++j) {
+		    if (j != m + 1) {
+			if (i2 > j) {
+			    i__4 = i2 - j;
+			    cscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1],
+				    ldh);
+			}
+			i__4 = j - i1;
+			r_cnjg(&q__1, &temp);
+			cscal_(&i__4, &q__1, &h__[i1 + j * h_dim1], &c__1);
+			if (*wantz) {
+			    r_cnjg(&q__1, &temp);
+			    cscal_(&nz, &q__1, &z__[*iloz + j * z_dim1], &
+				    c__1);
+			}
+		    }
+/* L90: */
+		}
+	    }
+/* L100: */
+	}
+
+/*        Ensure that H(I,I-1) is real. */
+
+	i__2 = i__ + (i__ - 1) * h_dim1;
+	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
+	if (r_imag(&temp) != 0.f) {
+	    rtemp = c_abs(&temp);
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    h__[i__2].r = rtemp, h__[i__2].i = 0.f;
+	    q__1.r = temp.r / rtemp, q__1.i = temp.i / rtemp;
+	    temp.r = q__1.r, temp.i = q__1.i;
+	    if (i2 > i__) {
+		i__2 = i2 - i__;
+		r_cnjg(&q__1, &temp);
+		cscal_(&i__2, &q__1, &h__[i__ + (i__ + 1) * h_dim1], ldh);
+	    }
+	    i__2 = i__ - i1;
+	    cscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1);
+	    if (*wantz) {
+		cscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1);
+	    }
+	}
+
+/* L110: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L120:
+
+/*     H(I,I-1) is negligible: one eigenvalue has converged. */
+
+    i__1 = i__;
+    i__2 = i__ + i__ * h_dim1;
+    w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L10;
+
+L130:
+    return 0;
+
+/*     End of CLAHQR */
+
+} /* clahqr_ */
+
+/* Subroutine */ int clahrd_(integer *n, integer *k, integer *nb, complex *a,
+	integer *lda, complex *tau, complex *t, integer *ldt, complex *y,
+	integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__;
+    static complex ei;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *), cgemv_(char *, integer *, integer *, complex *,
+	    complex *, integer *, complex *, integer *, complex *, complex *,
+	    integer *), ccopy_(integer *, complex *, integer *,
+	    complex *, integer *), caxpy_(integer *, complex *, complex *,
+	    integer *, complex *, integer *), ctrmv_(char *, char *, char *,
+	    integer *, complex *, integer *, complex *, integer *), clarfg_(integer *, complex *, complex *, integer
+	    *, complex *), clacgv_(integer *, complex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLAHRD reduces the first NB columns of a complex general n-by-(n-k+1)
+    matrix A so that elements below the k-th subdiagonal are zero. The
+    reduction is performed by a unitary similarity transformation
+    Q' * A * Q. The routine returns the matrices V and T which determine
+    Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+
+    This is an auxiliary routine called by CGEHRD.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    K       (input) INTEGER
+            The offset for the reduction. Elements below the k-th
+            subdiagonal in the first NB columns are reduced to zero.
+
+    NB      (input) INTEGER
+            The number of columns to be reduced.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N-K+1)
+            On entry, the n-by-(n-k+1) general matrix A.
+            On exit, the elements on and above the k-th subdiagonal in
+            the first NB columns are overwritten with the corresponding
+            elements of the reduced matrix; the elements below the k-th
+            subdiagonal, with the array TAU, represent the matrix Q as a
+            product of elementary reflectors. The other columns of A are
+            unchanged. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) COMPLEX array, dimension (NB)
+            The scalar factors of the elementary reflectors. See Further
+            Details.
+
+    T       (output) COMPLEX array, dimension (LDT,NB)
+            The upper triangular matrix T.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T.  LDT >= NB.
+
+    Y       (output) COMPLEX array, dimension (LDY,NB)
+            The n-by-nb matrix Y.
+
+    LDY     (input) INTEGER
+            The leading dimension of the array Y. LDY >= max(1,N).
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of nb elementary reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+    A(i+k+1:n,i), and tau in TAU(i).
+
+    The elements of the vectors v together form the (n-k+1)-by-nb matrix
+    V which is needed, with T and Y, to apply the transformation to the
+    unreduced part of the matrix, using an update of the form:
+    A := (I - V*T*V') * (A - Y*V').
+
+    The contents of A on exit are illustrated by the following example
+    with n = 7, k = 3 and nb = 2:
+
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( h   h   a   a   a )
+       ( v1  h   a   a   a )
+       ( v1  v2  a   a   a )
+       ( v1  v2  a   a   a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    --tau;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if (*n <= 1) {
+	return 0;
+    }
+
+    i__1 = *nb;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (i__ > 1) {
+
+/*
+             Update A(1:n,i)
+
+             Compute i-th column of A - Y * V'
+*/
+
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);
+	    i__2 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &a[*k
+		    + i__ - 1 + a_dim1], lda, &c_b56, &a[i__ * a_dim1 + 1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);
+
+/*
+             Apply I - V * T' * V' to this column (call it b) from the
+             left, using the last column of T as workspace
+
+             Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+                      ( V2 )             ( b2 )
+
+             where V1 is unit lower triangular
+
+             w := V1' * b1
+*/
+
+	    i__2 = i__ - 1;
+	    ccopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 +
+		    1], &c__1);
+	    i__2 = i__ - 1;
+	    ctrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 +
+		    a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1);
+
+/*           w := w + V2'*b2 */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ +
+		    a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b56,
+		    &t[*nb * t_dim1 + 1], &c__1);
+
+/*           w := T'*w */
+
+	    i__2 = i__ - 1;
+	    ctrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[
+		    t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1);
+
+/*           b2 := b2 - V2*w */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[*k + i__ + a_dim1],
+		     lda, &t[*nb * t_dim1 + 1], &c__1, &c_b56, &a[*k + i__ +
+		    i__ * a_dim1], &c__1);
+
+/*           b1 := b1 - V1*w */
+
+	    i__2 = i__ - 1;
+	    ctrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1]
+		    , lda, &t[*nb * t_dim1 + 1], &c__1);
+	    i__2 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    caxpy_(&i__2, &q__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__
+		    * a_dim1], &c__1);
+
+	    i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1;
+	    a[i__2].r = ei.r, a[i__2].i = ei.i;
+	}
+
+/*
+          Generate the elementary reflector H(i) to annihilate
+          A(k+i+1:n,i)
+*/
+
+	i__2 = *k + i__ + i__ * a_dim1;
+	ei.r = a[i__2].r, ei.i = a[i__2].i;
+	i__2 = *n - *k - i__ + 1;
+/* Computing MIN */
+	i__3 = *k + i__ + 1;
+	clarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__])
+		;
+	i__2 = *k + i__ + i__ * a_dim1;
+	a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*        Compute  Y(1:n,i) */
+
+	i__2 = *n - *k - i__ + 1;
+	cgemv_("No transpose", n, &i__2, &c_b56, &a[(i__ + 1) * a_dim1 + 1],
+		lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &y[i__ *
+		y_dim1 + 1], &c__1);
+	i__2 = *n - *k - i__ + 1;
+	i__3 = i__ - 1;
+	cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[*k + i__ +
+		a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b55, &t[
+		i__ * t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	q__1.r = -1.f, q__1.i = -0.f;
+	cgemv_("No transpose", n, &i__2, &q__1, &y[y_offset], ldy, &t[i__ *
+		t_dim1 + 1], &c__1, &c_b56, &y[i__ * y_dim1 + 1], &c__1);
+	cscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1);
+
+/*        Compute T(1:i,i) */
+
+	i__2 = i__ - 1;
+	i__3 = i__;
+	q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+	cscal_(&i__2, &q__1, &t[i__ * t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt,
+		&t[i__ * t_dim1 + 1], &c__1)
+		;
+	i__2 = i__ + i__ * t_dim1;
+	i__3 = i__;
+	t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+
+/* L10: */
+    }
+    i__1 = *k + *nb + *nb * a_dim1;
+    a[i__1].r = ei.r, a[i__1].i = ei.i;
+
+    return 0;
+
+/*     End of CLAHRD */
+
+} /* clahrd_ */
+
+/* Subroutine */ int clals0_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, integer *nrhs, complex *b, integer *ldb, complex *bx,
+	integer *ldbx, integer *perm, integer *givptr, integer *givcol,
+	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
+	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
+	rwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1,
+	    givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset,
+	    bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5;
+    real r__1;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, j, m, n;
+    static real dj;
+    static integer nlp1, jcol;
+    static real temp;
+    static integer jrow;
+    extern doublereal snrm2_(integer *, real *, integer *);
+    static real diflj, difrj, dsigj;
+    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
+	    complex *, integer *), sgemv_(char *, integer *, integer *, real *
+	    , real *, integer *, real *, integer *, real *, real *, integer *), csrot_(integer *, complex *, integer *, complex *,
+	    integer *, real *, real *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int clascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, complex *, integer *, integer *), csscal_(integer *, real *, complex *, integer *),
+	    clacpy_(char *, integer *, integer *, complex *, integer *,
+	    complex *, integer *), xerbla_(char *, integer *);
+    static real dsigjp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 1, 1999
+
+
+    Purpose
+    =======
+
+    CLALS0 applies back the multiplying factors of either the left or the
+    right singular vector matrix of a diagonal matrix appended by a row
+    to the right hand side matrix B in solving the least squares problem
+    using the divide-and-conquer SVD approach.
+
+    For the left singular vector matrix, three types of orthogonal
+    matrices are involved:
+
+    (1L) Givens rotations: the number of such rotations is GIVPTR; the
+         pairs of columns/rows they were applied to are stored in GIVCOL;
+         and the C- and S-values of these rotations are stored in GIVNUM.
+
+    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+         row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+         J-th row.
+
+    (3L) The left singular vector matrix of the remaining matrix.
+
+    For the right singular vector matrix, four types of orthogonal
+    matrices are involved:
+
+    (1R) The right singular vector matrix of the remaining matrix.
+
+    (2R) If SQRE = 1, one extra Givens rotation to generate the right
+         null space.
+
+    (3R) The inverse transformation of (2L).
+
+    (4R) The inverse transformation of (1L).
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed in
+           factored form:
+           = 0: Left singular vector matrix.
+           = 1: Right singular vector matrix.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block. NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block. NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input/output) COMPLEX array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B. LDB must be at least
+           max(1,MAX( M, N ) ).
+
+    BX     (workspace) COMPLEX array, dimension ( LDBX, NRHS )
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    PERM   (input) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) applied
+           to the two blocks.
+
+    GIVPTR (input) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of rows/columns
+           involved in a Givens rotation.
+
+    LDGCOL (input) INTEGER
+           The leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value used in the
+           corresponding Givens rotation.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of arrays DIFR, POLES and
+           GIVNUM, must be at least K.
+
+    POLES  (input) REAL array, dimension ( LDGNUM, 2 )
+           On entry, POLES(1:K, 1) contains the new singular
+           values obtained from solving the secular equation, and
+           POLES(1:K, 2) is an array containing the poles in the secular
+           equation.
+
+    DIFL   (input) REAL array, dimension ( K ).
+           On entry, DIFL(I) is the distance between I-th updated
+           (undeflated) singular value and the I-th (undeflated) old
+           singular value.
+
+    DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).
+           On entry, DIFR(I, 1) contains the distances between I-th
+           updated (undeflated) singular value and the I+1-th
+           (undeflated) old singular value. And DIFR(I, 2) is the
+           normalizing factor for the I-th right singular vector.
+
+    Z      (input) REAL array, dimension ( K )
+           Contain the components of the deflation-adjusted updating row
+           vector.
+
+    K      (input) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    C      (input) REAL
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (input) REAL
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    RWORK  (workspace) REAL array, dimension
+           ( K*(1+NRHS) + 2*NRHS )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    difr_dim1 = *ldgnum;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    poles_dim1 = *ldgnum;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    --difl;
+    --z__;
+    --rwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    }
+
+    n = *nl + *nr + 1;
+
+    if (*nrhs < 1) {
+	*info = -5;
+    } else if (*ldb < n) {
+	*info = -7;
+    } else if (*ldbx < n) {
+	*info = -9;
+    } else if (*givptr < 0) {
+	*info = -11;
+    } else if (*ldgcol < n) {
+	*info = -13;
+    } else if (*ldgnum < n) {
+	*info = -15;
+    } else if (*k < 1) {
+	*info = -20;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLALS0", &i__1);
+	return 0;
+    }
+
+    m = n + *sqre;
+    nlp1 = *nl + 1;
+
+    if (*icompq == 0) {
+
+/*
+          Apply back orthogonal transformations from the left.
+
+          Step (1L): apply back the Givens rotations performed.
+*/
+
+	i__1 = *givptr;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    csrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1])
+		    ;
+/* L10: */
+	}
+
+/*        Step (2L): permute rows of B. */
+
+	ccopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    ccopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1],
+		    ldbx);
+/* L20: */
+	}
+
+/*
+          Step (3L): apply the inverse of the left singular vector
+          matrix to BX.
+*/
+
+	if (*k == 1) {
+	    ccopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
+	    if (z__[1] < 0.f) {
+		csscal_(nrhs, &c_b1290, &b[b_offset], ldb);
+	    }
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		diflj = difl[j];
+		dj = poles[j + poles_dim1];
+		dsigj = -poles[j + ((poles_dim1) << (1))];
+		if (j < *k) {
+		    difrj = -difr[j + difr_dim1];
+		    dsigjp = -poles[j + 1 + ((poles_dim1) << (1))];
+		}
+		if ((z__[j] == 0.f) || (poles[j + ((poles_dim1) << (1))] ==
+			0.f)) {
+		    rwork[j] = 0.f;
+		} else {
+		    rwork[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] /
+			    diflj / (poles[j + ((poles_dim1) << (1))] + dj);
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1)
+			    )] == 0.f)) {
+			rwork[i__] = 0.f;
+		    } else {
+			rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (slamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigj) - diflj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L30: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1)
+			    )] == 0.f)) {
+			rwork[i__] = 0.f;
+		    } else {
+			rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (slamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigjp) + difrj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L40: */
+		}
+		rwork[1] = -1.f;
+		temp = snrm2_(k, &rwork[1], &c__1);
+
+/*
+                Since B and BX are complex, the following call to SGEMV
+                is performed in two steps (real and imaginary parts).
+
+                CALL SGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+      $                     B( J, 1 ), LDB )
+*/
+
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			i__4 = jrow + jcol * bx_dim1;
+			rwork[i__] = bx[i__4].r;
+/* L50: */
+		    }
+/* L60: */
+		}
+		sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1], &
+			c__1);
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			rwork[i__] = r_imag(&bx[jrow + jcol * bx_dim1]);
+/* L70: */
+		    }
+/* L80: */
+		}
+		sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1 + *
+			nrhs], &c__1);
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = j + jcol * b_dim1;
+		    i__4 = jcol + *k;
+		    i__5 = jcol + *k + *nrhs;
+		    q__1.r = rwork[i__4], q__1.i = rwork[i__5];
+		    b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L90: */
+		}
+		clascl_("G", &c__0, &c__0, &temp, &c_b1011, &c__1, nrhs, &b[j
+			+ b_dim1], ldb, info);
+/* L100: */
+	    }
+	}
+
+/*        Move the deflated rows of BX to B also. */
+
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    clacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1
+		    + b_dim1], ldb);
+	}
+    } else {
+
+/*
+          Apply back the right orthogonal transformations.
+
+          Step (1R): apply back the new right singular vector matrix
+          to B.
+*/
+
+	if (*k == 1) {
+	    ccopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		dsigj = poles[j + ((poles_dim1) << (1))];
+		if (z__[j] == 0.f) {
+		    rwork[j] = 0.f;
+		} else {
+		    rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j +
+			    poles_dim1]) / difr[j + ((difr_dim1) << (1))];
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.f) {
+			rwork[i__] = 0.f;
+		    } else {
+			r__1 = -poles[i__ + 1 + ((poles_dim1) << (1))];
+			rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[
+				i__ + difr_dim1]) / (dsigj + poles[i__ +
+				poles_dim1]) / difr[i__ + ((difr_dim1) << (1))
+				];
+		    }
+/* L110: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.f) {
+			rwork[i__] = 0.f;
+		    } else {
+			r__1 = -poles[i__ + ((poles_dim1) << (1))];
+			rwork[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
+				i__]) / (dsigj + poles[i__ + poles_dim1]) /
+				difr[i__ + ((difr_dim1) << (1))];
+		    }
+/* L120: */
+		}
+
+/*
+                Since B and BX are complex, the following call to SGEMV
+                is performed in two steps (real and imaginary parts).
+
+                CALL SGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+      $                     BX( J, 1 ), LDBX )
+*/
+
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			i__4 = jrow + jcol * b_dim1;
+			rwork[i__] = b[i__4].r;
+/* L130: */
+		    }
+/* L140: */
+		}
+		sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1], &
+			c__1);
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			rwork[i__] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L150: */
+		    }
+/* L160: */
+		}
+		sgemv_("T", k, nrhs, &c_b1011, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b320, &rwork[*k + 1 + *
+			nrhs], &c__1);
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = j + jcol * bx_dim1;
+		    i__4 = jcol + *k;
+		    i__5 = jcol + *k + *nrhs;
+		    q__1.r = rwork[i__4], q__1.i = rwork[i__5];
+		    bx[i__3].r = q__1.r, bx[i__3].i = q__1.i;
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+
+/*
+          Step (2R): if SQRE = 1, apply back the rotation that is
+          related to the right null space of the subproblem.
+*/
+
+	if (*sqre == 1) {
+	    ccopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
+	    csrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__,
+		    s);
+	}
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    clacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 +
+		    bx_dim1], ldbx);
+	}
+
+/*        Step (3R): permute rows of B. */
+
+	ccopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
+	if (*sqre == 1) {
+	    ccopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
+	}
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    ccopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1],
+		    ldb);
+/* L190: */
+	}
+
+/*        Step (4R): apply back the Givens rotations performed. */
+
+	for (i__ = *givptr; i__ >= 1; --i__) {
+	    r__1 = -givnum[i__ + givnum_dim1];
+	    csrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &r__1);
+/* L200: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLALS0 */
+
+} /* clals0_ */
+
+/* Subroutine */ int clalsa_(integer *icompq, integer *smlsiz, integer *n,
+	integer *nrhs, complex *b, integer *ldb, complex *bx, integer *ldbx,
+	real *u, integer *ldu, real *vt, integer *k, real *difl, real *difr,
+	real *z__, real *poles, integer *givptr, integer *givcol, integer *
+	ldgcol, integer *perm, real *givnum, real *c__, real *s, real *rwork,
+	integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1,
+	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset,
+	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset,
+	    z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1,
+	    i__2, i__3, i__4, i__5, i__6;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl,
+	    ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag, jreal,
+	    inode, ndiml;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer ndimr;
+    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
+	    complex *, integer *), clals0_(integer *, integer *, integer *,
+	    integer *, integer *, complex *, integer *, complex *, integer *,
+	    integer *, integer *, integer *, integer *, real *, integer *,
+	    real *, real *, real *, real *, integer *, real *, real *, real *,
+	     integer *), xerbla_(char *, integer *), slasdt_(integer *
+	    , integer *, integer *, integer *, integer *, integer *, integer *
+	    );
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLALSA is an itermediate step in solving the least squares problem
+    by computing the SVD of the coefficient matrix in compact form (The
+    singular vectors are computed as products of simple orthorgonal
+    matrices.).
+
+    If ICOMPQ = 0, CLALSA applies the inverse of the left singular vector
+    matrix of an upper bidiagonal matrix to the right hand side; and if
+    ICOMPQ = 1, CLALSA applies the right singular vector matrix to the
+    right hand side. The singular vector matrices were generated in
+    compact form by CLALSA.
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether the left or the right singular vector
+           matrix is involved.
+           = 0: Left singular vector matrix
+           = 1: Right singular vector matrix
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The row and column dimensions of the upper bidiagonal matrix.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input) COMPLEX array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,MAX( M, N ) ).
+
+    BX     (output) COMPLEX array, dimension ( LDBX, NRHS )
+           On exit, the result of applying the left or right singular
+           vector matrix to B.
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    U      (input) REAL array, dimension ( LDU, SMLSIZ ).
+           On entry, U contains the left singular vector matrices of all
+           subproblems at the bottom level.
+
+    LDU    (input) INTEGER, LDU = > N.
+           The leading dimension of arrays U, VT, DIFL, DIFR,
+           POLES, GIVNUM, and Z.
+
+    VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
+           On entry, VT' contains the right singular vector matrices of
+           all subproblems at the bottom level.
+
+    K      (input) INTEGER array, dimension ( N ).
+
+    DIFL   (input) REAL array, dimension ( LDU, NLVL ).
+           where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+
+    DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
+           On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+           distances between singular values on the I-th level and
+           singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+           record the normalizing factors of the right singular vectors
+           matrices of subproblems on I-th level.
+
+    Z      (input) REAL array, dimension ( LDU, NLVL ).
+           On entry, Z(1, I) contains the components of the deflation-
+           adjusted updating row vector for subproblems on the I-th
+           level.
+
+    POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
+           On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+           singular values involved in the secular equations on the I-th
+           level.
+
+    GIVPTR (input) INTEGER array, dimension ( N ).
+           On entry, GIVPTR( I ) records the number of Givens
+           rotations performed on the I-th problem on the computation
+           tree.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+           On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+           locations of Givens rotations performed on the I-th level on
+           the computation tree.
+
+    LDGCOL (input) INTEGER, LDGCOL = > N.
+           The leading dimension of arrays GIVCOL and PERM.
+
+    PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+           On entry, PERM(*, I) records permutations done on the I-th
+           level of the computation tree.
+
+    GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
+           On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+           values of Givens rotations performed on the I-th level on the
+           computation tree.
+
+    C      (input) REAL array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           C( I ) contains the C-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    S      (input) REAL array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           S( I ) contains the S-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    RWORK  (workspace) REAL array, dimension at least
+           max ( N, (SMLSZ+1)*NRHS*3 ).
+
+    IWORK  (workspace) INTEGER array.
+           The dimension must be at least 3 * N
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    givnum_dim1 = *ldu;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    poles_dim1 = *ldu;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    z_dim1 = *ldu;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    difr_dim1 = *ldu;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    difl_dim1 = *ldu;
+    difl_offset = 1 + difl_dim1;
+    difl -= difl_offset;
+    vt_dim1 = *ldu;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    --k;
+    --givptr;
+    perm_dim1 = *ldgcol;
+    perm_offset = 1 + perm_dim1;
+    perm -= perm_offset;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    --c__;
+    --s;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*smlsiz < 3) {
+	*info = -2;
+    } else if (*n < *smlsiz) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if (*ldb < *n) {
+	*info = -6;
+    } else if (*ldbx < *n) {
+	*info = -8;
+    } else if (*ldu < *n) {
+	*info = -10;
+    } else if (*ldgcol < *n) {
+	*info = -19;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLALSA", &i__1);
+	return 0;
+    }
+
+/*     Book-keeping and  setting up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+
+    slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       The following code applies back the left singular vector factors.
+       For applying back the right singular vector factors, go to 170.
+*/
+
+    if (*icompq == 1) {
+	goto L170;
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by SLASDQ. The corresponding left and right singular vector
+       matrices are in explicit form. First apply back the left
+       singular vector matrices.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+          IC : center row of each node
+          NL : number of rows of left  subproblem
+          NR : number of rows of right subproblem
+          NLF: starting row of the left   subproblem
+          NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlf = ic - nl;
+	nrf = ic + 1;
+
+/*
+          Since B and BX are complex, the following call to SGEMM
+          is performed in two steps (real and imaginary parts).
+
+          CALL SGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+       $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*/
+
+	j = (nl * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nl - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L10: */
+	    }
+/* L20: */
+	}
+	sgemm_("T", "N", &nl, nrhs, &nl, &c_b1011, &u[nlf + u_dim1], ldu, &
+		rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b320, &rwork[1], &
+		nl);
+	j = (nl * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nl - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L30: */
+	    }
+/* L40: */
+	}
+	sgemm_("T", "N", &nl, nrhs, &nl, &c_b1011, &u[nlf + u_dim1], ldu, &
+		rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b320, &rwork[nl * *
+		nrhs + 1], &nl);
+	jreal = 0;
+	jimag = nl * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nl - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
+		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
+/* L50: */
+	    }
+/* L60: */
+	}
+
+/*
+          Since B and BX are complex, the following call to SGEMM
+          is performed in two steps (real and imaginary parts).
+
+          CALL SGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*/
+
+	j = (nr * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nr - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L70: */
+	    }
+/* L80: */
+	}
+	sgemm_("T", "N", &nr, nrhs, &nr, &c_b1011, &u[nrf + u_dim1], ldu, &
+		rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b320, &rwork[1], &
+		nr);
+	j = (nr * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nr - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L90: */
+	    }
+/* L100: */
+	}
+	sgemm_("T", "N", &nr, nrhs, &nr, &c_b1011, &u[nrf + u_dim1], ldu, &
+		rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b320, &rwork[nr * *
+		nrhs + 1], &nr);
+	jreal = 0;
+	jimag = nr * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nr - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
+		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
+/* L110: */
+	    }
+/* L120: */
+	}
+
+/* L130: */
+    }
+
+/*
+       Next copy the rows of B that correspond to unchanged rows
+       in the bidiagonal matrix to BX.
+*/
+
+    i__1 = nd;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	ic = iwork[inode + i__ - 1];
+	ccopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx);
+/* L140: */
+    }
+
+/*
+       Finally go through the left singular vector matrices of all
+       the other subproblems bottom-up on the tree.
+*/
+
+    j = pow_ii(&c__2, &nlvl);
+    sqre = 0;
+
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          find the first node LF and last node LL on
+          the current level LVL
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    --j;
+	    clals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, &
+		    b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &rwork[1], info);
+/* L150: */
+	}
+/* L160: */
+    }
+    goto L330;
+
+/*     ICOMPQ = 1: applying back the right singular vector factors. */
+
+L170:
+
+/*
+       First now go through the right singular vector matrices of all
+       the tree nodes top-down.
+*/
+
+    j = 0;
+    i__1 = nlvl;
+    for (lvl = 1; lvl <= i__1; ++lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          Find the first node LF and last node LL on
+          the current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__2 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__2);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__2 = lf;
+	for (i__ = ll; i__ >= i__2; --i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    if (i__ == ll) {
+		sqre = 0;
+	    } else {
+		sqre = 1;
+	    }
+	    ++j;
+	    clals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[
+		    nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &rwork[1], info);
+/* L180: */
+	}
+/* L190: */
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by SLASDQ. The corresponding right singular vector
+       matrices are in explicit form. Apply them back.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlp1 = nl + 1;
+	if (i__ == nd) {
+	    nrp1 = nr;
+	} else {
+	    nrp1 = nr + 1;
+	}
+	nlf = ic - nl;
+	nrf = ic + 1;
+
+/*
+          Since B and BX are complex, the following call to SGEMM is
+          performed in two steps (real and imaginary parts).
+
+          CALL SGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+      $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*/
+
+	j = (nlp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nlp1 - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L200: */
+	    }
+/* L210: */
+	}
+	sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1011, &vt[nlf + vt_dim1],
+		ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b320, &
+		rwork[1], &nlp1);
+	j = (nlp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nlp1 - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L220: */
+	    }
+/* L230: */
+	}
+	sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1011, &vt[nlf + vt_dim1],
+		ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b320, &
+		rwork[nlp1 * *nrhs + 1], &nlp1);
+	jreal = 0;
+	jimag = nlp1 * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nlp1 - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
+		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
+/* L240: */
+	    }
+/* L250: */
+	}
+
+/*
+          Since B and BX are complex, the following call to SGEMM is
+          performed in two steps (real and imaginary parts).
+
+          CALL SGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*/
+
+	j = (nrp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nrp1 - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L260: */
+	    }
+/* L270: */
+	}
+	sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1011, &vt[nrf + vt_dim1],
+		ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b320, &
+		rwork[1], &nrp1);
+	j = (nrp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nrp1 - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L280: */
+	    }
+/* L290: */
+	}
+	sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1011, &vt[nrf + vt_dim1],
+		ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b320, &
+		rwork[nrp1 * *nrhs + 1], &nrp1);
+	jreal = 0;
+	jimag = nrp1 * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nrp1 - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		q__1.r = rwork[i__5], q__1.i = rwork[i__6];
+		bx[i__4].r = q__1.r, bx[i__4].i = q__1.i;
+/* L300: */
+	    }
+/* L310: */
+	}
+
+/* L320: */
+    }
+
+L330:
+
+    return 0;
+
+/*     End of CLALSA */
+
+} /* clalsa_ */
+
+/* Subroutine */ int clalsd_(char *uplo, integer *smlsiz, integer *n, integer
+	*nrhs, real *d__, real *e, complex *b, integer *ldb, real *rcond,
+	integer *rank, complex *work, real *rwork, integer *iwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+    real r__1;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *), log(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer c__, i__, j, k;
+    static real r__;
+    static integer s, u, z__;
+    static real cs;
+    static integer bx;
+    static real sn;
+    static integer st, vt, nm1, st1;
+    static real eps;
+    static integer iwk;
+    static real tol;
+    static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow,
+	     irwu, jimag, jreal;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer irwib;
+    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static integer poles, sizei, irwrb, nsize;
+    extern /* Subroutine */ int csrot_(integer *, complex *, integer *,
+	    complex *, integer *, real *, real *);
+    static integer irwvt, icmpq1, icmpq2;
+    extern /* Subroutine */ int clalsa_(integer *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, integer *, real *,
+	    integer *, real *, integer *, real *, real *, real *, real *,
+	    integer *, integer *, integer *, integer *, real *, real *, real *
+	    , real *, integer *, integer *), clascl_(char *, integer *,
+	    integer *, real *, real *, integer *, integer *, complex *,
+	    integer *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int slasda_(integer *, integer *, integer *,
+	    integer *, real *, real *, real *, integer *, real *, integer *,
+	    real *, real *, real *, real *, integer *, integer *, integer *,
+	    integer *, real *, real *, real *, real *, integer *, integer *),
+	    clacpy_(char *, integer *, integer *, complex *, integer *,
+	    complex *, integer *), claset_(char *, integer *, integer
+	    *, complex *, complex *, complex *, integer *), xerbla_(
+	    char *, integer *), slascl_(char *, integer *, integer *,
+	    real *, real *, integer *, integer *, real *, integer *, integer *
+	    );
+    extern integer isamax_(integer *, real *, integer *);
+    static integer givcol;
+    extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, real *, real *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, integer *),
+	    slaset_(char *, integer *, integer *, real *, real *, real *,
+	    integer *), slartg_(real *, real *, real *, real *, real *
+	    );
+    static real orgnrm;
+    static integer givnum;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
+    static integer givptr, nrwork, irwwrk, smlszp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    CLALSD uses the singular value decomposition of A to solve the least
+    squares problem of finding X to minimize the Euclidean norm of each
+    column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+    are N-by-NRHS. The solution X overwrites B.
+
+    The singular values of A smaller than RCOND times the largest
+    singular value are treated as zero in solving the least squares
+    problem; in this case a minimum norm solution is returned.
+    The actual singular values are returned in D in ascending order.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    UPLO   (input) CHARACTER*1
+           = 'U': D and E define an upper bidiagonal matrix.
+           = 'L': D and E define a  lower bidiagonal matrix.
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The dimension of the  bidiagonal matrix.  N >= 0.
+
+    NRHS   (input) INTEGER
+           The number of columns of B. NRHS must be at least 1.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry D contains the main diagonal of the bidiagonal
+           matrix. On exit, if INFO = 0, D contains its singular values.
+
+    E      (input) REAL array, dimension (N-1)
+           Contains the super-diagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    B      (input/output) COMPLEX array, dimension (LDB,NRHS)
+           On input, B contains the right hand sides of the least
+           squares problem. On output, B contains the solution X.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,N).
+
+    RCOND  (input) REAL
+           The singular values of A less than or equal to RCOND times
+           the largest singular value are treated as zero in solving
+           the least squares problem. If RCOND is negative,
+           machine precision is used instead.
+           For example, if diag(S)*X=B were the least squares problem,
+           where diag(S) is a diagonal matrix of singular values, the
+           solution would be X(i) = B(i) / S(i) if S(i) is greater than
+           RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+           RCOND*max(S).
+
+    RANK   (output) INTEGER
+           The number of singular values of A greater than RCOND times
+           the largest singular value.
+
+    WORK   (workspace) COMPLEX array, dimension at least
+           (N * NRHS).
+
+    RWORK  (workspace) REAL array, dimension at least
+           (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2),
+           where
+           NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+
+    IWORK  (workspace) INTEGER array, dimension at least
+           (3*N*NLVL + 11*N).
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+           > 0:  The algorithm failed to compute an singular value while
+                 working on the submatrix lying in rows and columns
+                 INFO/(N+1) through MOD(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if ((*ldb < 1) || (*ldb < *n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLALSD", &i__1);
+	return 0;
+    }
+
+    eps = slamch_("Epsilon");
+
+/*     Set up the tolerance. */
+
+    if ((*rcond <= 0.f) || (*rcond >= 1.f)) {
+	*rcond = eps;
+    }
+
+    *rank = 0;
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+	if (d__[1] == 0.f) {
+	    claset_("A", &c__1, nrhs, &c_b55, &c_b55, &b[b_offset], ldb);
+	} else {
+	    *rank = 1;
+	    clascl_("G", &c__0, &c__0, &d__[1], &c_b1011, &c__1, nrhs, &b[
+		    b_offset], ldb, info);
+	    d__[1] = dabs(d__[1]);
+	}
+	return 0;
+    }
+
+/*     Rotate the matrix if it is lower bidiagonal. */
+
+    if (*(unsigned char *)uplo == 'L') {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (*nrhs == 1) {
+		csrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], &
+			c__1, &cs, &sn);
+	    } else {
+		rwork[((i__) << (1)) - 1] = cs;
+		rwork[i__ * 2] = sn;
+	    }
+/* L10: */
+	}
+	if (*nrhs > 1) {
+	    i__1 = *nrhs;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		i__2 = *n - 1;
+		for (j = 1; j <= i__2; ++j) {
+		    cs = rwork[((j) << (1)) - 1];
+		    sn = rwork[j * 2];
+		    csrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__
+			    * b_dim1], &c__1, &cs, &sn);
+/* L20: */
+		}
+/* L30: */
+	    }
+	}
+    }
+
+/*     Scale. */
+
+    nm1 = *n - 1;
+    orgnrm = slanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.f) {
+	claset_("A", n, nrhs, &c_b55, &c_b55, &b[b_offset], ldb);
+	return 0;
+    }
+
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, &c__1, &d__[1], n, info);
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &nm1, &c__1, &e[1], &nm1,
+	    info);
+
+/*
+       If N is smaller than the minimum divide size SMLSIZ, then solve
+       the problem with another solver.
+*/
+
+    if (*n <= *smlsiz) {
+	irwu = 1;
+	irwvt = irwu + *n * *n;
+	irwwrk = irwvt + *n * *n;
+	irwrb = irwwrk;
+	irwib = irwrb + *n * *nrhs;
+	irwb = irwib + *n * *nrhs;
+	slaset_("A", n, n, &c_b320, &c_b1011, &rwork[irwu], n);
+	slaset_("A", n, n, &c_b320, &c_b1011, &rwork[irwvt], n);
+	slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n,
+		&rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info);
+	if (*info != 0) {
+	    return 0;
+	}
+
+/*
+          In the real version, B is passed to SLASDQ and multiplied
+          internally by Q'. Here B is complex and that product is
+          computed below in two steps (real and imaginary parts).
+*/
+
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		i__3 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__3].r;
+/* L40: */
+	    }
+/* L50: */
+	}
+	sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwu], n, &rwork[irwb],
+		n, &c_b320, &rwork[irwrb], n);
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L60: */
+	    }
+/* L70: */
+	}
+	sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwu], n, &rwork[irwb],
+		n, &c_b320, &rwork[irwib], n);
+	jreal = irwrb - 1;
+	jimag = irwib - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++jreal;
+		++jimag;
+		i__3 = jrow + jcol * b_dim1;
+		i__4 = jreal;
+		i__5 = jimag;
+		q__1.r = rwork[i__4], q__1.i = rwork[i__5];
+		b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L80: */
+	    }
+/* L90: */
+	}
+
+	tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (d__[i__] <= tol) {
+		claset_("A", &c__1, nrhs, &c_b55, &c_b55, &b[i__ + b_dim1],
+			ldb);
+	    } else {
+		clascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, &
+			b[i__ + b_dim1], ldb, info);
+		++(*rank);
+	    }
+/* L100: */
+	}
+
+/*
+          Since B is complex, the following call to SGEMM is performed
+          in two steps (real and imaginary parts). That is for V * B
+          (in the real version of the code V' is stored in WORK).
+
+          CALL SGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+      $               WORK( NWORK ), N )
+*/
+
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		i__3 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__3].r;
+/* L110: */
+	    }
+/* L120: */
+	}
+	sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwvt], n, &rwork[irwb],
+		 n, &c_b320, &rwork[irwrb], n);
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L130: */
+	    }
+/* L140: */
+	}
+	sgemm_("T", "N", n, nrhs, n, &c_b1011, &rwork[irwvt], n, &rwork[irwb],
+		 n, &c_b320, &rwork[irwib], n);
+	jreal = irwrb - 1;
+	jimag = irwib - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++jreal;
+		++jimag;
+		i__3 = jrow + jcol * b_dim1;
+		i__4 = jreal;
+		i__5 = jimag;
+		q__1.r = rwork[i__4], q__1.i = rwork[i__5];
+		b[i__3].r = q__1.r, b[i__3].i = q__1.i;
+/* L150: */
+	    }
+/* L160: */
+	}
+
+/*        Unscale. */
+
+	slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n,
+		info);
+	slasrt_("D", n, &d__[1], info);
+	clascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset],
+		ldb, info);
+
+	return 0;
+    }
+
+/*     Book-keeping and setting up some constants. */
+
+    nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1;
+
+    smlszp = *smlsiz + 1;
+
+    u = 1;
+    vt = *smlsiz * *n + 1;
+    difl = vt + smlszp * *n;
+    difr = difl + nlvl * *n;
+    z__ = difr + ((nlvl * *n) << (1));
+    c__ = z__ + nlvl * *n;
+    s = c__ + *n;
+    poles = s + *n;
+    givnum = poles + ((nlvl) << (1)) * *n;
+    nrwork = givnum + ((nlvl) << (1)) * *n;
+    bx = 1;
+
+    irwrb = nrwork;
+    irwib = irwrb + *smlsiz * *nrhs;
+    irwb = irwib + *smlsiz * *nrhs;
+
+    sizei = *n + 1;
+    k = sizei + *n;
+    givptr = k + *n;
+    perm = givptr + *n;
+    givcol = perm + nlvl * *n;
+    iwk = givcol + ((nlvl * *n) << (1));
+
+    st = 1;
+    sqre = 0;
+    icmpq1 = 1;
+    icmpq2 = 0;
+    nsub = 0;
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = d__[i__], dabs(r__1)) < eps) {
+	    d__[i__] = r_sign(&eps, &d__[i__]);
+	}
+/* L170: */
+    }
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (((r__1 = e[i__], dabs(r__1)) < eps) || (i__ == nm1)) {
+	    ++nsub;
+	    iwork[nsub] = st;
+
+/*
+             Subproblem found. First determine its size and then
+             apply divide and conquer on it.
+*/
+
+	    if (i__ < nm1) {
+
+/*              A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else if ((r__1 = e[i__], dabs(r__1)) >= eps) {
+
+/*              A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else {
+
+/*
+                A subproblem with E(NM1) small. This implies an
+                1-by-1 subproblem at D(N), which is not solved
+                explicitly.
+*/
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+		++nsub;
+		iwork[nsub] = *n;
+		iwork[sizei + nsub - 1] = 1;
+		ccopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n);
+	    }
+	    st1 = st - 1;
+	    if (nsize == 1) {
+
+/*
+                This is a 1-by-1 subproblem and is not solved
+                explicitly.
+*/
+
+		ccopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n);
+	    } else if (nsize <= *smlsiz) {
+
+/*              This is a small subproblem and is solved by SLASDQ. */
+
+		slaset_("A", &nsize, &nsize, &c_b320, &c_b1011, &rwork[vt +
+			st1], n);
+		slaset_("A", &nsize, &nsize, &c_b320, &c_b1011, &rwork[u +
+			st1], n);
+		slasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], &
+			e[st], &rwork[vt + st1], n, &rwork[u + st1], n, &
+			rwork[nrwork], &c__1, &rwork[nrwork], info)
+			;
+		if (*info != 0) {
+		    return 0;
+		}
+
+/*
+                In the real version, B is passed to SLASDQ and multiplied
+                internally by Q'. Here B is complex and that product is
+                computed below in two steps (real and imaginary parts).
+*/
+
+		j = irwb - 1;
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = st + nsize - 1;
+		    for (jrow = st; jrow <= i__3; ++jrow) {
+			++j;
+			i__4 = jrow + jcol * b_dim1;
+			rwork[j] = b[i__4].r;
+/* L180: */
+		    }
+/* L190: */
+		}
+		sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[u +
+			st1], n, &rwork[irwb], &nsize, &c_b320, &rwork[irwrb],
+			 &nsize);
+		j = irwb - 1;
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = st + nsize - 1;
+		    for (jrow = st; jrow <= i__3; ++jrow) {
+			++j;
+			rwork[j] = r_imag(&b[jrow + jcol * b_dim1]);
+/* L200: */
+		    }
+/* L210: */
+		}
+		sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[u +
+			st1], n, &rwork[irwb], &nsize, &c_b320, &rwork[irwib],
+			 &nsize);
+		jreal = irwrb - 1;
+		jimag = irwib - 1;
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = st + nsize - 1;
+		    for (jrow = st; jrow <= i__3; ++jrow) {
+			++jreal;
+			++jimag;
+			i__4 = jrow + jcol * b_dim1;
+			i__5 = jreal;
+			i__6 = jimag;
+			q__1.r = rwork[i__5], q__1.i = rwork[i__6];
+			b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L220: */
+		    }
+/* L230: */
+		}
+
+		clacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx +
+			st1], n);
+	    } else {
+
+/*              A large problem. Solve it using divide and conquer. */
+
+		slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], &
+			rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1],
+			&rwork[difl + st1], &rwork[difr + st1], &rwork[z__ +
+			st1], &rwork[poles + st1], &iwork[givptr + st1], &
+			iwork[givcol + st1], n, &iwork[perm + st1], &rwork[
+			givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &
+			rwork[nrwork], &iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+		bxst = bx + st1;
+		clalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, &
+			work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], &
+			iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1]
+			, &rwork[z__ + st1], &rwork[poles + st1], &iwork[
+			givptr + st1], &iwork[givcol + st1], n, &iwork[perm +
+			st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[
+			s + st1], &rwork[nrwork], &iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+	    }
+	    st = i__ + 1;
+	}
+/* L240: */
+    }
+
+/*     Apply the singular values and treat the tiny ones as zero. */
+
+    tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+          Some of the elements in D can be negative because 1-by-1
+          subproblems were not solved explicitly.
+*/
+
+	if ((r__1 = d__[i__], dabs(r__1)) <= tol) {
+	    claset_("A", &c__1, nrhs, &c_b55, &c_b55, &work[bx + i__ - 1], n);
+	} else {
+	    ++(*rank);
+	    clascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, &
+		    work[bx + i__ - 1], n, info);
+	}
+	d__[i__] = (r__1 = d__[i__], dabs(r__1));
+/* L250: */
+    }
+
+/*     Now apply back the right singular vectors. */
+
+    icmpq2 = 1;
+    i__1 = nsub;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	st = iwork[i__];
+	st1 = st - 1;
+	nsize = iwork[sizei + i__ - 1];
+	bxst = bx + st1;
+	if (nsize == 1) {
+	    ccopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb);
+	} else if (nsize <= *smlsiz) {
+
+/*
+             Since B and BX are complex, the following call to SGEMM
+             is performed in two steps (real and imaginary parts).
+
+             CALL SGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+      $                  RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
+      $                  B( ST, 1 ), LDB )
+*/
+
+	    j = bxst - *n - 1;
+	    jreal = irwb - 1;
+	    i__2 = *nrhs;
+	    for (jcol = 1; jcol <= i__2; ++jcol) {
+		j += *n;
+		i__3 = nsize;
+		for (jrow = 1; jrow <= i__3; ++jrow) {
+		    ++jreal;
+		    i__4 = j + jrow;
+		    rwork[jreal] = work[i__4].r;
+/* L260: */
+		}
+/* L270: */
+	    }
+	    sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[vt + st1],
+		     n, &rwork[irwb], &nsize, &c_b320, &rwork[irwrb], &nsize);
+	    j = bxst - *n - 1;
+	    jimag = irwb - 1;
+	    i__2 = *nrhs;
+	    for (jcol = 1; jcol <= i__2; ++jcol) {
+		j += *n;
+		i__3 = nsize;
+		for (jrow = 1; jrow <= i__3; ++jrow) {
+		    ++jimag;
+		    rwork[jimag] = r_imag(&work[j + jrow]);
+/* L280: */
+		}
+/* L290: */
+	    }
+	    sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &rwork[vt + st1],
+		     n, &rwork[irwb], &nsize, &c_b320, &rwork[irwib], &nsize);
+	    jreal = irwrb - 1;
+	    jimag = irwib - 1;
+	    i__2 = *nrhs;
+	    for (jcol = 1; jcol <= i__2; ++jcol) {
+		i__3 = st + nsize - 1;
+		for (jrow = st; jrow <= i__3; ++jrow) {
+		    ++jreal;
+		    ++jimag;
+		    i__4 = jrow + jcol * b_dim1;
+		    i__5 = jreal;
+		    i__6 = jimag;
+		    q__1.r = rwork[i__5], q__1.i = rwork[i__6];
+		    b[i__4].r = q__1.r, b[i__4].i = q__1.i;
+/* L300: */
+		}
+/* L310: */
+	    }
+	} else {
+	    clalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st +
+		    b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], &
+		    iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], &
+		    rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr +
+		    st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[
+		    givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[
+		    nrwork], &iwork[iwk], info);
+	    if (*info != 0) {
+		return 0;
+	    }
+	}
+/* L320: */
+    }
+
+/*     Unscale and sort the singular values. */
+
+    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, info);
+    slasrt_("D", n, &d__[1], info);
+    clascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset], ldb,
+	    info);
+
+    return 0;
+
+/*     End of CLALSD */
+
+} /* clalsd_ */
+
+doublereal clange_(char *norm, integer *m, integer *n, complex *a, integer *
+	lda, real *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real ret_val, r__1, r__2;
+
+    /* Builtin functions */
+    double c_abs(complex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static real sum, scale;
+    extern logical lsame_(char *, char *);
+    static real value;
+    extern /* Subroutine */ int classq_(integer *, complex *, integer *, real
+	    *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLANGE  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    complex matrix A.
+
+    Description
+    ===========
+
+    CLANGE returns the value
+
+       CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in CLANGE as described
+            above.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.  When M = 0,
+            CLANGE is set to zero.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.  When N = 0,
+            CLANGE is set to zero.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The m by n matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(M,1).
+
+    WORK    (workspace) REAL array, dimension (LWORK),
+            where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (min(*m,*n) == 0) {
+	value = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]);
+		value = dmax(r__1,r__2);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.f;
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += c_abs(&a[i__ + j * a_dim1]);
+/* L30: */
+	    }
+	    value = dmax(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.f;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += c_abs(&a[i__ + j * a_dim1]);
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.f;
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    r__1 = value, r__2 = work[i__];
+	    value = dmax(r__1,r__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    classq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of CLANGE */
+
+} /* clange_ */
+
+doublereal clanhe_(char *norm, char *uplo, integer *n, complex *a, integer *
+	lda, real *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real ret_val, r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double c_abs(complex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static real sum, absa, scale;
+    extern logical lsame_(char *, char *);
+    static real value;
+    extern /* Subroutine */ int classq_(integer *, complex *, integer *, real
+	    *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLANHE  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    complex hermitian matrix A.
+
+    Description
+    ===========
+
+    CLANHE returns the value
+
+       CLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in CLANHE as described
+            above.
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            hermitian matrix A is to be referenced.
+            = 'U':  Upper triangular part of A is referenced
+            = 'L':  Lower triangular part of A is referenced
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, CLANHE is
+            set to zero.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The hermitian matrix A.  If UPLO = 'U', the leading n by n
+            upper triangular part of A contains the upper triangular part
+            of the matrix A, and the strictly lower triangular part of A
+            is not referenced.  If UPLO = 'L', the leading n by n lower
+            triangular part of A contains the lower triangular part of
+            the matrix A, and the strictly upper triangular part of A is
+            not referenced. Note that the imaginary parts of the diagonal
+            elements need not be set and are assumed to be zero.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) REAL array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+            WORK is not referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.f;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]);
+		    value = dmax(r__1,r__2);
+/* L10: */
+		}
+/* Computing MAX */
+		i__2 = j + j * a_dim1;
+		r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1));
+		value = dmax(r__2,r__3);
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+		i__2 = j + j * a_dim1;
+		r__2 = value, r__3 = (r__1 = a[i__2].r, dabs(r__1));
+		value = dmax(r__2,r__3);
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]);
+		    value = dmax(r__1,r__2);
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (((lsame_(norm, "I")) || (lsame_(norm,
+	    "O"))) || (*(unsigned char *)norm == '1')) {
+
+/*        Find normI(A) ( = norm1(A), since A is hermitian). */
+
+	value = 0.f;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		sum = 0.f;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    absa = c_abs(&a[i__ + j * a_dim1]);
+		    sum += absa;
+		    work[i__] += absa;
+/* L50: */
+		}
+		i__2 = j + j * a_dim1;
+		work[j] = sum + (r__1 = a[i__2].r, dabs(r__1));
+/* L60: */
+	    }
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+		r__1 = value, r__2 = work[i__];
+		value = dmax(r__1,r__2);
+/* L70: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] = 0.f;
+/* L80: */
+	    }
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j + j * a_dim1;
+		sum = work[j] + (r__1 = a[i__2].r, dabs(r__1));
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    absa = c_abs(&a[i__ + j * a_dim1]);
+		    sum += absa;
+		    work[i__] += absa;
+/* L90: */
+		}
+		value = dmax(value,sum);
+/* L100: */
+	    }
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		i__2 = j - 1;
+		classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L110: */
+	    }
+	} else {
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n - j;
+		classq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum);
+/* L120: */
+	    }
+	}
+	sum *= 2;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    if (a[i__2].r != 0.f) {
+		i__2 = i__ + i__ * a_dim1;
+		absa = (r__1 = a[i__2].r, dabs(r__1));
+		if (scale < absa) {
+/* Computing 2nd power */
+		    r__1 = scale / absa;
+		    sum = sum * (r__1 * r__1) + 1.f;
+		    scale = absa;
+		} else {
+/* Computing 2nd power */
+		    r__1 = absa / scale;
+		    sum += r__1 * r__1;
+		}
+	    }
+/* L130: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of CLANHE */
+
+} /* clanhe_ */
+
+doublereal clanhs_(char *norm, integer *n, complex *a, integer *lda, real *
+	work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    real ret_val, r__1, r__2;
+
+    /* Builtin functions */
+    double c_abs(complex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static real sum, scale;
+    extern logical lsame_(char *, char *);
+    static real value;
+    extern /* Subroutine */ int classq_(integer *, complex *, integer *, real
+	    *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLANHS  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    Hessenberg matrix A.
+
+    Description
+    ===========
+
+    CLANHS returns the value
+
+       CLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in CLANHS as described
+            above.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, CLANHS is
+            set to zero.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The n by n upper Hessenberg matrix A; the part of A below the
+            first sub-diagonal is not referenced.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) REAL array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		r__1 = value, r__2 = c_abs(&a[i__ + j * a_dim1]);
+		value = dmax(r__1,r__2);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.f;
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += c_abs(&a[i__ + j * a_dim1]);
+/* L30: */
+	    }
+	    value = dmax(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.f;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += c_abs(&a[i__ + j * a_dim1]);
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.f;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    r__1 = value, r__2 = work[i__];
+	    value = dmax(r__1,r__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    classq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of CLANHS */
+
+} /* clanhs_ */
+
+/* Subroutine */ int clarcm_(integer *m, integer *n, real *a, integer *lda,
+	complex *b, integer *ldb, complex *c__, integer *ldc, real *rwork)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5;
+    real r__1;
+    complex q__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLARCM performs a very simple matrix-matrix multiplication:
+             C := A * B,
+    where A is M by M and real; B is M by N and complex;
+    C is M by N and complex.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A and of the matrix C.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns and rows of the matrix B and
+            the number of columns of the matrix C.
+            N >= 0.
+
+    A       (input) REAL array, dimension (LDA, M)
+            A contains the M by M matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >=max(1,M).
+
+    B       (input) REAL array, dimension (LDB, N)
+            B contains the M by N matrix B.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B. LDB >=max(1,M).
+
+    C       (input) COMPLEX array, dimension (LDC, N)
+            C contains the M by N matrix C.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >=max(1,M).
+
+    RWORK   (workspace) REAL array, dimension (2*M*N)
+
+    =====================================================================
+
+
+       Quick return if possible.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --rwork;
+
+    /* Function Body */
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * b_dim1;
+	    rwork[(j - 1) * *m + i__] = b[i__3].r;
+/* L10: */
+	}
+/* L20: */
+    }
+
+    l = *m * *n + 1;
+    sgemm_("N", "N", m, n, m, &c_b1011, &a[a_offset], lda, &rwork[1], m, &
+	    c_b320, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = l + (j - 1) * *m + i__ - 1;
+	    c__[i__3].r = rwork[i__4], c__[i__3].i = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    rwork[(j - 1) * *m + i__] = r_imag(&b[i__ + j * b_dim1]);
+/* L50: */
+	}
+/* L60: */
+    }
+    sgemm_("N", "N", m, n, m, &c_b1011, &a[a_offset], lda, &rwork[1], m, &
+	    c_b320, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = i__ + j * c_dim1;
+	    r__1 = c__[i__4].r;
+	    i__5 = l + (j - 1) * *m + i__ - 1;
+	    q__1.r = r__1, q__1.i = rwork[i__5];
+	    c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L70: */
+	}
+/* L80: */
+    }
+
+    return 0;
+
+/*     End of CLARCM */
+
+} /* clarcm_ */
+
+/* Subroutine */ int clarf_(char *side, integer *m, integer *n, complex *v,
+	integer *incv, complex *tau, complex *c__, integer *ldc, complex *
+	work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset;
+    complex q__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *,
+	    complex *, integer *, complex *, integer *, complex *, integer *),
+	     cgemv_(char *, integer *, integer *, complex *, complex *,
+	    integer *, complex *, integer *, complex *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLARF applies a complex elementary reflector H to a complex M-by-N
+    matrix C, from either the left or the right. H is represented in the
+    form
+
+          H = I - tau * v * v'
+
+    where tau is a complex scalar and v is a complex vector.
+
+    If tau = 0, then H is taken to be the unit matrix.
+
+    To apply H' (the conjugate transpose of H), supply conjg(tau) instead
+    tau.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) COMPLEX array, dimension
+                       (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+                    or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+            The vector v in the representation of H. V is not used if
+            TAU = 0.
+
+    INCV    (input) INTEGER
+            The increment between elements of v. INCV <> 0.
+
+    TAU     (input) COMPLEX
+            The value tau in the representation of H.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX array, dimension
+                           (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C */
+
+	if ((tau->r != 0.f) || (tau->i != 0.f)) {
+
+/*           w := C' * v */
+
+	    cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &
+		    v[1], incv, &c_b55, &work[1], &c__1);
+
+/*           C := C - v * w' */
+
+	    q__1.r = -tau->r, q__1.i = -tau->i;
+	    cgerc_(m, n, &q__1, &v[1], incv, &work[1], &c__1, &c__[c_offset],
+		    ldc);
+	}
+    } else {
+
+/*        Form  C * H */
+
+	if ((tau->r != 0.f) || (tau->i != 0.f)) {
+
+/*           w := C * v */
+
+	    cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1],
+		    incv, &c_b55, &work[1], &c__1);
+
+/*           C := C - w * v' */
+
+	    q__1.r = -tau->r, q__1.i = -tau->i;
+	    cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], incv, &c__[c_offset],
+		    ldc);
+	}
+    }
+    return 0;
+
+/*     End of CLARF */
+
+} /* clarf_ */
+
+/* Subroutine */ int clarfb_(char *side, char *trans, char *direct, char *
+	storev, integer *m, integer *n, integer *k, complex *v, integer *ldv,
+	complex *t, integer *ldt, complex *c__, integer *ldc, complex *work,
+	integer *ldwork)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
+	    work_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
+	    complex *, integer *), ctrmm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *), clacgv_(integer *,
+	    complex *, integer *);
+    static char transt[1];
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLARFB applies a complex block reflector H or its transpose H' to a
+    complex M-by-N matrix C, from either the left or the right.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply H or H' from the Left
+            = 'R': apply H or H' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply H (No transpose)
+            = 'C': apply H' (Conjugate transpose)
+
+    DIRECT  (input) CHARACTER*1
+            Indicates how H is formed from a product of elementary
+            reflectors
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Indicates how the vectors which define the elementary
+            reflectors are stored:
+            = 'C': Columnwise
+            = 'R': Rowwise
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    K       (input) INTEGER
+            The order of the matrix T (= the number of elementary
+            reflectors whose product defines the block reflector).
+
+    V       (input) COMPLEX array, dimension
+                                  (LDV,K) if STOREV = 'C'
+                                  (LDV,M) if STOREV = 'R' and SIDE = 'L'
+                                  (LDV,N) if STOREV = 'R' and SIDE = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+            if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+            if STOREV = 'R', LDV >= K.
+
+    T       (input) COMPLEX array, dimension (LDT,K)
+            The triangular K-by-K matrix T in the representation of the
+            block reflector.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX array, dimension (LDWORK,K)
+
+    LDWORK  (input) INTEGER
+            The leading dimension of the array WORK.
+            If SIDE = 'L', LDWORK >= max(1,N);
+            if SIDE = 'R', LDWORK >= max(1,M).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    work_dim1 = *ldwork;
+    work_offset = 1 + work_dim1;
+    work -= work_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (lsame_(trans, "N")) {
+	*(unsigned char *)transt = 'C';
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+    if (lsame_(storev, "C")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1 )    (first K rows)
+                       ( V2 )
+             where  V1  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+		    clacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L10: */
+		}
+
+/*              W := W * V1 */
+
+		ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2 */
+
+		    i__1 = *m - *k;
+		    cgemm_("Conjugate transpose", "No transpose", n, k, &i__1,
+			     &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 +
+			    v_dim1], ldv, &c_b56, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2 * W' */
+
+		    i__1 = *m - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "Conjugate transpose", &i__1, n, k,
+			     &q__1, &v[*k + 1 + v_dim1], ldv, &work[
+			    work_offset], ldwork, &c_b56, &c__[*k + 1 +
+			    c_dim1], ldc);
+		}
+
+/*              W := W * V1' */
+
+		ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k,
+			&c_b56, &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = j + i__ * c_dim1;
+			i__4 = j + i__ * c_dim1;
+			r_cnjg(&q__2, &work[i__ + j * work_dim1]);
+			q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i -
+				q__2.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L20: */
+		    }
+/* L30: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L40: */
+		}
+
+/*              W := W * V1 */
+
+		ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2 */
+
+		    i__1 = *n - *k;
+		    cgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k +
+			    1 + v_dim1], ldv, &c_b56, &work[work_offset],
+			    ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2' */
+
+		    i__1 = *n - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "Conjugate transpose", m, &i__1, k,
+			     &q__1, &work[work_offset], ldwork, &v[*k + 1 +
+			    v_dim1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1],
+			     ldc);
+		}
+
+/*              W := W * V1' */
+
+		ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k,
+			&c_b56, &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1 )
+                       ( V2 )    (last K rows)
+             where  V2  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+		    clacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L70: */
+		}
+
+/*              W := W * V2 */
+
+		ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56,
+			 &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1 */
+
+		    i__1 = *m - *k;
+		    cgemm_("Conjugate transpose", "No transpose", n, k, &i__1,
+			     &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b56, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1 * W' */
+
+		    i__1 = *m - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "Conjugate transpose", &i__1, n, k,
+			     &q__1, &v[v_offset], ldv, &work[work_offset],
+			    ldwork, &c_b56, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k,
+			&c_b56, &v[*m - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = *m - *k + j + i__ * c_dim1;
+			i__4 = *m - *k + j + i__ * c_dim1;
+			r_cnjg(&q__2, &work[i__ + j * work_dim1]);
+			q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i -
+				q__2.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L80: */
+		    }
+/* L90: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L100: */
+		}
+
+/*              W := W * V2 */
+
+		ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56,
+			 &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1 */
+
+		    i__1 = *n - *k;
+		    cgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b56, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1' */
+
+		    i__1 = *n - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "Conjugate transpose", m, &i__1, k,
+			     &q__1, &work[work_offset], ldwork, &v[v_offset],
+			    ldv, &c_b56, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k,
+			&c_b56, &v[*n - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + (*n - *k + j) * c_dim1;
+			i__4 = i__ + (*n - *k + j) * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L110: */
+		    }
+/* L120: */
+		}
+	    }
+	}
+
+    } else if (lsame_(storev, "R")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1  V2 )    (V1: first K columns)
+             where  V1  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+		    clacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L130: */
+		}
+
+/*              W := W * V1' */
+
+		ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k,
+			&c_b56, &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2' */
+
+		    i__1 = *m - *k;
+		    cgemm_("Conjugate transpose", "Conjugate transpose", n, k,
+			     &i__1, &c_b56, &c__[*k + 1 + c_dim1], ldc, &v[(*
+			    k + 1) * v_dim1 + 1], ldv, &c_b56, &work[
+			    work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ctrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2' * W' */
+
+		    i__1 = *m - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("Conjugate transpose", "Conjugate transpose", &
+			    i__1, n, k, &q__1, &v[(*k + 1) * v_dim1 + 1], ldv,
+			     &work[work_offset], ldwork, &c_b56, &c__[*k + 1
+			    + c_dim1], ldc);
+		}
+
+/*              W := W * V1 */
+
+		ctrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b56,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = j + i__ * c_dim1;
+			i__4 = j + i__ * c_dim1;
+			r_cnjg(&q__2, &work[i__ + j * work_dim1]);
+			q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i -
+				q__2.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L140: */
+		    }
+/* L150: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L160: */
+		}
+
+/*              W := W * V1' */
+
+		ctrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k,
+			&c_b56, &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2' */
+
+		    i__1 = *n - *k;
+		    cgemm_("No transpose", "Conjugate transpose", m, k, &i__1,
+			     &c_b56, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k
+			    + 1) * v_dim1 + 1], ldv, &c_b56, &work[
+			    work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ctrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2 */
+
+		    i__1 = *n - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1,
+			     &work[work_offset], ldwork, &v[(*k + 1) * v_dim1
+			    + 1], ldv, &c_b56, &c__[(*k + 1) * c_dim1 + 1],
+			    ldc);
+		}
+
+/*              W := W * V1 */
+
+		ctrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b56,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L170: */
+		    }
+/* L180: */
+		}
+
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1  V2 )    (V2: last K columns)
+             where  V2  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+		    clacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L190: */
+		}
+
+/*              W := W * V2' */
+
+		ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k,
+			&c_b56, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1' */
+
+		    i__1 = *m - *k;
+		    cgemm_("Conjugate transpose", "Conjugate transpose", n, k,
+			     &i__1, &c_b56, &c__[c_offset], ldc, &v[v_offset],
+			     ldv, &c_b56, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ctrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1' * W' */
+
+		    i__1 = *m - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("Conjugate transpose", "Conjugate transpose", &
+			    i__1, n, k, &q__1, &v[v_offset], ldv, &work[
+			    work_offset], ldwork, &c_b56, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		ctrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b56,
+			 &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = *m - *k + j + i__ * c_dim1;
+			i__4 = *m - *k + j + i__ * c_dim1;
+			r_cnjg(&q__2, &work[i__ + j * work_dim1]);
+			q__1.r = c__[i__4].r - q__2.r, q__1.i = c__[i__4].i -
+				q__2.i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L200: */
+		    }
+/* L210: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    ccopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L220: */
+		}
+
+/*              W := W * V2' */
+
+		ctrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k,
+			&c_b56, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1' */
+
+		    i__1 = *n - *k;
+		    cgemm_("No transpose", "Conjugate transpose", m, k, &i__1,
+			     &c_b56, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b56, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ctrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b56, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1 */
+
+		    i__1 = *n - *k;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "No transpose", m, &i__1, k, &q__1,
+			     &work[work_offset], ldwork, &v[v_offset], ldv, &
+			    c_b56, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		ctrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b56,
+			 &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + (*n - *k + j) * c_dim1;
+			i__4 = i__ + (*n - *k + j) * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			q__1.r = c__[i__4].r - work[i__5].r, q__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
+/* L230: */
+		    }
+/* L240: */
+		}
+
+	    }
+
+	}
+    }
+
+    return 0;
+
+/*     End of CLARFB */
+
+} /* clarfb_ */
+
+/* Subroutine */ int clarfg_(integer *n, complex *alpha, complex *x, integer *
+	incx, complex *tau)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    double r_imag(complex *), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer j, knt;
+    static real beta;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *);
+    static real alphi, alphr, xnorm;
+    extern doublereal scnrm2_(integer *, complex *, integer *), slapy3_(real *
+	    , real *, real *);
+    extern /* Complex */ VOID cladiv_(complex *, complex *, complex *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *);
+    static real safmin, rsafmn;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLARFG generates a complex elementary reflector H of order n, such
+    that
+
+          H' * ( alpha ) = ( beta ),   H' * H = I.
+               (   x   )   (   0  )
+
+    where alpha and beta are scalars, with beta real, and x is an
+    (n-1)-element complex vector. H is represented in the form
+
+          H = I - tau * ( 1 ) * ( 1 v' ) ,
+                        ( v )
+
+    where tau is a complex scalar and v is a complex (n-1)-element
+    vector. Note that H is not hermitian.
+
+    If the elements of x are all zero and alpha is real, then tau = 0
+    and H is taken to be the unit matrix.
+
+    Otherwise  1 <= real(tau) <= 2  and  abs(tau-1) <= 1 .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the elementary reflector.
+
+    ALPHA   (input/output) COMPLEX
+            On entry, the value alpha.
+            On exit, it is overwritten with the value beta.
+
+    X       (input/output) COMPLEX array, dimension
+                           (1+(N-2)*abs(INCX))
+            On entry, the vector x.
+            On exit, it is overwritten with the vector v.
+
+    INCX    (input) INTEGER
+            The increment between elements of X. INCX > 0.
+
+    TAU     (output) COMPLEX
+            The value tau.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n <= 0) {
+	tau->r = 0.f, tau->i = 0.f;
+	return 0;
+    }
+
+    i__1 = *n - 1;
+    xnorm = scnrm2_(&i__1, &x[1], incx);
+    alphr = alpha->r;
+    alphi = r_imag(alpha);
+
+    if (xnorm == 0.f && alphi == 0.f) {
+
+/*        H  =  I */
+
+	tau->r = 0.f, tau->i = 0.f;
+    } else {
+
+/*        general case */
+
+	r__1 = slapy3_(&alphr, &alphi, &xnorm);
+	beta = -r_sign(&r__1, &alphr);
+	safmin = slamch_("S") / slamch_("E");
+	rsafmn = 1.f / safmin;
+
+	if (dabs(beta) < safmin) {
+
+/*           XNORM, BETA may be inaccurate; scale X and recompute them */
+
+	    knt = 0;
+L10:
+	    ++knt;
+	    i__1 = *n - 1;
+	    csscal_(&i__1, &rsafmn, &x[1], incx);
+	    beta *= rsafmn;
+	    alphi *= rsafmn;
+	    alphr *= rsafmn;
+	    if (dabs(beta) < safmin) {
+		goto L10;
+	    }
+
+/*           New BETA is at most 1, at least SAFMIN */
+
+	    i__1 = *n - 1;
+	    xnorm = scnrm2_(&i__1, &x[1], incx);
+	    q__1.r = alphr, q__1.i = alphi;
+	    alpha->r = q__1.r, alpha->i = q__1.i;
+	    r__1 = slapy3_(&alphr, &alphi, &xnorm);
+	    beta = -r_sign(&r__1, &alphr);
+	    r__1 = (beta - alphr) / beta;
+	    r__2 = -alphi / beta;
+	    q__1.r = r__1, q__1.i = r__2;
+	    tau->r = q__1.r, tau->i = q__1.i;
+	    q__2.r = alpha->r - beta, q__2.i = alpha->i;
+	    cladiv_(&q__1, &c_b56, &q__2);
+	    alpha->r = q__1.r, alpha->i = q__1.i;
+	    i__1 = *n - 1;
+	    cscal_(&i__1, alpha, &x[1], incx);
+
+/*           If ALPHA is subnormal, it may lose relative accuracy */
+
+	    alpha->r = beta, alpha->i = 0.f;
+	    i__1 = knt;
+	    for (j = 1; j <= i__1; ++j) {
+		q__1.r = safmin * alpha->r, q__1.i = safmin * alpha->i;
+		alpha->r = q__1.r, alpha->i = q__1.i;
+/* L20: */
+	    }
+	} else {
+	    r__1 = (beta - alphr) / beta;
+	    r__2 = -alphi / beta;
+	    q__1.r = r__1, q__1.i = r__2;
+	    tau->r = q__1.r, tau->i = q__1.i;
+	    q__2.r = alpha->r - beta, q__2.i = alpha->i;
+	    cladiv_(&q__1, &c_b56, &q__2);
+	    alpha->r = q__1.r, alpha->i = q__1.i;
+	    i__1 = *n - 1;
+	    cscal_(&i__1, alpha, &x[1], incx);
+	    alpha->r = beta, alpha->i = 0.f;
+	}
+    }
+
+    return 0;
+
+/*     End of CLARFG */
+
+} /* clarfg_ */
+
+/* Subroutine */ int clarft_(char *direct, char *storev, integer *n, integer *
+	k, complex *v, integer *ldv, complex *tau, complex *t, integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, j;
+    static complex vii;
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *,
+	    complex *, integer *, complex *, integer *), clacgv_(integer *, complex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLARFT forms the triangular factor T of a complex block reflector H
+    of order n, which is defined as a product of k elementary reflectors.
+
+    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+
+    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+
+    If STOREV = 'C', the vector which defines the elementary reflector
+    H(i) is stored in the i-th column of the array V, and
+
+       H  =  I - V * T * V'
+
+    If STOREV = 'R', the vector which defines the elementary reflector
+    H(i) is stored in the i-th row of the array V, and
+
+       H  =  I - V' * T * V
+
+    Arguments
+    =========
+
+    DIRECT  (input) CHARACTER*1
+            Specifies the order in which the elementary reflectors are
+            multiplied to form the block reflector:
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Specifies how the vectors which define the elementary
+            reflectors are stored (see also Further Details):
+            = 'C': columnwise
+            = 'R': rowwise
+
+    N       (input) INTEGER
+            The order of the block reflector H. N >= 0.
+
+    K       (input) INTEGER
+            The order of the triangular factor T (= the number of
+            elementary reflectors). K >= 1.
+
+    V       (input/output) COMPLEX array, dimension
+                                 (LDV,K) if STOREV = 'C'
+                                 (LDV,N) if STOREV = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i).
+
+    T       (output) COMPLEX array, dimension (LDT,K)
+            The k by k triangular factor T of the block reflector.
+            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+            lower triangular. The rest of the array is not used.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    Further Details
+    ===============
+
+    The shape of the matrix V and the storage of the vectors which define
+    the H(i) is best illustrated by the following example with n = 5 and
+    k = 3. The elements equal to 1 are not stored; the corresponding
+    array elements are modified but restored on exit. The rest of the
+    array is not used.
+
+    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+
+                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+                     ( v1  1    )                     (     1 v2 v2 v2 )
+                     ( v1 v2  1 )                     (        1 v3 v3 )
+                     ( v1 v2 v3 )
+                     ( v1 v2 v3 )
+
+    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+
+                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+                     (     1 v3 )
+                     (        1 )
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__;
+	    if (tau[i__2].r == 0.f && tau[i__2].i == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    i__3 = j + i__ * t_dim1;
+		    t[i__3].r = 0.f, t[i__3].i = 0.f;
+/* L10: */
+		}
+	    } else {
+
+/*              general case */
+
+		i__2 = i__ + i__ * v_dim1;
+		vii.r = v[i__2].r, vii.i = v[i__2].i;
+		i__2 = i__ + i__ * v_dim1;
+		v[i__2].r = 1.f, v[i__2].i = 0.f;
+		if (lsame_(storev, "C")) {
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
+
+		    i__2 = *n - i__ + 1;
+		    i__3 = i__ - 1;
+		    i__4 = i__;
+		    q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
+		    cgemv_("Conjugate transpose", &i__2, &i__3, &q__1, &v[i__
+			    + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, &
+			    c_b55, &t[i__ * t_dim1 + 1], &c__1);
+		} else {
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */
+
+		    if (i__ < *n) {
+			i__2 = *n - i__;
+			clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv);
+		    }
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__ + 1;
+		    i__4 = i__;
+		    q__1.r = -tau[i__4].r, q__1.i = -tau[i__4].i;
+		    cgemv_("No transpose", &i__2, &i__3, &q__1, &v[i__ *
+			    v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, &
+			    c_b55, &t[i__ * t_dim1 + 1], &c__1);
+		    if (i__ < *n) {
+			i__2 = *n - i__;
+			clacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv);
+		    }
+		}
+		i__2 = i__ + i__ * v_dim1;
+		v[i__2].r = vii.r, v[i__2].i = vii.i;
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		ctrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		i__2 = i__ + i__ * t_dim1;
+		i__3 = i__;
+		t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+	    }
+/* L20: */
+	}
+    } else {
+	for (i__ = *k; i__ >= 1; --i__) {
+	    i__1 = i__;
+	    if (tau[i__1].r == 0.f && tau[i__1].i == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    i__2 = j + i__ * t_dim1;
+		    t[i__2].r = 0.f, t[i__2].i = 0.f;
+/* L30: */
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+			i__1 = *n - *k + i__ + i__ * v_dim1;
+			vii.r = v[i__1].r, vii.i = v[i__1].i;
+			i__1 = *n - *k + i__ + i__ * v_dim1;
+			v[i__1].r = 1.f, v[i__1].i = 0.f;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*/
+
+			i__1 = *n - *k + i__;
+			i__2 = *k - i__;
+			i__3 = i__;
+			q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+			cgemv_("Conjugate transpose", &i__1, &i__2, &q__1, &v[
+				(i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1
+				+ 1], &c__1, &c_b55, &t[i__ + 1 + i__ *
+				t_dim1], &c__1);
+			i__1 = *n - *k + i__ + i__ * v_dim1;
+			v[i__1].r = vii.r, v[i__1].i = vii.i;
+		    } else {
+			i__1 = i__ + (*n - *k + i__) * v_dim1;
+			vii.r = v[i__1].r, vii.i = v[i__1].i;
+			i__1 = i__ + (*n - *k + i__) * v_dim1;
+			v[i__1].r = 1.f, v[i__1].i = 0.f;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*/
+
+			i__1 = *n - *k + i__ - 1;
+			clacgv_(&i__1, &v[i__ + v_dim1], ldv);
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__;
+			i__3 = i__;
+			q__1.r = -tau[i__3].r, q__1.i = -tau[i__3].i;
+			cgemv_("No transpose", &i__1, &i__2, &q__1, &v[i__ +
+				1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, &
+				c_b55, &t[i__ + 1 + i__ * t_dim1], &c__1);
+			i__1 = *n - *k + i__ - 1;
+			clacgv_(&i__1, &v[i__ + v_dim1], ldv);
+			i__1 = i__ + (*n - *k + i__) * v_dim1;
+			v[i__1].r = vii.r, v[i__1].i = vii.i;
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    ctrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		}
+		i__1 = i__ + i__ * t_dim1;
+		i__2 = i__;
+		t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+	    }
+/* L40: */
+	}
+    }
+    return 0;
+
+/*     End of CLARFT */
+
+} /* clarft_ */
+
+/* Subroutine */ int clarfx_(char *side, integer *m, integer *n, complex *v,
+	complex *tau, complex *c__, integer *ldc, complex *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8,
+	    i__9, i__10, i__11;
+    complex q__1, q__2, q__3, q__4, q__5, q__6, q__7, q__8, q__9, q__10,
+	    q__11, q__12, q__13, q__14, q__15, q__16, q__17, q__18, q__19;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer j;
+    static complex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6,
+	     v7, v8, v9, t10, v10, sum;
+    extern /* Subroutine */ int cgerc_(integer *, integer *, complex *,
+	    complex *, integer *, complex *, integer *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLARFX applies a complex elementary reflector H to a complex m by n
+    matrix C, from either the left or the right. H is represented in the
+    form
+
+          H = I - tau * v * v'
+
+    where tau is a complex scalar and v is a complex vector.
+
+    If tau = 0, then H is taken to be the unit matrix
+
+    This version uses inline code if H has order < 11.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) COMPLEX array, dimension (M) if SIDE = 'L'
+                                          or (N) if SIDE = 'R'
+            The vector v in the representation of H.
+
+    TAU     (input) COMPLEX
+            The value tau in the representation of H.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDA >= max(1,M).
+
+    WORK    (workspace) COMPLEX array, dimension (N) if SIDE = 'L'
+                                              or (M) if SIDE = 'R'
+            WORK is not referenced if H has order < 11.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (tau->r == 0.f && tau->i == 0.f) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C, where H has order m. */
+
+	switch (*m) {
+	    case 1:  goto L10;
+	    case 2:  goto L30;
+	    case 3:  goto L50;
+	    case 4:  goto L70;
+	    case 5:  goto L90;
+	    case 6:  goto L110;
+	    case 7:  goto L130;
+	    case 8:  goto L150;
+	    case 9:  goto L170;
+	    case 10:  goto L190;
+	}
+
+/*
+          Code for general M
+
+          w := C'*v
+*/
+
+	cgemv_("Conjugate transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1]
+		, &c__1, &c_b55, &work[1], &c__1);
+
+/*        C := C - tau * v * w' */
+
+	q__1.r = -tau->r, q__1.i = -tau->i;
+	cgerc_(m, n, &q__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset],
+		ldc);
+	goto L410;
+L10:
+
+/*        Special code for 1 x 1 Householder */
+
+	q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i
+		+ tau->i * v[1].r;
+	r_cnjg(&q__4, &v[1]);
+	q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i
+		+ q__3.i * q__4.r;
+	q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i;
+	t1.r = q__1.r, t1.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r *
+		    c__[i__3].i + t1.i * c__[i__3].r;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L20: */
+	}
+	goto L410;
+L30:
+
+/*        Special code for 2 x 2 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L40: */
+	}
+	goto L410;
+L50:
+
+/*        Special code for 3 x 3 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L60: */
+	}
+	goto L410;
+L70:
+
+/*        Special code for 4 x 4 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L80: */
+	}
+	goto L410;
+L90:
+
+/*        Special code for 5 x 5 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	r_cnjg(&q__1, &v[5]);
+	v5.r = q__1.r, v5.i = q__1.i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i;
+	    i__6 = j * c_dim1 + 5;
+	    q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r *
+		    c__[i__6].i + v5.i * c__[i__6].r;
+	    q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L100: */
+	}
+	goto L410;
+L110:
+
+/*        Special code for 6 x 6 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	r_cnjg(&q__1, &v[5]);
+	v5.r = q__1.r, v5.i = q__1.i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	r_cnjg(&q__1, &v[6]);
+	v6.r = q__1.r, v6.i = q__1.i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i;
+	    i__6 = j * c_dim1 + 5;
+	    q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i;
+	    i__7 = j * c_dim1 + 6;
+	    q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L120: */
+	}
+	goto L410;
+L130:
+
+/*        Special code for 7 x 7 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	r_cnjg(&q__1, &v[5]);
+	v5.r = q__1.r, v5.i = q__1.i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	r_cnjg(&q__1, &v[6]);
+	v6.r = q__1.r, v6.i = q__1.i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	r_cnjg(&q__1, &v[7]);
+	v7.r = q__1.r, v7.i = q__1.i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i;
+	    i__6 = j * c_dim1 + 5;
+	    q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i;
+	    i__7 = j * c_dim1 + 6;
+	    q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i;
+	    i__8 = j * c_dim1 + 7;
+	    q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L140: */
+	}
+	goto L410;
+L150:
+
+/*        Special code for 8 x 8 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	r_cnjg(&q__1, &v[5]);
+	v5.r = q__1.r, v5.i = q__1.i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	r_cnjg(&q__1, &v[6]);
+	v6.r = q__1.r, v6.i = q__1.i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	r_cnjg(&q__1, &v[7]);
+	v7.r = q__1.r, v7.i = q__1.i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	r_cnjg(&q__1, &v[8]);
+	v8.r = q__1.r, v8.i = q__1.i;
+	r_cnjg(&q__2, &v8);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t8.r = q__1.r, t8.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i;
+	    i__6 = j * c_dim1 + 5;
+	    q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i;
+	    i__7 = j * c_dim1 + 6;
+	    q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i;
+	    i__8 = j * c_dim1 + 7;
+	    q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i;
+	    i__9 = j * c_dim1 + 8;
+	    q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 8;
+	    i__3 = j * c_dim1 + 8;
+	    q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L160: */
+	}
+	goto L410;
+L170:
+
+/*        Special code for 9 x 9 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	r_cnjg(&q__1, &v[5]);
+	v5.r = q__1.r, v5.i = q__1.i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	r_cnjg(&q__1, &v[6]);
+	v6.r = q__1.r, v6.i = q__1.i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	r_cnjg(&q__1, &v[7]);
+	v7.r = q__1.r, v7.i = q__1.i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	r_cnjg(&q__1, &v[8]);
+	v8.r = q__1.r, v8.i = q__1.i;
+	r_cnjg(&q__2, &v8);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t8.r = q__1.r, t8.i = q__1.i;
+	r_cnjg(&q__1, &v[9]);
+	v9.r = q__1.r, v9.i = q__1.i;
+	r_cnjg(&q__2, &v9);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t9.r = q__1.r, t9.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i;
+	    i__6 = j * c_dim1 + 5;
+	    q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i;
+	    i__7 = j * c_dim1 + 6;
+	    q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i;
+	    i__8 = j * c_dim1 + 7;
+	    q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i;
+	    i__9 = j * c_dim1 + 8;
+	    q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i;
+	    i__10 = j * c_dim1 + 9;
+	    q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 8;
+	    i__3 = j * c_dim1 + 8;
+	    q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 9;
+	    i__3 = j * c_dim1 + 9;
+	    q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L180: */
+	}
+	goto L410;
+L190:
+
+/*        Special code for 10 x 10 Householder */
+
+	r_cnjg(&q__1, &v[1]);
+	v1.r = q__1.r, v1.i = q__1.i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	r_cnjg(&q__1, &v[2]);
+	v2.r = q__1.r, v2.i = q__1.i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	r_cnjg(&q__1, &v[3]);
+	v3.r = q__1.r, v3.i = q__1.i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	r_cnjg(&q__1, &v[4]);
+	v4.r = q__1.r, v4.i = q__1.i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	r_cnjg(&q__1, &v[5]);
+	v5.r = q__1.r, v5.i = q__1.i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	r_cnjg(&q__1, &v[6]);
+	v6.r = q__1.r, v6.i = q__1.i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	r_cnjg(&q__1, &v[7]);
+	v7.r = q__1.r, v7.i = q__1.i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	r_cnjg(&q__1, &v[8]);
+	v8.r = q__1.r, v8.i = q__1.i;
+	r_cnjg(&q__2, &v8);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t8.r = q__1.r, t8.i = q__1.i;
+	r_cnjg(&q__1, &v[9]);
+	v9.r = q__1.r, v9.i = q__1.i;
+	r_cnjg(&q__2, &v9);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t9.r = q__1.r, t9.i = q__1.i;
+	r_cnjg(&q__1, &v[10]);
+	v10.r = q__1.r, v10.i = q__1.i;
+	r_cnjg(&q__2, &v10);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t10.r = q__1.r, t10.i = q__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r
+		    * c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i;
+	    i__4 = j * c_dim1 + 3;
+	    q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i;
+	    i__5 = j * c_dim1 + 4;
+	    q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i;
+	    i__6 = j * c_dim1 + 5;
+	    q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i;
+	    i__7 = j * c_dim1 + 6;
+	    q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i;
+	    i__8 = j * c_dim1 + 7;
+	    q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i;
+	    i__9 = j * c_dim1 + 8;
+	    q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i;
+	    i__10 = j * c_dim1 + 9;
+	    q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i;
+	    i__11 = j * c_dim1 + 10;
+	    q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i =
+		    v10.r * c__[i__11].i + v10.i * c__[i__11].r;
+	    q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 8;
+	    i__3 = j * c_dim1 + 8;
+	    q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 9;
+	    i__3 = j * c_dim1 + 9;
+	    q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j * c_dim1 + 10;
+	    i__3 = j * c_dim1 + 10;
+	    q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i +
+		    sum.i * t10.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L200: */
+	}
+	goto L410;
+    } else {
+
+/*        Form  C * H, where H has order n. */
+
+	switch (*n) {
+	    case 1:  goto L210;
+	    case 2:  goto L230;
+	    case 3:  goto L250;
+	    case 4:  goto L270;
+	    case 5:  goto L290;
+	    case 6:  goto L310;
+	    case 7:  goto L330;
+	    case 8:  goto L350;
+	    case 9:  goto L370;
+	    case 10:  goto L390;
+	}
+
+/*
+          Code for general N
+
+          w := C * v
+*/
+
+	cgemv_("No transpose", m, n, &c_b56, &c__[c_offset], ldc, &v[1], &
+		c__1, &c_b55, &work[1], &c__1);
+
+/*        C := C - tau * w * v' */
+
+	q__1.r = -tau->r, q__1.i = -tau->i;
+	cgerc_(m, n, &q__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset],
+		ldc);
+	goto L410;
+L210:
+
+/*        Special code for 1 x 1 Householder */
+
+	q__3.r = tau->r * v[1].r - tau->i * v[1].i, q__3.i = tau->r * v[1].i
+		+ tau->i * v[1].r;
+	r_cnjg(&q__4, &v[1]);
+	q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = q__3.r * q__4.i
+		+ q__3.i * q__4.r;
+	q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i;
+	t1.r = q__1.r, t1.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, q__1.i = t1.r *
+		    c__[i__3].i + t1.i * c__[i__3].r;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L220: */
+	}
+	goto L410;
+L230:
+
+/*        Special code for 2 x 2 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__2.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__3.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L240: */
+	}
+	goto L410;
+L250:
+
+/*        Special code for 3 x 3 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__3.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__4.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__5.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L260: */
+	}
+	goto L410;
+L270:
+
+/*        Special code for 4 x 4 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__4.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__5.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__3.r = q__4.r + q__5.r, q__3.i = q__4.i + q__5.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__6.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__2.r = q__3.r + q__6.r, q__2.i = q__3.i + q__6.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__7.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    q__1.r = q__2.r + q__7.r, q__1.i = q__2.i + q__7.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L280: */
+	}
+	goto L410;
+L290:
+
+/*        Special code for 5 x 5 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__5.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__6.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__4.r = q__5.r + q__6.r, q__4.i = q__5.i + q__6.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__7.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__3.r = q__4.r + q__7.r, q__3.i = q__4.i + q__7.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__8.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    q__2.r = q__3.r + q__8.r, q__2.i = q__3.i + q__8.i;
+	    i__6 = j + c_dim1 * 5;
+	    q__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__9.i = v5.r *
+		    c__[i__6].i + v5.i * c__[i__6].r;
+	    q__1.r = q__2.r + q__9.r, q__1.i = q__2.i + q__9.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L300: */
+	}
+	goto L410;
+L310:
+
+/*        Special code for 6 x 6 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__6.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__7.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__5.r = q__6.r + q__7.r, q__5.i = q__6.i + q__7.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__8.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__4.r = q__5.r + q__8.r, q__4.i = q__5.i + q__8.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__9.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    q__3.r = q__4.r + q__9.r, q__3.i = q__4.i + q__9.i;
+	    i__6 = j + c_dim1 * 5;
+	    q__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__10.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__2.r = q__3.r + q__10.r, q__2.i = q__3.i + q__10.i;
+	    i__7 = j + c_dim1 * 6;
+	    q__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__11.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__1.r = q__2.r + q__11.r, q__1.i = q__2.i + q__11.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L320: */
+	}
+	goto L410;
+L330:
+
+/*        Special code for 7 x 7 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__7.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__8.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__6.r = q__7.r + q__8.r, q__6.i = q__7.i + q__8.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__9.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    q__5.r = q__6.r + q__9.r, q__5.i = q__6.i + q__9.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__10.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__4.r = q__5.r + q__10.r, q__4.i = q__5.i + q__10.i;
+	    i__6 = j + c_dim1 * 5;
+	    q__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__11.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__3.r = q__4.r + q__11.r, q__3.i = q__4.i + q__11.i;
+	    i__7 = j + c_dim1 * 6;
+	    q__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__12.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__2.r = q__3.r + q__12.r, q__2.i = q__3.i + q__12.i;
+	    i__8 = j + c_dim1 * 7;
+	    q__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__13.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__1.r = q__2.r + q__13.r, q__1.i = q__2.i + q__13.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L340: */
+	}
+	goto L410;
+L350:
+
+/*        Special code for 8 x 8 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	v8.r = v[8].r, v8.i = v[8].i;
+	r_cnjg(&q__2, &v8);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t8.r = q__1.r, t8.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__8.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__9.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    q__7.r = q__8.r + q__9.r, q__7.i = q__8.i + q__9.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__10.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    q__6.r = q__7.r + q__10.r, q__6.i = q__7.i + q__10.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__11.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__5.r = q__6.r + q__11.r, q__5.i = q__6.i + q__11.i;
+	    i__6 = j + c_dim1 * 5;
+	    q__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__12.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__4.r = q__5.r + q__12.r, q__4.i = q__5.i + q__12.i;
+	    i__7 = j + c_dim1 * 6;
+	    q__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__13.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__3.r = q__4.r + q__13.r, q__3.i = q__4.i + q__13.i;
+	    i__8 = j + c_dim1 * 7;
+	    q__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__14.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__2.r = q__3.r + q__14.r, q__2.i = q__3.i + q__14.i;
+	    i__9 = j + ((c_dim1) << (3));
+	    q__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__15.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    q__1.r = q__2.r + q__15.r, q__1.i = q__2.i + q__15.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (3));
+	    i__3 = j + ((c_dim1) << (3));
+	    q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L360: */
+	}
+	goto L410;
+L370:
+
+/*        Special code for 9 x 9 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	v8.r = v[8].r, v8.i = v[8].i;
+	r_cnjg(&q__2, &v8);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t8.r = q__1.r, t8.i = q__1.i;
+	v9.r = v[9].r, v9.i = v[9].i;
+	r_cnjg(&q__2, &v9);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t9.r = q__1.r, t9.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__9.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__10.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    q__8.r = q__9.r + q__10.r, q__8.i = q__9.i + q__10.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__11.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    q__7.r = q__8.r + q__11.r, q__7.i = q__8.i + q__11.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__12.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__6.r = q__7.r + q__12.r, q__6.i = q__7.i + q__12.i;
+	    i__6 = j + c_dim1 * 5;
+	    q__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__13.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__5.r = q__6.r + q__13.r, q__5.i = q__6.i + q__13.i;
+	    i__7 = j + c_dim1 * 6;
+	    q__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__14.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__4.r = q__5.r + q__14.r, q__4.i = q__5.i + q__14.i;
+	    i__8 = j + c_dim1 * 7;
+	    q__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__15.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__3.r = q__4.r + q__15.r, q__3.i = q__4.i + q__15.i;
+	    i__9 = j + ((c_dim1) << (3));
+	    q__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__16.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    q__2.r = q__3.r + q__16.r, q__2.i = q__3.i + q__16.i;
+	    i__10 = j + c_dim1 * 9;
+	    q__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__17.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    q__1.r = q__2.r + q__17.r, q__1.i = q__2.i + q__17.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (3));
+	    i__3 = j + ((c_dim1) << (3));
+	    q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 9;
+	    i__3 = j + c_dim1 * 9;
+	    q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L380: */
+	}
+	goto L410;
+L390:
+
+/*        Special code for 10 x 10 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	r_cnjg(&q__2, &v1);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t1.r = q__1.r, t1.i = q__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	r_cnjg(&q__2, &v2);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t2.r = q__1.r, t2.i = q__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	r_cnjg(&q__2, &v3);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t3.r = q__1.r, t3.i = q__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	r_cnjg(&q__2, &v4);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t4.r = q__1.r, t4.i = q__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	r_cnjg(&q__2, &v5);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t5.r = q__1.r, t5.i = q__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	r_cnjg(&q__2, &v6);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t6.r = q__1.r, t6.i = q__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	r_cnjg(&q__2, &v7);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t7.r = q__1.r, t7.i = q__1.i;
+	v8.r = v[8].r, v8.i = v[8].i;
+	r_cnjg(&q__2, &v8);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t8.r = q__1.r, t8.i = q__1.i;
+	v9.r = v[9].r, v9.i = v[9].i;
+	r_cnjg(&q__2, &v9);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t9.r = q__1.r, t9.i = q__1.i;
+	v10.r = v[10].r, v10.i = v[10].i;
+	r_cnjg(&q__2, &v10);
+	q__1.r = tau->r * q__2.r - tau->i * q__2.i, q__1.i = tau->r * q__2.i
+		+ tau->i * q__2.r;
+	t10.r = q__1.r, t10.i = q__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    q__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, q__10.i = v1.r
+		    * c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    q__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, q__11.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    q__9.r = q__10.r + q__11.r, q__9.i = q__10.i + q__11.i;
+	    i__4 = j + c_dim1 * 3;
+	    q__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, q__12.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    q__8.r = q__9.r + q__12.r, q__8.i = q__9.i + q__12.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    q__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, q__13.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    q__7.r = q__8.r + q__13.r, q__7.i = q__8.i + q__13.i;
+	    i__6 = j + c_dim1 * 5;
+	    q__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, q__14.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    q__6.r = q__7.r + q__14.r, q__6.i = q__7.i + q__14.i;
+	    i__7 = j + c_dim1 * 6;
+	    q__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, q__15.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    q__5.r = q__6.r + q__15.r, q__5.i = q__6.i + q__15.i;
+	    i__8 = j + c_dim1 * 7;
+	    q__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, q__16.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    q__4.r = q__5.r + q__16.r, q__4.i = q__5.i + q__16.i;
+	    i__9 = j + ((c_dim1) << (3));
+	    q__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, q__17.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    q__3.r = q__4.r + q__17.r, q__3.i = q__4.i + q__17.i;
+	    i__10 = j + c_dim1 * 9;
+	    q__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, q__18.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    q__2.r = q__3.r + q__18.r, q__2.i = q__3.i + q__18.i;
+	    i__11 = j + c_dim1 * 10;
+	    q__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, q__19.i =
+		    v10.r * c__[i__11].i + v10.i * c__[i__11].r;
+	    q__1.r = q__2.r + q__19.r, q__1.i = q__2.i + q__19.i;
+	    sum.r = q__1.r, sum.i = q__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    q__2.r = sum.r * t1.r - sum.i * t1.i, q__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    q__2.r = sum.r * t2.r - sum.i * t2.i, q__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    q__2.r = sum.r * t3.r - sum.i * t3.i, q__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    q__2.r = sum.r * t4.r - sum.i * t4.i, q__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    q__2.r = sum.r * t5.r - sum.i * t5.i, q__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    q__2.r = sum.r * t6.r - sum.i * t6.i, q__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    q__2.r = sum.r * t7.r - sum.i * t7.i, q__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + ((c_dim1) << (3));
+	    i__3 = j + ((c_dim1) << (3));
+	    q__2.r = sum.r * t8.r - sum.i * t8.i, q__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 9;
+	    i__3 = j + c_dim1 * 9;
+	    q__2.r = sum.r * t9.r - sum.i * t9.i, q__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+	    i__2 = j + c_dim1 * 10;
+	    i__3 = j + c_dim1 * 10;
+	    q__2.r = sum.r * t10.r - sum.i * t10.i, q__2.i = sum.r * t10.i +
+		    sum.i * t10.r;
+	    q__1.r = c__[i__3].r - q__2.r, q__1.i = c__[i__3].i - q__2.i;
+	    c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
+/* L400: */
+	}
+	goto L410;
+    }
+L410:
+    return 0;
+
+/*     End of CLARFX */
+
+} /* clarfx_ */
+
+/* Subroutine */ int clascl_(char *type__, integer *kl, integer *ku, real *
+	cfrom, real *cto, integer *m, integer *n, complex *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, j, k1, k2, k3, k4;
+    static real mul, cto1;
+    static logical done;
+    static real ctoc;
+    extern logical lsame_(char *, char *);
+    static integer itype;
+    static real cfrom1;
+    extern doublereal slamch_(char *);
+    static real cfromc;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real bignum, smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    CLASCL multiplies the M by N complex matrix A by the real scalar
+    CTO/CFROM.  This is done without over/underflow as long as the final
+    result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+    A may be full, upper triangular, lower triangular, upper Hessenberg,
+    or banded.
+
+    Arguments
+    =========
+
+    TYPE    (input) CHARACTER*1
+            TYPE indices the storage type of the input matrix.
+            = 'G':  A is a full matrix.
+            = 'L':  A is a lower triangular matrix.
+            = 'U':  A is an upper triangular matrix.
+            = 'H':  A is an upper Hessenberg matrix.
+            = 'B':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the lower
+                    half stored.
+            = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the upper
+                    half stored.
+            = 'Z':  A is a band matrix with lower bandwidth KL and upper
+                    bandwidth KU.
+
+    KL      (input) INTEGER
+            The lower bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    KU      (input) INTEGER
+            The upper bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    CFROM   (input) REAL
+    CTO     (input) REAL
+            The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+            without over/underflow if the final result CTO*A(I,J)/CFROM
+            can be represented without over/underflow.  CFROM must be
+            nonzero.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,M)
+            The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+            storage type.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    INFO    (output) INTEGER
+            0  - successful exit
+            <0 - if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(type__, "G")) {
+	itype = 0;
+    } else if (lsame_(type__, "L")) {
+	itype = 1;
+    } else if (lsame_(type__, "U")) {
+	itype = 2;
+    } else if (lsame_(type__, "H")) {
+	itype = 3;
+    } else if (lsame_(type__, "B")) {
+	itype = 4;
+    } else if (lsame_(type__, "Q")) {
+	itype = 5;
+    } else if (lsame_(type__, "Z")) {
+	itype = 6;
+    } else {
+	itype = -1;
+    }
+
+    if (itype == -1) {
+	*info = -1;
+    } else if (*cfrom == 0.f) {
+	*info = -4;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n !=
+	    *m)) {
+	*info = -7;
+    } else if (itype <= 3 && *lda < max(1,*m)) {
+	*info = -9;
+    } else if (itype >= 4) {
+/* Computing MAX */
+	i__1 = *m - 1;
+	if ((*kl < 0) || (*kl > max(i__1,0))) {
+	    *info = -2;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = *n - 1;
+	    if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || (
+		    itype == 5)) && *kl != *ku)) {
+		*info = -3;
+	    } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda
+		    < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku
+		    + 1)) {
+		*info = -9;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLASCL", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*m == 0)) {
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+
+    cfromc = *cfrom;
+    ctoc = *cto;
+
+L10:
+    cfrom1 = cfromc * smlnum;
+    cto1 = ctoc / bignum;
+    if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) {
+	mul = smlnum;
+	done = FALSE_;
+	cfromc = cfrom1;
+    } else if (dabs(cto1) > dabs(cfromc)) {
+	mul = bignum;
+	done = FALSE_;
+	ctoc = cto1;
+    } else {
+	mul = ctoc / cfromc;
+	done = TRUE_;
+    }
+
+    if (itype == 0) {
+
+/*        Full matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L20: */
+	    }
+/* L30: */
+	}
+
+    } else if (itype == 1) {
+
+/*        Lower triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L40: */
+	    }
+/* L50: */
+	}
+
+    } else if (itype == 2) {
+
+/*        Upper triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L60: */
+	    }
+/* L70: */
+	}
+
+    } else if (itype == 3) {
+
+/*        Upper Hessenberg matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j + 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L80: */
+	    }
+/* L90: */
+	}
+
+    } else if (itype == 4) {
+
+/*        Lower half of a symmetric band matrix */
+
+	k3 = *kl + 1;
+	k4 = *n + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = k3, i__4 = k4 - j;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L100: */
+	    }
+/* L110: */
+	}
+
+    } else if (itype == 5) {
+
+/*        Upper half of a symmetric band matrix */
+
+	k1 = *ku + 2;
+	k3 = *ku + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__2 = k1 - j;
+	    i__3 = k3;
+	    for (i__ = max(i__2,1); i__ <= i__3; ++i__) {
+		i__2 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L120: */
+	    }
+/* L130: */
+	}
+
+    } else if (itype == 6) {
+
+/*        Band matrix */
+
+	k1 = *kl + *ku + 2;
+	k2 = *kl + 1;
+	k3 = ((*kl) << (1)) + *ku + 1;
+	k4 = *kl + *ku + 1 + *m;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__3 = k1 - j;
+/* Computing MIN */
+	    i__4 = k3, i__5 = k4 - j;
+	    i__2 = min(i__4,i__5);
+	    for (i__ = max(i__3,k2); i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		q__1.r = mul * a[i__4].r, q__1.i = mul * a[i__4].i;
+		a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L140: */
+	    }
+/* L150: */
+	}
+
+    }
+
+    if (! done) {
+	goto L10;
+    }
+
+    return 0;
+
+/*     End of CLASCL */
+
+} /* clascl_ */
+
+/* Subroutine */ int claset_(char *uplo, integer *m, integer *n, complex *
+	alpha, complex *beta, complex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLASET initializes a 2-D array A to BETA on the diagonal and
+    ALPHA on the offdiagonals.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be set.
+            = 'U':      Upper triangular part is set. The lower triangle
+                        is unchanged.
+            = 'L':      Lower triangular part is set. The upper triangle
+                        is unchanged.
+            Otherwise:  All of the matrix A is set.
+
+    M       (input) INTEGER
+            On entry, M specifies the number of rows of A.
+
+    N       (input) INTEGER
+            On entry, N specifies the number of columns of A.
+
+    ALPHA   (input) COMPLEX
+            All the offdiagonal array elements are set to ALPHA.
+
+    BETA    (input) COMPLEX
+            All the diagonal array elements are set to BETA.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j;
+                     A(i,i) = BETA , 1 <= i <= min(m,n)
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+
+/*
+          Set the diagonal to BETA and the strictly upper triangular
+          part of the array to ALPHA.
+*/
+
+	i__1 = *n;
+	for (j = 2; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j - 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = alpha->r, a[i__3].i = alpha->i;
+/* L10: */
+	    }
+/* L20: */
+	}
+	i__1 = min(*n,*m);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = beta->r, a[i__2].i = beta->i;
+/* L30: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+
+/*
+          Set the diagonal to BETA and the strictly lower triangular
+          part of the array to ALPHA.
+*/
+
+	i__1 = min(*m,*n);
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = alpha->r, a[i__3].i = alpha->i;
+/* L40: */
+	    }
+/* L50: */
+	}
+	i__1 = min(*n,*m);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = beta->r, a[i__2].i = beta->i;
+/* L60: */
+	}
+
+    } else {
+
+/*
+          Set the array to BETA on the diagonal and ALPHA on the
+          offdiagonal.
+*/
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = alpha->r, a[i__3].i = alpha->i;
+/* L70: */
+	    }
+/* L80: */
+	}
+	i__1 = min(*m,*n);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = beta->r, a[i__2].i = beta->i;
+/* L90: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLASET */
+
+} /* claset_ */
+
+/* Subroutine */ int clasr_(char *side, char *pivot, char *direct, integer *m,
+	 integer *n, real *c__, real *s, complex *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    complex q__1, q__2, q__3;
+
+    /* Local variables */
+    static integer i__, j, info;
+    static complex temp;
+    extern logical lsame_(char *, char *);
+    static real ctemp, stemp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    CLASR   performs the transformation
+
+       A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+
+       A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+
+    where A is an m by n complex matrix and P is an orthogonal matrix,
+    consisting of a sequence of plane rotations determined by the
+    parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+    and z = n when SIDE = 'R' or 'r' ):
+
+    When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+
+       P = P( z - 1 )*...*P( 2 )*P( 1 ),
+
+    and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+
+       P = P( 1 )*P( 2 )*...*P( z - 1 ),
+
+    where  P( k ) is a plane rotation matrix for the following planes:
+
+       when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+          the plane ( k, k + 1 )
+
+       when  PIVOT = 'T' or 't'  ( Top pivot ),
+          the plane ( 1, k + 1 )
+
+       when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+          the plane ( k, z )
+
+    c( k ) and s( k )  must contain the  cosine and sine that define the
+    matrix  P( k ).  The two by two plane rotation part of the matrix
+    P( k ), R( k ), is assumed to be of the form
+
+       R( k ) = (  c( k )  s( k ) ).
+                ( -s( k )  c( k ) )
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            Specifies whether the plane rotation matrix P is applied to
+            A on the left or the right.
+            = 'L':  Left, compute A := P*A
+            = 'R':  Right, compute A:= A*P'
+
+    DIRECT  (input) CHARACTER*1
+            Specifies whether P is a forward or backward sequence of
+            plane rotations.
+            = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+            = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+
+    PIVOT   (input) CHARACTER*1
+            Specifies the plane for which P(k) is a plane rotation
+            matrix.
+            = 'V':  Variable pivot, the plane (k,k+1)
+            = 'T':  Top pivot, the plane (1,k+1)
+            = 'B':  Bottom pivot, the plane (k,z)
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  If m <= 1, an immediate
+            return is effected.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  If n <= 1, an
+            immediate return is effected.
+
+    C, S    (input) REAL arrays, dimension
+                    (M-1) if SIDE = 'L'
+                    (N-1) if SIDE = 'R'
+            c(k) and s(k) contain the cosine and sine that define the
+            matrix P(k).  The two by two plane rotation part of the
+            matrix P(k), R(k), is assumed to be of the form
+            R( k ) = (  c( k )  s( k ) ).
+                     ( -s( k )  c( k ) )
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            The m by n matrix A.  On exit, A is overwritten by P*A if
+            SIDE = 'R' or by A*P' if SIDE = 'L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --c__;
+    --s;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) {
+	info = 1;
+    } else if (! (((lsame_(pivot, "V")) || (lsame_(
+	    pivot, "T"))) || (lsame_(pivot, "B")))) {
+	info = 2;
+    } else if (! ((lsame_(direct, "F")) || (lsame_(
+	    direct, "B")))) {
+	info = 3;
+    } else if (*m < 0) {
+	info = 4;
+    } else if (*n < 0) {
+	info = 5;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("CLASR ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  P * A */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = j + 1 + i__ * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = j + 1 + i__ * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__4 = j + i__ * a_dim1;
+			    q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			    i__3 = j + i__ * a_dim1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__4 = j + i__ * a_dim1;
+			    q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = j + 1 + i__ * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = j + 1 + i__ * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__3 = j + i__ * a_dim1;
+			    q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+			    i__2 = j + i__ * a_dim1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__3 = j + i__ * a_dim1;
+			    q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L30: */
+			}
+		    }
+/* L40: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = j + i__ * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = j + i__ * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__4 = i__ * a_dim1 + 1;
+			    q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			    i__3 = i__ * a_dim1 + 1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__4 = i__ * a_dim1 + 1;
+			    q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = j + i__ * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = j + i__ * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__3 = i__ * a_dim1 + 1;
+			    q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+			    i__2 = i__ * a_dim1 + 1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__3 = i__ * a_dim1 + 1;
+			    q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = j + i__ * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = j + i__ * a_dim1;
+			    i__4 = *m + i__ * a_dim1;
+			    q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[
+				    i__4].i;
+			    q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			    i__3 = *m + i__ * a_dim1;
+			    i__4 = *m + i__ * a_dim1;
+			    q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[
+				    i__4].i;
+			    q__3.r = stemp * temp.r, q__3.i = stemp * temp.i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L90: */
+			}
+		    }
+/* L100: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = j + i__ * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = j + i__ * a_dim1;
+			    i__3 = *m + i__ * a_dim1;
+			    q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[
+				    i__3].i;
+			    q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+			    i__2 = *m + i__ * a_dim1;
+			    i__3 = *m + i__ * a_dim1;
+			    q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[
+				    i__3].i;
+			    q__3.r = stemp * temp.r, q__3.i = stemp * temp.i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+	    }
+	}
+    } else if (lsame_(side, "R")) {
+
+/*        Form A * P' */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + (j + 1) * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = i__ + (j + 1) * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__4 = i__ + j * a_dim1;
+			    q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			    i__3 = i__ + j * a_dim1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__4 = i__ + j * a_dim1;
+			    q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L130: */
+			}
+		    }
+/* L140: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + (j + 1) * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = i__ + (j + 1) * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__3 = i__ + j * a_dim1;
+			    q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+			    i__2 = i__ + j * a_dim1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__3 = i__ + j * a_dim1;
+			    q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L150: */
+			}
+		    }
+/* L160: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = i__ + j * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__4 = i__ + a_dim1;
+			    q__3.r = stemp * a[i__4].r, q__3.i = stemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			    i__3 = i__ + a_dim1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__4 = i__ + a_dim1;
+			    q__3.r = ctemp * a[i__4].r, q__3.i = ctemp * a[
+				    i__4].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L170: */
+			}
+		    }
+/* L180: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = i__ + j * a_dim1;
+			    q__2.r = ctemp * temp.r, q__2.i = ctemp * temp.i;
+			    i__3 = i__ + a_dim1;
+			    q__3.r = stemp * a[i__3].r, q__3.i = stemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+			    i__2 = i__ + a_dim1;
+			    q__2.r = stemp * temp.r, q__2.i = stemp * temp.i;
+			    i__3 = i__ + a_dim1;
+			    q__3.r = ctemp * a[i__3].r, q__3.i = ctemp * a[
+				    i__3].i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L190: */
+			}
+		    }
+/* L200: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = i__ + *n * a_dim1;
+			    q__2.r = stemp * a[i__4].r, q__2.i = stemp * a[
+				    i__4].i;
+			    q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+			    i__3 = i__ + *n * a_dim1;
+			    i__4 = i__ + *n * a_dim1;
+			    q__2.r = ctemp * a[i__4].r, q__2.i = ctemp * a[
+				    i__4].i;
+			    q__3.r = stemp * temp.r, q__3.i = stemp * temp.i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__3].r = q__1.r, a[i__3].i = q__1.i;
+/* L210: */
+			}
+		    }
+/* L220: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = i__ + j * a_dim1;
+			    i__3 = i__ + *n * a_dim1;
+			    q__2.r = stemp * a[i__3].r, q__2.i = stemp * a[
+				    i__3].i;
+			    q__3.r = ctemp * temp.r, q__3.i = ctemp * temp.i;
+			    q__1.r = q__2.r + q__3.r, q__1.i = q__2.i +
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+			    i__2 = i__ + *n * a_dim1;
+			    i__3 = i__ + *n * a_dim1;
+			    q__2.r = ctemp * a[i__3].r, q__2.i = ctemp * a[
+				    i__3].i;
+			    q__3.r = stemp * temp.r, q__3.i = stemp * temp.i;
+			    q__1.r = q__2.r - q__3.r, q__1.i = q__2.i -
+				    q__3.i;
+			    a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+/* L230: */
+			}
+		    }
+/* L240: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CLASR */
+
+} /* clasr_ */
+
+/* Subroutine */ int classq_(integer *n, complex *x, integer *incx, real *
+	scale, real *sumsq)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    real r__1;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+
+    /* Local variables */
+    static integer ix;
+    static real temp1;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLASSQ returns the values scl and ssq such that
+
+       ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+
+    where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is
+    assumed to be at least unity and the value of ssq will then satisfy
+
+       1.0 .le. ssq .le. ( sumsq + 2*n ).
+
+    scale is assumed to be non-negative and scl returns the value
+
+       scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ),
+              i
+
+    scale and sumsq must be supplied in SCALE and SUMSQ respectively.
+    SCALE and SUMSQ are overwritten by scl and ssq respectively.
+
+    The routine makes only one pass through the vector X.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of elements to be used from the vector X.
+
+    X       (input) COMPLEX array, dimension (N)
+            The vector x as described above.
+               x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+
+    INCX    (input) INTEGER
+            The increment between successive values of the vector X.
+            INCX > 0.
+
+    SCALE   (input/output) REAL
+            On entry, the value  scale  in the equation above.
+            On exit, SCALE is overwritten with the value  scl .
+
+    SUMSQ   (input/output) REAL
+            On entry, the value  sumsq  in the equation above.
+            On exit, SUMSQ is overwritten with the value  ssq .
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n > 0) {
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    i__3 = ix;
+	    if (x[i__3].r != 0.f) {
+		i__3 = ix;
+		temp1 = (r__1 = x[i__3].r, dabs(r__1));
+		if (*scale < temp1) {
+/* Computing 2nd power */
+		    r__1 = *scale / temp1;
+		    *sumsq = *sumsq * (r__1 * r__1) + 1;
+		    *scale = temp1;
+		} else {
+/* Computing 2nd power */
+		    r__1 = temp1 / *scale;
+		    *sumsq += r__1 * r__1;
+		}
+	    }
+	    if (r_imag(&x[ix]) != 0.f) {
+		temp1 = (r__1 = r_imag(&x[ix]), dabs(r__1));
+		if (*scale < temp1) {
+/* Computing 2nd power */
+		    r__1 = *scale / temp1;
+		    *sumsq = *sumsq * (r__1 * r__1) + 1;
+		    *scale = temp1;
+		} else {
+/* Computing 2nd power */
+		    r__1 = temp1 / *scale;
+		    *sumsq += r__1 * r__1;
+		}
+	    }
+/* L10: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLASSQ */
+
+} /* classq_ */
+
+/* Subroutine */ int claswp_(integer *n, complex *a, integer *lda, integer *
+	k1, integer *k2, integer *ipiv, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+
+    /* Local variables */
+    static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc;
+    static complex temp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CLASWP performs a series of row interchanges on the matrix A.
+    One row interchange is initiated for each of rows K1 through K2 of A.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the matrix of column dimension N to which the row
+            interchanges will be applied.
+            On exit, the permuted matrix.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+
+    K1      (input) INTEGER
+            The first element of IPIV for which a row interchange will
+            be done.
+
+    K2      (input) INTEGER
+            The last element of IPIV for which a row interchange will
+            be done.
+
+    IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+            The vector of pivot indices.  Only the elements in positions
+            K1 through K2 of IPIV are accessed.
+            IPIV(K) = L implies rows K and L are to be interchanged.
+
+    INCX    (input) INTEGER
+            The increment between successive values of IPIV.  If IPIV
+            is negative, the pivots are applied in reverse order.
+
+    Further Details
+    ===============
+
+    Modified by
+     R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+
+   =====================================================================
+
+
+       Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    if (*incx > 0) {
+	ix0 = *k1;
+	i1 = *k1;
+	i2 = *k2;
+	inc = 1;
+    } else if (*incx < 0) {
+	ix0 = (1 - *k2) * *incx + 1;
+	i1 = *k2;
+	i2 = *k1;
+	inc = -1;
+    } else {
+	return 0;
+    }
+
+    n32 = (*n / 32) << (5);
+    if (n32 != 0) {
+	i__1 = n32;
+	for (j = 1; j <= i__1; j += 32) {
+	    ix = ix0;
+	    i__2 = i2;
+	    i__3 = inc;
+	    for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3)
+		    {
+		ip = ipiv[ix];
+		if (ip != i__) {
+		    i__4 = j + 31;
+		    for (k = j; k <= i__4; ++k) {
+			i__5 = i__ + k * a_dim1;
+			temp.r = a[i__5].r, temp.i = a[i__5].i;
+			i__5 = i__ + k * a_dim1;
+			i__6 = ip + k * a_dim1;
+			a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i;
+			i__5 = ip + k * a_dim1;
+			a[i__5].r = temp.r, a[i__5].i = temp.i;
+/* L10: */
+		    }
+		}
+		ix += *incx;
+/* L20: */
+	    }
+/* L30: */
+	}
+    }
+    if (n32 != *n) {
+	++n32;
+	ix = ix0;
+	i__1 = i2;
+	i__3 = inc;
+	for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) {
+	    ip = ipiv[ix];
+	    if (ip != i__) {
+		i__2 = *n;
+		for (k = n32; k <= i__2; ++k) {
+		    i__4 = i__ + k * a_dim1;
+		    temp.r = a[i__4].r, temp.i = a[i__4].i;
+		    i__4 = i__ + k * a_dim1;
+		    i__5 = ip + k * a_dim1;
+		    a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i;
+		    i__4 = ip + k * a_dim1;
+		    a[i__4].r = temp.r, a[i__4].i = temp.i;
+/* L40: */
+		}
+	    }
+	    ix += *incx;
+/* L50: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLASWP */
+
+} /* claswp_ */
+
+/* Subroutine */ int clatrd_(char *uplo, integer *n, integer *nb, complex *a,
+	integer *lda, real *e, complex *tau, complex *w, integer *ldw)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
+    real r__1;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Local variables */
+    static integer i__, iw;
+    static complex alpha;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *);
+    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer
+	    *, complex *, integer *);
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), chemv_(char *, integer *, complex *,
+	    complex *, integer *, complex *, integer *, complex *, complex *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *,
+	    integer *, complex *, integer *), clarfg_(integer *, complex *,
+	    complex *, integer *, complex *), clacgv_(integer *, complex *,
+	    integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLATRD reduces NB rows and columns of a complex Hermitian matrix A to
+    Hermitian tridiagonal form by a unitary similarity
+    transformation Q' * A * Q, and returns the matrices V and W which are
+    needed to apply the transformation to the unreduced part of A.
+
+    If UPLO = 'U', CLATRD reduces the last NB rows and columns of a
+    matrix, of which the upper triangle is supplied;
+    if UPLO = 'L', CLATRD reduces the first NB rows and columns of a
+    matrix, of which the lower triangle is supplied.
+
+    This is an auxiliary routine called by CHETRD.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER
+            Specifies whether the upper or lower triangular part of the
+            Hermitian matrix A is stored:
+            = 'U': Upper triangular
+            = 'L': Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    NB      (input) INTEGER
+            The number of rows and columns to be reduced.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit:
+            if UPLO = 'U', the last NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements above the diagonal
+              with the array TAU, represent the unitary matrix Q as a
+              product of elementary reflectors;
+            if UPLO = 'L', the first NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements below the diagonal
+              with the array TAU, represent the  unitary matrix Q as a
+              product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    E       (output) REAL array, dimension (N-1)
+            If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+            elements of the last NB columns of the reduced matrix;
+            if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+            the first NB columns of the reduced matrix.
+
+    TAU     (output) COMPLEX array, dimension (N-1)
+            The scalar factors of the elementary reflectors, stored in
+            TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+            See Further Details.
+
+    W       (output) COMPLEX array, dimension (LDW,NB)
+            The n-by-nb matrix W required to update the unreduced part
+            of A.
+
+    LDW     (input) INTEGER
+            The leading dimension of the array W. LDW >= max(1,N).
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n) H(n-1) . . . H(n-nb+1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+    and tau in TAU(i-1).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+    and tau in TAU(i).
+
+    The elements of the vectors v together form the n-by-nb matrix V
+    which is needed, with W, to apply the transformation to the unreduced
+    part of the matrix, using a Hermitian rank-2k update of the form:
+    A := A - V*W' - W*V'.
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5 and nb = 2:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  a   a   a   v4  v5 )              (  d                  )
+      (      a   a   v4  v5 )              (  1   d              )
+      (          a   1   v5 )              (  v1  1   a          )
+      (              d   1  )              (  v1  v2  a   a      )
+      (                  d  )              (  v1  v2  a   a   a  )
+
+    where d denotes a diagonal element of the reduced matrix, a denotes
+    an element of the original matrix that is unchanged, and vi denotes
+    an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --e;
+    --tau;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1;
+    w -= w_offset;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (lsame_(uplo, "U")) {
+
+/*        Reduce last NB columns of upper triangle */
+
+	i__1 = *n - *nb + 1;
+	for (i__ = *n; i__ >= i__1; --i__) {
+	    iw = i__ - *n + *nb;
+	    if (i__ < *n) {
+
+/*              Update A(1:i,i) */
+
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = i__ + i__ * a_dim1;
+		r__1 = a[i__3].r;
+		a[i__2].r = r__1, a[i__2].i = 0.f;
+		i__2 = *n - i__;
+		clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw);
+		i__2 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__, &i__2, &q__1, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
+			c_b56, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		clacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw);
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = *n - i__;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__, &i__2, &q__1, &w[(iw + 1) *
+			w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b56, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = i__ + i__ * a_dim1;
+		r__1 = a[i__3].r;
+		a[i__2].r = r__1, a[i__2].i = 0.f;
+	    }
+	    if (i__ > 1) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(1:i-2,i)
+*/
+
+		i__2 = i__ - 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = i__ - 1;
+		clarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__
+			- 1]);
+		i__2 = i__ - 1;
+		e[i__2] = alpha.r;
+		i__2 = i__ - 1 + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute W(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		chemv_("Upper", &i__2, &c_b56, &a[a_offset], lda, &a[i__ *
+			a_dim1 + 1], &c__1, &c_b55, &w[iw * w_dim1 + 1], &
+			c__1);
+		if (i__ < *n) {
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[(
+			    iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1],
+			    &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[(i__ + 1) *
+			     a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], &
+			    c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[(
+			    i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1],
+			     &c__1, &c_b55, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemv_("No transpose", &i__2, &i__3, &q__1, &w[(iw + 1) *
+			    w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
+			    c__1, &c_b56, &w[iw * w_dim1 + 1], &c__1);
+		}
+		i__2 = i__ - 1;
+		cscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);
+		q__3.r = -.5f, q__3.i = -0.f;
+		i__2 = i__ - 1;
+		q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i =
+			 q__3.r * tau[i__2].i + q__3.i * tau[i__2].r;
+		i__3 = i__ - 1;
+		cdotc_(&q__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ *
+			a_dim1 + 1], &c__1);
+		q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r *
+			q__4.i + q__2.i * q__4.r;
+		alpha.r = q__1.r, alpha.i = q__1.i;
+		i__2 = i__ - 1;
+		caxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
+			w_dim1 + 1], &c__1);
+	    }
+
+/* L10: */
+	}
+    } else {
+
+/*        Reduce first NB columns of lower triangle */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:n,i) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__ + i__ * a_dim1;
+	    r__1 = a[i__3].r;
+	    a[i__2].r = r__1, a[i__2].i = 0.f;
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &w[i__ + w_dim1], ldw);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + a_dim1], lda,
+		     &w[i__ + w_dim1], ldw, &c_b56, &a[i__ + i__ * a_dim1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &w[i__ + w_dim1], ldw);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    q__1.r = -1.f, q__1.i = -0.f;
+	    cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + w_dim1], ldw,
+		     &a[i__ + a_dim1], lda, &c_b56, &a[i__ + i__ * a_dim1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    clacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__ + i__ * a_dim1;
+	    r__1 = a[i__3].r;
+	    a[i__2].r = r__1, a[i__2].i = 0.f;
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:n,i)
+*/
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		clarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1,
+			 &tau[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+
+/*              Compute W(i+1:n,i) */
+
+		i__2 = *n - i__;
+		chemv_("Lower", &i__2, &c_b56, &a[i__ + 1 + (i__ + 1) *
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b55, &w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &w[i__ +
+			1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b55, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &a[i__ + 1 +
+			a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ +
+			1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b55, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &w[i__ + 1 +
+			w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b56, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		cscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);
+		q__3.r = -.5f, q__3.i = -0.f;
+		i__2 = i__;
+		q__2.r = q__3.r * tau[i__2].r - q__3.i * tau[i__2].i, q__2.i =
+			 q__3.r * tau[i__2].i + q__3.i * tau[i__2].r;
+		i__3 = *n - i__;
+		cdotc_(&q__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[
+			i__ + 1 + i__ * a_dim1], &c__1);
+		q__1.r = q__2.r * q__4.r - q__2.i * q__4.i, q__1.i = q__2.r *
+			q__4.i + q__2.i * q__4.r;
+		alpha.r = q__1.r, alpha.i = q__1.i;
+		i__2 = *n - i__;
+		caxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+	    }
+
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLATRD */
+
+} /* clatrd_ */
+
+/* Subroutine */ int clatrs_(char *uplo, char *trans, char *diag, char *
+	normin, integer *n, complex *a, integer *lda, complex *x, real *scale,
+	 real *cnorm, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    real r__1, r__2, r__3, r__4;
+    complex q__1, q__2, q__3, q__4;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j;
+    static real xj, rec, tjj;
+    static integer jinc;
+    static real xbnd;
+    static integer imax;
+    static real tmax;
+    static complex tjjs;
+    static real xmax, grow;
+    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer
+	    *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static real tscal;
+    static complex uscal;
+    static integer jlast;
+    extern /* Complex */ VOID cdotu_(complex *, integer *, complex *, integer
+	    *, complex *, integer *);
+    static complex csumj;
+    extern /* Subroutine */ int caxpy_(integer *, complex *, complex *,
+	    integer *, complex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int ctrsv_(char *, char *, char *, integer *,
+	    complex *, integer *, complex *, integer *), slabad_(real *, real *);
+    extern integer icamax_(integer *, complex *, integer *);
+    extern /* Complex */ VOID cladiv_(complex *, complex *, complex *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *), xerbla_(char *, integer *);
+    static real bignum;
+    extern integer isamax_(integer *, real *, integer *);
+    extern doublereal scasum_(integer *, complex *, integer *);
+    static logical notran;
+    static integer jfirst;
+    static real smlnum;
+    static logical nounit;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1992
+
+
+    Purpose
+    =======
+
+    CLATRS solves one of the triangular systems
+
+       A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+
+    with scaling to prevent overflow.  Here A is an upper or lower
+    triangular matrix, A**T denotes the transpose of A, A**H denotes the
+    conjugate transpose of A, x and b are n-element vectors, and s is a
+    scaling factor, usually less than or equal to 1, chosen so that the
+    components of x will be less than the overflow threshold.  If the
+    unscaled problem will not cause overflow, the Level 2 BLAS routine
+    CTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
+    then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the matrix A is upper or lower triangular.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    TRANS   (input) CHARACTER*1
+            Specifies the operation applied to A.
+            = 'N':  Solve A * x = s*b     (No transpose)
+            = 'T':  Solve A**T * x = s*b  (Transpose)
+            = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+
+    DIAG    (input) CHARACTER*1
+            Specifies whether or not the matrix A is unit triangular.
+            = 'N':  Non-unit triangular
+            = 'U':  Unit triangular
+
+    NORMIN  (input) CHARACTER*1
+            Specifies whether CNORM has been set or not.
+            = 'Y':  CNORM contains the column norms on entry
+            = 'N':  CNORM is not set on entry.  On exit, the norms will
+                    be computed and stored in CNORM.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The triangular matrix A.  If UPLO = 'U', the leading n by n
+            upper triangular part of the array A contains the upper
+            triangular matrix, and the strictly lower triangular part of
+            A is not referenced.  If UPLO = 'L', the leading n by n lower
+            triangular part of the array A contains the lower triangular
+            matrix, and the strictly upper triangular part of A is not
+            referenced.  If DIAG = 'U', the diagonal elements of A are
+            also not referenced and are assumed to be 1.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max (1,N).
+
+    X       (input/output) COMPLEX array, dimension (N)
+            On entry, the right hand side b of the triangular system.
+            On exit, X is overwritten by the solution vector x.
+
+    SCALE   (output) REAL
+            The scaling factor s for the triangular system
+               A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+            If SCALE = 0, the matrix A is singular or badly scaled, and
+            the vector x is an exact or approximate solution to A*x = 0.
+
+    CNORM   (input or output) REAL array, dimension (N)
+
+            If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+            contains the norm of the off-diagonal part of the j-th column
+            of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+            to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+            must be greater than or equal to the 1-norm.
+
+            If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+            returns the 1-norm of the offdiagonal part of the j-th column
+            of A.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -k, the k-th argument had an illegal value
+
+    Further Details
+    ======= =======
+
+    A rough bound on x is computed; if that is less than overflow, CTRSV
+    is called, otherwise, specific code is used which checks for possible
+    overflow or divide-by-zero at every operation.
+
+    A columnwise scheme is used for solving A*x = b.  The basic algorithm
+    if A is lower triangular is
+
+         x[1:n] := b[1:n]
+         for j = 1, ..., n
+              x(j) := x(j) / A(j,j)
+              x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+         end
+
+    Define bounds on the components of x after j iterations of the loop:
+       M(j) = bound on x[1:j]
+       G(j) = bound on x[j+1:n]
+    Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+
+    Then for iteration j+1 we have
+       M(j+1) <= G(j) / | A(j+1,j+1) |
+       G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+              <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+
+    where CNORM(j+1) is greater than or equal to the infinity-norm of
+    column j+1 of A, not counting the diagonal.  Hence
+
+       G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+                    1<=i<=j
+    and
+
+       |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+                                     1<=i< j
+
+    Since |x(j)| <= M(j), we use the Level 2 BLAS routine CTRSV if the
+    reciprocal of the largest M(j), j=1,..,n, is larger than
+    max(underflow, 1/overflow).
+
+    The bound on x(j) is also used to determine when a step in the
+    columnwise method can be performed without fear of overflow.  If
+    the computed bound is greater than a large constant, x is scaled to
+    prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+    1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+
+    Similarly, a row-wise scheme is used to solve A**T *x = b  or
+    A**H *x = b.  The basic algorithm for A upper triangular is
+
+         for j = 1, ..., n
+              x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+         end
+
+    We simultaneously compute two bounds
+         G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+         M(j) = bound on x(i), 1<=i<=j
+
+    The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+    add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+    Then the bound on x(j) is
+
+         M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+
+              <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+                        1<=i<=j
+
+    and we can safely call CTRSV if 1/M(n) and 1/G(n) are both greater
+    than max(underflow, 1/overflow).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --cnorm;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    notran = lsame_(trans, "N");
+    nounit = lsame_(diag, "N");
+
+/*     Test the input parameters. */
+
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && !
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -3;
+    } else if (! lsame_(normin, "Y") && ! lsame_(normin,
+	     "N")) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLATRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine machine dependent parameters to control overflow. */
+
+    smlnum = slamch_("Safe minimum");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum /= slamch_("Precision");
+    bignum = 1.f / smlnum;
+    *scale = 1.f;
+
+    if (lsame_(normin, "N")) {
+
+/*        Compute the 1-norm of each column, not including the diagonal. */
+
+	if (upper) {
+
+/*           A is upper triangular. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j - 1;
+		cnorm[j] = scasum_(&i__2, &a[j * a_dim1 + 1], &c__1);
+/* L10: */
+	    }
+	} else {
+
+/*           A is lower triangular. */
+
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n - j;
+		cnorm[j] = scasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1);
+/* L20: */
+	    }
+	    cnorm[*n] = 0.f;
+	}
+    }
+
+/*
+       Scale the column norms by TSCAL if the maximum element in CNORM is
+       greater than BIGNUM/2.
+*/
+
+    imax = isamax_(n, &cnorm[1], &c__1);
+    tmax = cnorm[imax];
+    if (tmax <= bignum * .5f) {
+	tscal = 1.f;
+    } else {
+	tscal = .5f / (smlnum * tmax);
+	sscal_(n, &tscal, &cnorm[1], &c__1);
+    }
+
+/*
+       Compute a bound on the computed solution vector to see if the
+       Level 2 BLAS routine CTRSV can be used.
+*/
+
+    xmax = 0.f;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	i__2 = j;
+	r__3 = xmax, r__4 = (r__1 = x[i__2].r / 2.f, dabs(r__1)) + (r__2 =
+		r_imag(&x[j]) / 2.f, dabs(r__2));
+	xmax = dmax(r__3,r__4);
+/* L30: */
+    }
+    xbnd = xmax;
+
+    if (notran) {
+
+/*        Compute the growth in A * x = b. */
+
+	if (upper) {
+	    jfirst = *n;
+	    jlast = 1;
+	    jinc = -1;
+	} else {
+	    jfirst = 1;
+	    jlast = *n;
+	    jinc = 1;
+	}
+
+	if (tscal != 1.f) {
+	    grow = 0.f;
+	    goto L60;
+	}
+
+	if (nounit) {
+
+/*
+             A is non-unit triangular.
+
+             Compute GROW = 1/G(j) and XBND = 1/M(j).
+             Initially, G(0) = max{x(i), i=1,...,n}.
+*/
+
+	    grow = .5f / dmax(xbnd,smlnum);
+	    xbnd = grow;
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L60;
+		}
+
+		i__3 = j + j * a_dim1;
+		tjjs.r = a[i__3].r, tjjs.i = a[i__3].i;
+		tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			dabs(r__2));
+
+		if (tjj >= smlnum) {
+
+/*
+                   M(j) = G(j-1) / abs(A(j,j))
+
+   Computing MIN
+*/
+		    r__1 = xbnd, r__2 = dmin(1.f,tjj) * grow;
+		    xbnd = dmin(r__1,r__2);
+		} else {
+
+/*                 M(j) could overflow, set XBND to 0. */
+
+		    xbnd = 0.f;
+		}
+
+		if (tjj + cnorm[j] >= smlnum) {
+
+/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */
+
+		    grow *= tjj / (tjj + cnorm[j]);
+		} else {
+
+/*                 G(j) could overflow, set GROW to 0. */
+
+		    grow = 0.f;
+		}
+/* L40: */
+	    }
+	    grow = xbnd;
+	} else {
+
+/*
+             A is unit triangular.
+
+             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+
+   Computing MIN
+*/
+	    r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum);
+	    grow = dmin(r__1,r__2);
+	    i__2 = jlast;
+	    i__1 = jinc;
+	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L60;
+		}
+
+/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */
+
+		grow *= 1.f / (cnorm[j] + 1.f);
+/* L50: */
+	    }
+	}
+L60:
+
+	;
+    } else {
+
+/*        Compute the growth in A**T * x = b  or  A**H * x = b. */
+
+	if (upper) {
+	    jfirst = 1;
+	    jlast = *n;
+	    jinc = 1;
+	} else {
+	    jfirst = *n;
+	    jlast = 1;
+	    jinc = -1;
+	}
+
+	if (tscal != 1.f) {
+	    grow = 0.f;
+	    goto L90;
+	}
+
+	if (nounit) {
+
+/*
+             A is non-unit triangular.
+
+             Compute GROW = 1/G(j) and XBND = 1/M(j).
+             Initially, M(0) = max{x(i), i=1,...,n}.
+*/
+
+	    grow = .5f / dmax(xbnd,smlnum);
+	    xbnd = grow;
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L90;
+		}
+
+/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
+
+		xj = cnorm[j] + 1.f;
+/* Computing MIN */
+		r__1 = grow, r__2 = xbnd / xj;
+		grow = dmin(r__1,r__2);
+
+		i__3 = j + j * a_dim1;
+		tjjs.r = a[i__3].r, tjjs.i = a[i__3].i;
+		tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			dabs(r__2));
+
+		if (tjj >= smlnum) {
+
+/*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */
+
+		    if (xj > tjj) {
+			xbnd *= tjj / xj;
+		    }
+		} else {
+
+/*                 M(j) could overflow, set XBND to 0. */
+
+		    xbnd = 0.f;
+		}
+/* L70: */
+	    }
+	    grow = dmin(grow,xbnd);
+	} else {
+
+/*
+             A is unit triangular.
+
+             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+
+   Computing MIN
+*/
+	    r__1 = 1.f, r__2 = .5f / dmax(xbnd,smlnum);
+	    grow = dmin(r__1,r__2);
+	    i__2 = jlast;
+	    i__1 = jinc;
+	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L90;
+		}
+
+/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */
+
+		xj = cnorm[j] + 1.f;
+		grow /= xj;
+/* L80: */
+	    }
+	}
+L90:
+	;
+    }
+
+    if (grow * tscal > smlnum) {
+
+/*
+          Use the Level 2 BLAS solve if the reciprocal of the bound on
+          elements of X is not too small.
+*/
+
+	ctrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1);
+    } else {
+
+/*        Use a Level 1 BLAS solve, scaling intermediate results. */
+
+	if (xmax > bignum * .5f) {
+
+/*
+             Scale X so that its components are less than or equal to
+             BIGNUM in absolute value.
+*/
+
+	    *scale = bignum * .5f / xmax;
+	    csscal_(n, scale, &x[1], &c__1);
+	    xmax = bignum;
+	} else {
+	    xmax *= 2.f;
+	}
+
+	if (notran) {
+
+/*           Solve A * x = b */
+
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
+
+		i__3 = j;
+		xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]),
+			dabs(r__2));
+		if (nounit) {
+		    i__3 = j + j * a_dim1;
+		    q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3].i;
+		    tjjs.r = q__1.r, tjjs.i = q__1.i;
+		} else {
+		    tjjs.r = tscal, tjjs.i = 0.f;
+		    if (tscal == 1.f) {
+			goto L105;
+		    }
+		}
+		tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			dabs(r__2));
+		if (tjj > smlnum) {
+
+/*                    abs(A(j,j)) > SMLNUM: */
+
+		    if (tjj < 1.f) {
+			if (xj > tjj * bignum) {
+
+/*                          Scale x by 1/b(j). */
+
+			    rec = 1.f / xj;
+			    csscal_(n, &rec, &x[1], &c__1);
+			    *scale *= rec;
+			    xmax *= rec;
+			}
+		    }
+		    i__3 = j;
+		    cladiv_(&q__1, &x[j], &tjjs);
+		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    i__3 = j;
+		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
+			    ), dabs(r__2));
+		} else if (tjj > 0.f) {
+
+/*                    0 < abs(A(j,j)) <= SMLNUM: */
+
+		    if (xj > tjj * bignum) {
+
+/*
+                         Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+                         to avoid overflow when dividing by A(j,j).
+*/
+
+			rec = tjj * bignum / xj;
+			if (cnorm[j] > 1.f) {
+
+/*
+                            Scale by 1/CNORM(j) to avoid overflow when
+                            multiplying x(j) times column j.
+*/
+
+			    rec /= cnorm[j];
+			}
+			csscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+			xmax *= rec;
+		    }
+		    i__3 = j;
+		    cladiv_(&q__1, &x[j], &tjjs);
+		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    i__3 = j;
+		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
+			    ), dabs(r__2));
+		} else {
+
+/*
+                      A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                      scale = 0, and compute a solution to A*x = 0.
+*/
+
+		    i__3 = *n;
+		    for (i__ = 1; i__ <= i__3; ++i__) {
+			i__4 = i__;
+			x[i__4].r = 0.f, x[i__4].i = 0.f;
+/* L100: */
+		    }
+		    i__3 = j;
+		    x[i__3].r = 1.f, x[i__3].i = 0.f;
+		    xj = 1.f;
+		    *scale = 0.f;
+		    xmax = 0.f;
+		}
+L105:
+
+/*
+                Scale x if necessary to avoid overflow when adding a
+                multiple of column j of A.
+*/
+
+		if (xj > 1.f) {
+		    rec = 1.f / xj;
+		    if (cnorm[j] > (bignum - xmax) * rec) {
+
+/*                    Scale x by 1/(2*abs(x(j))). */
+
+			rec *= .5f;
+			csscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+		    }
+		} else if (xj * cnorm[j] > bignum - xmax) {
+
+/*                 Scale x by 1/2. */
+
+		    csscal_(n, &c_b2206, &x[1], &c__1);
+		    *scale *= .5f;
+		}
+
+		if (upper) {
+		    if (j > 1) {
+
+/*
+                      Compute the update
+                         x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*/
+
+			i__3 = j - 1;
+			i__4 = j;
+			q__2.r = -x[i__4].r, q__2.i = -x[i__4].i;
+			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
+			caxpy_(&i__3, &q__1, &a[j * a_dim1 + 1], &c__1, &x[1],
+				 &c__1);
+			i__3 = j - 1;
+			i__ = icamax_(&i__3, &x[1], &c__1);
+			i__3 = i__;
+			xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 =
+				r_imag(&x[i__]), dabs(r__2));
+		    }
+		} else {
+		    if (j < *n) {
+
+/*
+                      Compute the update
+                         x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*/
+
+			i__3 = *n - j;
+			i__4 = j;
+			q__2.r = -x[i__4].r, q__2.i = -x[i__4].i;
+			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
+			caxpy_(&i__3, &q__1, &a[j + 1 + j * a_dim1], &c__1, &
+				x[j + 1], &c__1);
+			i__3 = *n - j;
+			i__ = j + icamax_(&i__3, &x[j + 1], &c__1);
+			i__3 = i__;
+			xmax = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 =
+				r_imag(&x[i__]), dabs(r__2));
+		    }
+		}
+/* L110: */
+	    }
+
+	} else if (lsame_(trans, "T")) {
+
+/*           Solve A**T * x = b */
+
+	    i__2 = jlast;
+	    i__1 = jinc;
+	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*
+                Compute x(j) = b(j) - sum A(k,j)*x(k).
+                                      k<>j
+*/
+
+		i__3 = j;
+		xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]),
+			dabs(r__2));
+		uscal.r = tscal, uscal.i = 0.f;
+		rec = 1.f / dmax(xmax,1.f);
+		if (cnorm[j] > (bignum - xj) * rec) {
+
+/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
+
+		    rec *= .5f;
+		    if (nounit) {
+			i__3 = j + j * a_dim1;
+			q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3]
+				.i;
+			tjjs.r = q__1.r, tjjs.i = q__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.f;
+		    }
+		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			     dabs(r__2));
+		    if (tjj > 1.f) {
+
+/*
+                         Divide by A(j,j) when scaling x if A(j,j) > 1.
+
+   Computing MIN
+*/
+			r__1 = 1.f, r__2 = rec * tjj;
+			rec = dmin(r__1,r__2);
+			cladiv_(&q__1, &uscal, &tjjs);
+			uscal.r = q__1.r, uscal.i = q__1.i;
+		    }
+		    if (rec < 1.f) {
+			csscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+			xmax *= rec;
+		    }
+		}
+
+		csumj.r = 0.f, csumj.i = 0.f;
+		if (uscal.r == 1.f && uscal.i == 0.f) {
+
+/*
+                   If the scaling needed for A in the dot product is 1,
+                   call CDOTU to perform the dot product.
+*/
+
+		    if (upper) {
+			i__3 = j - 1;
+			cdotu_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1],
+				 &c__1);
+			csumj.r = q__1.r, csumj.i = q__1.i;
+		    } else if (j < *n) {
+			i__3 = *n - j;
+			cdotu_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, &
+				x[j + 1], &c__1);
+			csumj.r = q__1.r, csumj.i = q__1.i;
+		    }
+		} else {
+
+/*                 Otherwise, use in-line code for the dot product. */
+
+		    if (upper) {
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * a_dim1;
+			    q__3.r = a[i__4].r * uscal.r - a[i__4].i *
+				    uscal.i, q__3.i = a[i__4].r * uscal.i + a[
+				    i__4].i * uscal.r;
+			    i__5 = i__;
+			    q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i,
+				    q__2.i = q__3.r * x[i__5].i + q__3.i * x[
+				    i__5].r;
+			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i +
+				    q__2.i;
+			    csumj.r = q__1.r, csumj.i = q__1.i;
+/* L120: */
+			}
+		    } else if (j < *n) {
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * a_dim1;
+			    q__3.r = a[i__4].r * uscal.r - a[i__4].i *
+				    uscal.i, q__3.i = a[i__4].r * uscal.i + a[
+				    i__4].i * uscal.r;
+			    i__5 = i__;
+			    q__2.r = q__3.r * x[i__5].r - q__3.i * x[i__5].i,
+				    q__2.i = q__3.r * x[i__5].i + q__3.i * x[
+				    i__5].r;
+			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i +
+				    q__2.i;
+			    csumj.r = q__1.r, csumj.i = q__1.i;
+/* L130: */
+			}
+		    }
+		}
+
+		q__1.r = tscal, q__1.i = 0.f;
+		if (uscal.r == q__1.r && uscal.i == q__1.i) {
+
+/*
+                   Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+                   was not used to scale the dotproduct.
+*/
+
+		    i__3 = j;
+		    i__4 = j;
+		    q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i -
+			    csumj.i;
+		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    i__3 = j;
+		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
+			    ), dabs(r__2));
+		    if (nounit) {
+			i__3 = j + j * a_dim1;
+			q__1.r = tscal * a[i__3].r, q__1.i = tscal * a[i__3]
+				.i;
+			tjjs.r = q__1.r, tjjs.i = q__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.f;
+			if (tscal == 1.f) {
+			    goto L145;
+			}
+		    }
+
+/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
+
+		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			     dabs(r__2));
+		    if (tjj > smlnum) {
+
+/*                       abs(A(j,j)) > SMLNUM: */
+
+			if (tjj < 1.f) {
+			    if (xj > tjj * bignum) {
+
+/*                             Scale X by 1/abs(x(j)). */
+
+				rec = 1.f / xj;
+				csscal_(n, &rec, &x[1], &c__1);
+				*scale *= rec;
+				xmax *= rec;
+			    }
+			}
+			i__3 = j;
+			cladiv_(&q__1, &x[j], &tjjs);
+			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    } else if (tjj > 0.f) {
+
+/*                       0 < abs(A(j,j)) <= SMLNUM: */
+
+			if (xj > tjj * bignum) {
+
+/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
+
+			    rec = tjj * bignum / xj;
+			    csscal_(n, &rec, &x[1], &c__1);
+			    *scale *= rec;
+			    xmax *= rec;
+			}
+			i__3 = j;
+			cladiv_(&q__1, &x[j], &tjjs);
+			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    } else {
+
+/*
+                         A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                         scale = 0 and compute a solution to A**T *x = 0.
+*/
+
+			i__3 = *n;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__;
+			    x[i__4].r = 0.f, x[i__4].i = 0.f;
+/* L140: */
+			}
+			i__3 = j;
+			x[i__3].r = 1.f, x[i__3].i = 0.f;
+			*scale = 0.f;
+			xmax = 0.f;
+		    }
+L145:
+		    ;
+		} else {
+
+/*
+                   Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+                   product has already been divided by 1/A(j,j).
+*/
+
+		    i__3 = j;
+		    cladiv_(&q__2, &x[j], &tjjs);
+		    q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i;
+		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		}
+/* Computing MAX */
+		i__3 = j;
+		r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 =
+			r_imag(&x[j]), dabs(r__2));
+		xmax = dmax(r__3,r__4);
+/* L150: */
+	    }
+
+	} else {
+
+/*           Solve A**H * x = b */
+
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*
+                Compute x(j) = b(j) - sum A(k,j)*x(k).
+                                      k<>j
+*/
+
+		i__3 = j;
+		xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]),
+			dabs(r__2));
+		uscal.r = tscal, uscal.i = 0.f;
+		rec = 1.f / dmax(xmax,1.f);
+		if (cnorm[j] > (bignum - xj) * rec) {
+
+/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
+
+		    rec *= .5f;
+		    if (nounit) {
+			r_cnjg(&q__2, &a[j + j * a_dim1]);
+			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
+			tjjs.r = q__1.r, tjjs.i = q__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.f;
+		    }
+		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			     dabs(r__2));
+		    if (tjj > 1.f) {
+
+/*
+                         Divide by A(j,j) when scaling x if A(j,j) > 1.
+
+   Computing MIN
+*/
+			r__1 = 1.f, r__2 = rec * tjj;
+			rec = dmin(r__1,r__2);
+			cladiv_(&q__1, &uscal, &tjjs);
+			uscal.r = q__1.r, uscal.i = q__1.i;
+		    }
+		    if (rec < 1.f) {
+			csscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+			xmax *= rec;
+		    }
+		}
+
+		csumj.r = 0.f, csumj.i = 0.f;
+		if (uscal.r == 1.f && uscal.i == 0.f) {
+
+/*
+                   If the scaling needed for A in the dot product is 1,
+                   call CDOTC to perform the dot product.
+*/
+
+		    if (upper) {
+			i__3 = j - 1;
+			cdotc_(&q__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1],
+				 &c__1);
+			csumj.r = q__1.r, csumj.i = q__1.i;
+		    } else if (j < *n) {
+			i__3 = *n - j;
+			cdotc_(&q__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, &
+				x[j + 1], &c__1);
+			csumj.r = q__1.r, csumj.i = q__1.i;
+		    }
+		} else {
+
+/*                 Otherwise, use in-line code for the dot product. */
+
+		    if (upper) {
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    r_cnjg(&q__4, &a[i__ + j * a_dim1]);
+			    q__3.r = q__4.r * uscal.r - q__4.i * uscal.i,
+				    q__3.i = q__4.r * uscal.i + q__4.i *
+				    uscal.r;
+			    i__4 = i__;
+			    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i,
+				    q__2.i = q__3.r * x[i__4].i + q__3.i * x[
+				    i__4].r;
+			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i +
+				    q__2.i;
+			    csumj.r = q__1.r, csumj.i = q__1.i;
+/* L160: */
+			}
+		    } else if (j < *n) {
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    r_cnjg(&q__4, &a[i__ + j * a_dim1]);
+			    q__3.r = q__4.r * uscal.r - q__4.i * uscal.i,
+				    q__3.i = q__4.r * uscal.i + q__4.i *
+				    uscal.r;
+			    i__4 = i__;
+			    q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i,
+				    q__2.i = q__3.r * x[i__4].i + q__3.i * x[
+				    i__4].r;
+			    q__1.r = csumj.r + q__2.r, q__1.i = csumj.i +
+				    q__2.i;
+			    csumj.r = q__1.r, csumj.i = q__1.i;
+/* L170: */
+			}
+		    }
+		}
+
+		q__1.r = tscal, q__1.i = 0.f;
+		if (uscal.r == q__1.r && uscal.i == q__1.i) {
+
+/*
+                   Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+                   was not used to scale the dotproduct.
+*/
+
+		    i__3 = j;
+		    i__4 = j;
+		    q__1.r = x[i__4].r - csumj.r, q__1.i = x[i__4].i -
+			    csumj.i;
+		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    i__3 = j;
+		    xj = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 = r_imag(&x[j]
+			    ), dabs(r__2));
+		    if (nounit) {
+			r_cnjg(&q__2, &a[j + j * a_dim1]);
+			q__1.r = tscal * q__2.r, q__1.i = tscal * q__2.i;
+			tjjs.r = q__1.r, tjjs.i = q__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.f;
+			if (tscal == 1.f) {
+			    goto L185;
+			}
+		    }
+
+/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
+
+		    tjj = (r__1 = tjjs.r, dabs(r__1)) + (r__2 = r_imag(&tjjs),
+			     dabs(r__2));
+		    if (tjj > smlnum) {
+
+/*                       abs(A(j,j)) > SMLNUM: */
+
+			if (tjj < 1.f) {
+			    if (xj > tjj * bignum) {
+
+/*                             Scale X by 1/abs(x(j)). */
+
+				rec = 1.f / xj;
+				csscal_(n, &rec, &x[1], &c__1);
+				*scale *= rec;
+				xmax *= rec;
+			    }
+			}
+			i__3 = j;
+			cladiv_(&q__1, &x[j], &tjjs);
+			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    } else if (tjj > 0.f) {
+
+/*                       0 < abs(A(j,j)) <= SMLNUM: */
+
+			if (xj > tjj * bignum) {
+
+/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
+
+			    rec = tjj * bignum / xj;
+			    csscal_(n, &rec, &x[1], &c__1);
+			    *scale *= rec;
+			    xmax *= rec;
+			}
+			i__3 = j;
+			cladiv_(&q__1, &x[j], &tjjs);
+			x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		    } else {
+
+/*
+                         A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                         scale = 0 and compute a solution to A**H *x = 0.
+*/
+
+			i__3 = *n;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__;
+			    x[i__4].r = 0.f, x[i__4].i = 0.f;
+/* L180: */
+			}
+			i__3 = j;
+			x[i__3].r = 1.f, x[i__3].i = 0.f;
+			*scale = 0.f;
+			xmax = 0.f;
+		    }
+L185:
+		    ;
+		} else {
+
+/*
+                   Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+                   product has already been divided by 1/A(j,j).
+*/
+
+		    i__3 = j;
+		    cladiv_(&q__2, &x[j], &tjjs);
+		    q__1.r = q__2.r - csumj.r, q__1.i = q__2.i - csumj.i;
+		    x[i__3].r = q__1.r, x[i__3].i = q__1.i;
+		}
+/* Computing MAX */
+		i__3 = j;
+		r__3 = xmax, r__4 = (r__1 = x[i__3].r, dabs(r__1)) + (r__2 =
+			r_imag(&x[j]), dabs(r__2));
+		xmax = dmax(r__3,r__4);
+/* L190: */
+	    }
+	}
+	*scale /= tscal;
+    }
+
+/*     Scale the column norms by 1/TSCAL for return. */
+
+    if (tscal != 1.f) {
+	r__1 = 1.f / tscal;
+	sscal_(n, &r__1, &cnorm[1], &c__1);
+    }
+
+    return 0;
+
+/*     End of CLATRS */
+
+} /* clatrs_ */
+
+/* Subroutine */ int clauu2_(char *uplo, integer *n, complex *a, integer *lda,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__;
+    static real aii;
+    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer
+	    *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *);
+    static logical upper;
+    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *),
+	    csscal_(integer *, real *, complex *, integer *), xerbla_(char *,
+	    integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLAUU2 computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the unblocked form of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLAUU2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the product U * U'. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    aii = a[i__2].r;
+	    if (i__ < *n) {
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = *n - i__;
+		cdotc_(&q__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[
+			i__ + (i__ + 1) * a_dim1], lda);
+		r__1 = aii * aii + q__1.r;
+		a[i__2].r = r__1, a[i__2].i = 0.f;
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		q__1.r = aii, q__1.i = 0.f;
+		cgemv_("No transpose", &i__2, &i__3, &c_b56, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			q__1, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		clacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    } else {
+		csscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
+	    }
+/* L10: */
+	}
+
+    } else {
+
+/*        Compute the product L' * L. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    aii = a[i__2].r;
+	    if (i__ < *n) {
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = *n - i__;
+		cdotc_(&q__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[
+			i__ + 1 + i__ * a_dim1], &c__1);
+		r__1 = aii * aii + q__1.r;
+		a[i__2].r = r__1, a[i__2].i = 0.f;
+		i__2 = i__ - 1;
+		clacgv_(&i__2, &a[i__ + a_dim1], lda);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		q__1.r = aii, q__1.i = 0.f;
+		cgemv_("Conjugate transpose", &i__2, &i__3, &c_b56, &a[i__ +
+			1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			q__1, &a[i__ + a_dim1], lda);
+		i__2 = i__ - 1;
+		clacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    } else {
+		csscal_(&i__, &aii, &a[i__ + a_dim1], lda);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of CLAUU2 */
+
+} /* clauu2_ */
+
+/* Subroutine */ int clauum_(char *uplo, integer *n, complex *a, integer *lda,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, ib, nb;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *), cherk_(char *,
+	    char *, integer *, integer *, real *, complex *, integer *, real *
+	    , complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *);
+    static logical upper;
+    extern /* Subroutine */ int clauu2_(char *, integer *, complex *, integer
+	    *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CLAUUM computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the blocked form of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CLAUUM", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "CLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	clauu2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute the product U * U'. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		ctrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", &
+			i__3, &ib, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[
+			i__ * a_dim1 + 1], lda);
+		clauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    cgemm_("No transpose", "Conjugate transpose", &i__3, &ib,
+			    &i__4, &c_b56, &a[(i__ + ib) * a_dim1 + 1], lda, &
+			    a[i__ + (i__ + ib) * a_dim1], lda, &c_b56, &a[i__
+			    * a_dim1 + 1], lda);
+		    i__3 = *n - i__ - ib + 1;
+		    cherk_("Upper", "No transpose", &ib, &i__3, &c_b1011, &a[
+			    i__ + (i__ + ib) * a_dim1], lda, &c_b1011, &a[i__
+			    + i__ * a_dim1], lda);
+		}
+/* L10: */
+	    }
+	} else {
+
+/*           Compute the product L' * L. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		ctrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", &
+			ib, &i__3, &c_b56, &a[i__ + i__ * a_dim1], lda, &a[
+			i__ + a_dim1], lda);
+		clauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    cgemm_("Conjugate transpose", "No transpose", &ib, &i__3,
+			    &i__4, &c_b56, &a[i__ + ib + i__ * a_dim1], lda, &
+			    a[i__ + ib + a_dim1], lda, &c_b56, &a[i__ +
+			    a_dim1], lda);
+		    i__3 = *n - i__ - ib + 1;
+		    cherk_("Lower", "Conjugate transpose", &ib, &i__3, &
+			    c_b1011, &a[i__ + ib + i__ * a_dim1], lda, &
+			    c_b1011, &a[i__ + i__ * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CLAUUM */
+
+} /* clauum_ */
+
+/* Subroutine */ int cpotf2_(char *uplo, integer *n, complex *a, integer *lda,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer j;
+    static real ajj;
+    extern /* Complex */ VOID cdotc_(complex *, integer *, complex *, integer
+	    *, complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *);
+    static logical upper;
+    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *),
+	    csscal_(integer *, real *, complex *, integer *), xerbla_(char *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CPOTF2 computes the Cholesky factorization of a complex Hermitian
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U' * U ,  if UPLO = 'U', or
+       A = L  * L',  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the unblocked version of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            Hermitian matrix A is stored.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            n by n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U'*U  or A = L*L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, the leading minor of order k is not
+                 positive definite, and the factorization could not be
+                 completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CPOTF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the Cholesky factorization A = U'*U. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute U(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j + j * a_dim1;
+	    r__1 = a[i__2].r;
+	    i__3 = j - 1;
+	    cdotc_(&q__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1]
+		    , &c__1);
+	    q__1.r = r__1 - q__2.r, q__1.i = -q__2.i;
+	    ajj = q__1.r;
+	    if (ajj <= 0.f) {
+		i__2 = j + j * a_dim1;
+		a[i__2].r = ajj, a[i__2].i = 0.f;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    i__2 = j + j * a_dim1;
+	    a[i__2].r = ajj, a[i__2].i = 0.f;
+
+/*           Compute elements J+1:N of row J. */
+
+	    if (j < *n) {
+		i__2 = j - 1;
+		clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1);
+		i__2 = j - 1;
+		i__3 = *n - j;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("Transpose", &i__2, &i__3, &q__1, &a[(j + 1) * a_dim1
+			+ 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b56, &a[j + (
+			j + 1) * a_dim1], lda);
+		i__2 = j - 1;
+		clacgv_(&i__2, &a[j * a_dim1 + 1], &c__1);
+		i__2 = *n - j;
+		r__1 = 1.f / ajj;
+		csscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Compute the Cholesky factorization A = L*L'. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute L(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j + j * a_dim1;
+	    r__1 = a[i__2].r;
+	    i__3 = j - 1;
+	    cdotc_(&q__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda);
+	    q__1.r = r__1 - q__2.r, q__1.i = -q__2.i;
+	    ajj = q__1.r;
+	    if (ajj <= 0.f) {
+		i__2 = j + j * a_dim1;
+		a[i__2].r = ajj, a[i__2].i = 0.f;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    i__2 = j + j * a_dim1;
+	    a[i__2].r = ajj, a[i__2].i = 0.f;
+
+/*           Compute elements J+1:N of column J. */
+
+	    if (j < *n) {
+		i__2 = j - 1;
+		clacgv_(&i__2, &a[j + a_dim1], lda);
+		i__2 = *n - j;
+		i__3 = j - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		cgemv_("No transpose", &i__2, &i__3, &q__1, &a[j + 1 + a_dim1]
+			, lda, &a[j + a_dim1], lda, &c_b56, &a[j + 1 + j *
+			a_dim1], &c__1);
+		i__2 = j - 1;
+		clacgv_(&i__2, &a[j + a_dim1], lda);
+		i__2 = *n - j;
+		r__1 = 1.f / ajj;
+		csscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+    goto L40;
+
+L30:
+    *info = j;
+
+L40:
+    return 0;
+
+/*     End of CPOTF2 */
+
+} /* cpotf2_ */
+
+/* Subroutine */ int cpotrf_(char *uplo, integer *n, complex *a, integer *lda,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    complex q__1;
+
+    /* Local variables */
+    static integer j, jb, nb;
+    extern /* Subroutine */ int cgemm_(char *, char *, integer *, integer *,
+	    integer *, complex *, complex *, integer *, complex *, integer *,
+	    complex *, complex *, integer *), cherk_(char *,
+	    char *, integer *, integer *, real *, complex *, integer *, real *
+	    , complex *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *);
+    static logical upper;
+    extern /* Subroutine */ int cpotf2_(char *, integer *, complex *, integer
+	    *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CPOTRF computes the Cholesky factorization of a complex Hermitian
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U**H * U,  if UPLO = 'U', or
+       A = L  * L**H,  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the block version of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U**H*U or A = L*L**H.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the leading minor of order i is not
+                  positive definite, and the factorization could not be
+                  completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CPOTRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "CPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code. */
+
+	cpotf2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code. */
+
+	if (upper) {
+
+/*           Compute the Cholesky factorization A = U'*U. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		cherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1290, &
+			a[j * a_dim1 + 1], lda, &c_b1011, &a[j + j * a_dim1],
+			lda);
+		cpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block row. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("Conjugate transpose", "No transpose", &jb, &i__3,
+			    &i__4, &q__1, &a[j * a_dim1 + 1], lda, &a[(j + jb)
+			     * a_dim1 + 1], lda, &c_b56, &a[j + (j + jb) *
+			    a_dim1], lda);
+		    i__3 = *n - j - jb + 1;
+		    ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit",
+			     &jb, &i__3, &c_b56, &a[j + j * a_dim1], lda, &a[
+			    j + (j + jb) * a_dim1], lda);
+		}
+/* L10: */
+	    }
+
+	} else {
+
+/*           Compute the Cholesky factorization A = L*L'. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		cherk_("Lower", "No transpose", &jb, &i__3, &c_b1290, &a[j +
+			a_dim1], lda, &c_b1011, &a[j + j * a_dim1], lda);
+		cpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block column. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    cgemm_("No transpose", "Conjugate transpose", &i__3, &jb,
+			    &i__4, &q__1, &a[j + jb + a_dim1], lda, &a[j +
+			    a_dim1], lda, &c_b56, &a[j + jb + j * a_dim1],
+			    lda);
+		    i__3 = *n - j - jb + 1;
+		    ctrsm_("Right", "Lower", "Conjugate transpose", "Non-unit"
+			    , &i__3, &jb, &c_b56, &a[j + j * a_dim1], lda, &a[
+			    j + jb + j * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+    goto L40;
+
+L30:
+    *info = *info + j - 1;
+
+L40:
+    return 0;
+
+/*     End of CPOTRF */
+
+} /* cpotrf_ */
+
+/* Subroutine */ int cpotri_(char *uplo, integer *n, complex *a, integer *lda,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), clauum_(
+	    char *, integer *, complex *, integer *, integer *),
+	    ctrtri_(char *, char *, integer *, complex *, integer *, integer *
+	    );
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    CPOTRI computes the inverse of a complex Hermitian positive definite
+    matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+    computed by CPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the triangular factor U or L from the Cholesky
+            factorization A = U**H*U or A = L*L**H, as computed by
+            CPOTRF.
+            On exit, the upper or lower triangle of the (Hermitian)
+            inverse of A, overwriting the input factor U or L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the (i,i) element of the factor U or L is
+                  zero, and the inverse could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CPOTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Invert the triangular Cholesky factor U or L. */
+
+    ctrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*     Form inv(U)*inv(U)' or inv(L)'*inv(L). */
+
+    clauum_(uplo, n, &a[a_offset], lda, info);
+
+    return 0;
+
+/*     End of CPOTRI */
+
+} /* cpotri_ */
+
+/* Subroutine */ int cpotrs_(char *uplo, integer *n, integer *nrhs, complex *
+	a, integer *lda, complex *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ctrsm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CPOTRS solves a system of linear equations A*X = B with a Hermitian
+    positive definite matrix A using the Cholesky factorization
+    A = U**H*U or A = L*L**H computed by CPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,N)
+            The triangular factor U or L from the Cholesky factorization
+            A = U**H*U or A = L*L**H, as computed by CPOTRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    B       (input/output) COMPLEX array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CPOTRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*
+          Solve A*X = B where A = U'*U.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	ctrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, &
+		c_b56, &a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	ctrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b56, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A*X = B where A = L*L'.
+
+          Solve L*X = B, overwriting B with X.
+*/
+
+	ctrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b56, &
+		a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	ctrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, &
+		c_b56, &a[a_offset], lda, &b[b_offset], ldb);
+    }
+
+    return 0;
+
+/*     End of CPOTRS */
+
+} /* cpotrs_ */
+
+/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex *
+	cy, integer *incy, real *c__, real *s)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4;
+    complex q__1, q__2, q__3;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static complex ctemp;
+
+
+/*
+       applies a plane rotation, where the cos and sin (c and s) are real
+       and the vectors cx and cy are complex.
+       jack dongarra, linpack, 3/11/78.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments not equal
+            to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i;
+	i__3 = iy;
+	q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i;
+	q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+	ctemp.r = q__1.r, ctemp.i = q__1.i;
+	i__2 = iy;
+	i__3 = iy;
+	q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i;
+	i__4 = ix;
+	q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i;
+	q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
+	cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
+	i__2 = ix;
+	cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i;
+	i__3 = i__;
+	q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i;
+	q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
+	ctemp.r = q__1.r, ctemp.i = q__1.i;
+	i__2 = i__;
+	i__3 = i__;
+	q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i;
+	i__4 = i__;
+	q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i;
+	q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
+	cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
+	i__2 = i__;
+	cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
+/* L30: */
+    }
+    return 0;
+} /* csrot_ */
+
+/* Subroutine */ int cstedc_(char *compz, integer *n, real *d__, real *e,
+	complex *z__, integer *ldz, complex *work, integer *lwork, real *
+	rwork, integer *lrwork, integer *iwork, integer *liwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2, i__3, i__4;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k, m;
+    static real p;
+    static integer ii, ll, end, lgn;
+    static real eps, tiny;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cswap_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static integer lwmin;
+    extern /* Subroutine */ int claed0_(integer *, integer *, real *, real *,
+	    complex *, integer *, complex *, integer *, real *, integer *,
+	    integer *);
+    static integer start;
+    extern /* Subroutine */ int clacrm_(integer *, integer *, complex *,
+	    integer *, real *, integer *, complex *, integer *, real *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex
+	    *, integer *, complex *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), sstedc_(char *, integer *, real *, real *, real *,
+	    integer *, real *, integer *, integer *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *,
+	    real *, integer *);
+    static integer liwmin, icompz;
+    extern /* Subroutine */ int csteqr_(char *, integer *, real *, real *,
+	    complex *, integer *, real *, integer *);
+    static real orgnrm;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
+    static integer lrwmin;
+    static logical lquery;
+    static integer smlsiz;
+    extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *,
+	    real *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the divide and conquer method.
+    The eigenvectors of a full or band complex Hermitian matrix can also
+    be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this
+    matrix to tridiagonal form.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.  See SLAED3 for details.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'I':  Compute eigenvectors of tridiagonal matrix also.
+            = 'V':  Compute eigenvectors of original Hermitian matrix
+                    also.  On entry, Z contains the unitary matrix used
+                    to reduce the original matrix to tridiagonal form.
+
+    N       (input) INTEGER
+            The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) REAL array, dimension (N-1)
+            On entry, the subdiagonal elements of the tridiagonal matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) COMPLEX array, dimension (LDZ,N)
+            On entry, if COMPZ = 'V', then Z contains the unitary
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original Hermitian matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If  COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1.
+            If eigenvectors are desired, then LDZ >= max(1,N).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1.
+            If COMPZ = 'V' and N > 1, LWORK must be at least N*N.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace/output) REAL array,
+                                           dimension (LRWORK)
+            On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+
+    LRWORK  (input) INTEGER
+            The dimension of the array RWORK.
+            If COMPZ = 'N' or N <= 1, LRWORK must be at least 1.
+            If COMPZ = 'V' and N > 1, LRWORK must be at least
+                           1 + 3*N + 2*N*lg N + 3*N**2 ,
+                           where lg( N ) = smallest integer k such
+                           that 2**k >= N.
+            If COMPZ = 'I' and N > 1, LRWORK must be at least
+                           1 + 4*N + 2*N**2 .
+
+            If LRWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the RWORK array,
+            returns this value as the first entry of the RWORK array, and
+            no error message related to LRWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If COMPZ = 'N' or N <= 1, LIWORK must be at least 1.
+            If COMPZ = 'V' or N > 1,  LIWORK must be at least
+                                      6 + 6*N + 5*N*lg N.
+            If COMPZ = 'I' or N > 1,  LIWORK must be at least
+                                      3 + 5*N .
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1);
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if ((*n <= 1) || (icompz <= 0)) {
+	lwmin = 1;
+	liwmin = 1;
+	lrwmin = 1;
+    } else {
+	lgn = (integer) (log((real) (*n)) / log(2.f));
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (icompz == 1) {
+	    lwmin = *n * *n;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lrwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3;
+	    liwmin = *n * 6 + 6 + *n * 5 * lgn;
+	} else if (icompz == 2) {
+	    lwmin = 1;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lrwmin = ((*n) << (2)) + 1 + ((i__1 * i__1) << (1));
+	    liwmin = *n * 5 + 3;
+	}
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*lrwork < lrwmin && ! lquery) {
+	*info = -10;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	work[1].r = (real) lwmin, work[1].i = 0.f;
+	rwork[1] = (real) lrwmin;
+	iwork[1] = liwmin;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CSTEDC", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	if (icompz != 0) {
+	    i__1 = z_dim1 + 1;
+	    z__[i__1].r = 1.f, z__[i__1].i = 0.f;
+	}
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "CSTEDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       If the following conditional clause is removed, then the routine
+       will use the Divide and Conquer routine to compute only the
+       eigenvalues, which requires (3N + 3N**2) real workspace and
+       (2 + 5N + 2N lg(N)) integer workspace.
+       Since on many architectures SSTERF is much faster than any other
+       algorithm for finding eigenvalues only, it is used here
+       as the default.
+
+       If COMPZ = 'N', use SSTERF to compute the eigenvalues.
+*/
+
+    if (icompz == 0) {
+	ssterf_(n, &d__[1], &e[1], info);
+	return 0;
+    }
+
+/*
+       If N is smaller than the minimum divide size (SMLSIZ+1), then
+       solve the problem with another solver.
+*/
+
+    if (*n <= smlsiz) {
+	if (icompz == 0) {
+	    ssterf_(n, &d__[1], &e[1], info);
+	    return 0;
+	} else if (icompz == 2) {
+	    csteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1],
+		    info);
+	    return 0;
+	} else {
+	    csteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1],
+		    info);
+	    return 0;
+	}
+    }
+
+/*     If COMPZ = 'I', we simply call SSTEDC instead. */
+
+    if (icompz == 2) {
+	slaset_("Full", n, n, &c_b320, &c_b1011, &rwork[1], n);
+	ll = *n * *n + 1;
+	i__1 = *lrwork - ll + 1;
+	sstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, &
+		iwork[1], liwork, info);
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * z_dim1;
+		i__4 = (j - 1) * *n + i__;
+		z__[i__3].r = rwork[i__4], z__[i__3].i = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*
+       From now on, only option left to be handled is COMPZ = 'V',
+       i.e. ICOMPZ = 1.
+
+       Scale.
+*/
+
+    orgnrm = slanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.f) {
+	return 0;
+    }
+
+    eps = slamch_("Epsilon");
+
+    start = 1;
+
+/*     while ( START <= N ) */
+
+L30:
+    if (start <= *n) {
+
+/*
+       Let END be the position of the next subdiagonal entry such that
+       E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+       matrix identified by the elements between START and END
+       constitutes an independent sub-problem.
+*/
+
+	end = start;
+L40:
+	if (end < *n) {
+	    tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 =
+		    d__[end + 1], dabs(r__2)));
+	    if ((r__1 = e[end], dabs(r__1)) > tiny) {
+		++end;
+		goto L40;
+	    }
+	}
+
+/*        (Sub) Problem determined.  Compute its size and solve it. */
+
+	m = end - start + 1;
+	if (m > smlsiz) {
+	    *info = smlsiz;
+
+/*           Scale. */
+
+	    orgnrm = slanst_("M", &m, &d__[start], &e[start]);
+	    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &m, &c__1, &d__[
+		    start], &m, info);
+	    i__1 = m - 1;
+	    i__2 = m - 1;
+	    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &i__1, &c__1, &e[
+		    start], &i__2, info);
+
+	    claed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1],
+		    ldz, &work[1], n, &rwork[1], &iwork[1], info);
+	    if (*info > 0) {
+		*info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m
+			+ 1) + start - 1;
+		return 0;
+	    }
+
+/*           Scale back. */
+
+	    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &m, &c__1, &d__[
+		    start], &m, info);
+
+	} else {
+	    ssteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m *
+		     m + 1], info);
+	    clacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, &
+		    work[1], n, &rwork[m * m + 1]);
+	    clacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz);
+	    if (*info > 0) {
+		*info = start * (*n + 1) + end;
+		return 0;
+	    }
+	}
+
+	start = end + 1;
+	goto L30;
+    }
+
+/*
+       endwhile
+
+       If the problem split any number of times, then the eigenvalues
+       will not be properly ordered.  Here we permute the eigenvalues
+       (and the associated eigenvectors) into ascending order.
+*/
+
+    if (m != *n) {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	i__1 = *n;
+	for (ii = 2; ii <= i__1; ++ii) {
+	    i__ = ii - 1;
+	    k = i__;
+	    p = d__[i__];
+	    i__2 = *n;
+	    for (j = ii; j <= i__2; ++j) {
+		if (d__[j] < p) {
+		    k = j;
+		    p = d__[j];
+		}
+/* L50: */
+	    }
+	    if (k != i__) {
+		d__[k] = d__[i__];
+		d__[i__] = p;
+		cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1],
+			 &c__1);
+	    }
+/* L60: */
+	}
+    }
+
+    work[1].r = (real) lwmin, work[1].i = 0.f;
+    rwork[1] = (real) lrwmin;
+    iwork[1] = liwmin;
+
+    return 0;
+
+/*     End of CSTEDC */
+
+} /* cstedc_ */
+
+/* Subroutine */ int csteqr_(char *compz, integer *n, real *d__, real *e,
+	complex *z__, integer *ldz, real *work, integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static real b, c__, f, g;
+    static integer i__, j, k, l, m;
+    static real p, r__, s;
+    static integer l1, ii, mm, lm1, mm1, nm1;
+    static real rt1, rt2, eps;
+    static integer lsv;
+    static real tst, eps2;
+    static integer lend, jtot;
+    extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *)
+	    ;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int clasr_(char *, char *, char *, integer *,
+	    integer *, real *, real *, complex *, integer *);
+    static real anorm;
+    extern /* Subroutine */ int cswap_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static integer lendm1, lendp1;
+    extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real *
+	    , real *, real *);
+    extern doublereal slapy2_(real *, real *);
+    static integer iscale;
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int claset_(char *, integer *, integer *, complex
+	    *, complex *, complex *, integer *);
+    static real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real safmax;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *);
+    static integer lendsv;
+    extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
+	    );
+    static real ssfmin;
+    static integer nmaxit, icompz;
+    static real ssfmax;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the implicit QL or QR method.
+    The eigenvectors of a full or band complex Hermitian matrix can also
+    be found if CHETRD or CHPTRD or CHBTRD has been used to reduce this
+    matrix to tridiagonal form.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'V':  Compute eigenvalues and eigenvectors of the original
+                    Hermitian matrix.  On entry, Z must contain the
+                    unitary matrix used to reduce the original matrix
+                    to tridiagonal form.
+            = 'I':  Compute eigenvalues and eigenvectors of the
+                    tridiagonal matrix.  Z is initialized to the identity
+                    matrix.
+
+    N       (input) INTEGER
+            The order of the matrix.  N >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) REAL array, dimension (N-1)
+            On entry, the (n-1) subdiagonal elements of the tridiagonal
+            matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) COMPLEX array, dimension (LDZ, N)
+            On entry, if  COMPZ = 'V', then Z contains the unitary
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original Hermitian matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1, and if
+            eigenvectors are desired, then  LDZ >= max(1,N).
+
+    WORK    (workspace) REAL array, dimension (max(1,2*N-2))
+            If COMPZ = 'N', then WORK is not referenced.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm has failed to find all the eigenvalues in
+                  a total of 30*N iterations; if INFO = i, then i
+                  elements of E have not converged to zero; on exit, D
+                  and E contain the elements of a symmetric tridiagonal
+                  matrix which is unitarily similar to the original
+                  matrix.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CSTEQR", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	if (icompz == 2) {
+	    i__1 = z_dim1 + 1;
+	    z__[i__1].r = 1.f, z__[i__1].i = 0.f;
+	}
+	return 0;
+    }
+
+/*     Determine the unit roundoff and over/underflow thresholds. */
+
+    eps = slamch_("E");
+/* Computing 2nd power */
+    r__1 = eps;
+    eps2 = r__1 * r__1;
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    ssfmax = sqrt(safmax) / 3.f;
+    ssfmin = sqrt(safmin) / eps2;
+
+/*
+       Compute the eigenvalues and eigenvectors of the tridiagonal
+       matrix.
+*/
+
+    if (icompz == 2) {
+	claset_("Full", n, n, &c_b55, &c_b56, &z__[z_offset], ldz);
+    }
+
+    nmaxit = *n * 30;
+    jtot = 0;
+
+/*
+       Determine where the matrix splits and choose QL or QR iteration
+       for each block, according to whether top or bottom diagonal
+       element is smaller.
+*/
+
+    l1 = 1;
+    nm1 = *n - 1;
+
+L10:
+    if (l1 > *n) {
+	goto L160;
+    }
+    if (l1 > 1) {
+	e[l1 - 1] = 0.f;
+    }
+    if (l1 <= nm1) {
+	i__1 = nm1;
+	for (m = l1; m <= i__1; ++m) {
+	    tst = (r__1 = e[m], dabs(r__1));
+	    if (tst == 0.f) {
+		goto L30;
+	    }
+	    if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m
+		    + 1], dabs(r__2))) * eps) {
+		e[m] = 0.f;
+		goto L30;
+	    }
+/* L20: */
+	}
+    }
+    m = *n;
+
+L30:
+    l = l1;
+    lsv = l;
+    lend = m;
+    lendsv = lend;
+    l1 = m + 1;
+    if (lend == l) {
+	goto L10;
+    }
+
+/*     Scale submatrix in rows and columns L to LEND */
+
+    i__1 = lend - l + 1;
+    anorm = slanst_("I", &i__1, &d__[l], &e[l]);
+    iscale = 0;
+    if (anorm == 0.f) {
+	goto L10;
+    }
+    if (anorm > ssfmax) {
+	iscale = 1;
+	i__1 = lend - l + 1;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
+		info);
+    } else if (anorm < ssfmin) {
+	iscale = 2;
+	i__1 = lend - l + 1;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
+		info);
+    }
+
+/*     Choose between QL and QR iteration */
+
+    if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) {
+	lend = lsv;
+	l = lendsv;
+    }
+
+    if (lend > l) {
+
+/*
+          QL Iteration
+
+          Look for small subdiagonal element.
+*/
+
+L40:
+	if (l != lend) {
+	    lendm1 = lend - 1;
+	    i__1 = lendm1;
+	    for (m = l; m <= i__1; ++m) {
+/* Computing 2nd power */
+		r__2 = (r__1 = e[m], dabs(r__1));
+		tst = r__2 * r__2;
+		if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m
+			+ 1], dabs(r__2)) + safmin) {
+		    goto L60;
+		}
+/* L50: */
+	    }
+	}
+
+	m = lend;
+
+L60:
+	if (m < lend) {
+	    e[m] = 0.f;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L80;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l + 1) {
+	    if (icompz > 0) {
+		slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
+		work[l] = c__;
+		work[*n - 1 + l] = s;
+		clasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
+			z__[l * z_dim1 + 1], ldz);
+	    } else {
+		slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
+	    }
+	    d__[l] = rt1;
+	    d__[l + 1] = rt2;
+	    e[l] = 0.f;
+	    l += 2;
+	    if (l <= lend) {
+		goto L40;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l + 1] - p) / (e[l] * 2.f);
+	r__ = slapy2_(&g, &c_b1011);
+	g = d__[m] - p + e[l] / (g + r_sign(&r__, &g));
+
+	s = 1.f;
+	c__ = 1.f;
+	p = 0.f;
+
+/*        Inner loop */
+
+	mm1 = m - 1;
+	i__1 = l;
+	for (i__ = mm1; i__ >= i__1; --i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    slartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m - 1) {
+		e[i__ + 1] = r__;
+	    }
+	    g = d__[i__ + 1] - p;
+	    r__ = (d__[i__] - g) * s + c__ * 2.f * b;
+	    p = s * r__;
+	    d__[i__ + 1] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = -s;
+	    }
+
+/* L70: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = m - l + 1;
+	    clasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[l] = g;
+	goto L40;
+
+/*        Eigenvalue found. */
+
+L80:
+	d__[l] = p;
+
+	++l;
+	if (l <= lend) {
+	    goto L40;
+	}
+	goto L140;
+
+    } else {
+
+/*
+          QR Iteration
+
+          Look for small superdiagonal element.
+*/
+
+L90:
+	if (l != lend) {
+	    lendp1 = lend + 1;
+	    i__1 = lendp1;
+	    for (m = l; m >= i__1; --m) {
+/* Computing 2nd power */
+		r__2 = (r__1 = e[m - 1], dabs(r__1));
+		tst = r__2 * r__2;
+		if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m
+			- 1], dabs(r__2)) + safmin) {
+		    goto L110;
+		}
+/* L100: */
+	    }
+	}
+
+	m = lend;
+
+L110:
+	if (m > lend) {
+	    e[m - 1] = 0.f;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L130;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l - 1) {
+	    if (icompz > 0) {
+		slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
+			;
+		work[m] = c__;
+		work[*n - 1 + m] = s;
+		clasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
+			z__[(l - 1) * z_dim1 + 1], ldz);
+	    } else {
+		slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
+	    }
+	    d__[l - 1] = rt1;
+	    d__[l] = rt2;
+	    e[l - 1] = 0.f;
+	    l += -2;
+	    if (l >= lend) {
+		goto L90;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l - 1] - p) / (e[l - 1] * 2.f);
+	r__ = slapy2_(&g, &c_b1011);
+	g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g));
+
+	s = 1.f;
+	c__ = 1.f;
+	p = 0.f;
+
+/*        Inner loop */
+
+	lm1 = l - 1;
+	i__1 = lm1;
+	for (i__ = m; i__ <= i__1; ++i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    slartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m) {
+		e[i__ - 1] = r__;
+	    }
+	    g = d__[i__] - p;
+	    r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b;
+	    p = s * r__;
+	    d__[i__] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = s;
+	    }
+
+/* L120: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = l - m + 1;
+	    clasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[lm1] = g;
+	goto L90;
+
+/*        Eigenvalue found. */
+
+L130:
+	d__[l] = p;
+
+	--l;
+	if (l >= lend) {
+	    goto L90;
+	}
+	goto L140;
+
+    }
+
+/*     Undo scaling if necessary */
+
+L140:
+    if (iscale == 1) {
+	i__1 = lendsv - lsv + 1;
+	slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    } else if (iscale == 2) {
+	i__1 = lendsv - lsv + 1;
+	slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    }
+
+/*
+       Check for no convergence to an eigenvalue after a total
+       of N*MAXIT iterations.
+*/
+
+    if (jtot == nmaxit) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (e[i__] != 0.f) {
+		++(*info);
+	    }
+/* L150: */
+	}
+	return 0;
+    }
+    goto L10;
+
+/*     Order eigenvalues and eigenvectors. */
+
+L160:
+    if (icompz == 0) {
+
+/*        Use Quick Sort */
+
+	slasrt_("I", n, &d__[1], info);
+
+    } else {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	i__1 = *n;
+	for (ii = 2; ii <= i__1; ++ii) {
+	    i__ = ii - 1;
+	    k = i__;
+	    p = d__[i__];
+	    i__2 = *n;
+	    for (j = ii; j <= i__2; ++j) {
+		if (d__[j] < p) {
+		    k = j;
+		    p = d__[j];
+		}
+/* L170: */
+	    }
+	    if (k != i__) {
+		d__[k] = d__[i__];
+		d__[i__] = p;
+		cswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1],
+			 &c__1);
+	    }
+/* L180: */
+	}
+    }
+    return 0;
+
+/*     End of CSTEQR */
+
+} /* csteqr_ */
+
+/* Subroutine */ int ctrevc_(char *side, char *howmny, logical *select,
+	integer *n, complex *t, integer *ldt, complex *vl, integer *ldvl,
+	complex *vr, integer *ldvr, integer *mm, integer *m, complex *work,
+	real *rwork, integer *info)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3, i__4, i__5;
+    real r__1, r__2, r__3;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    double r_imag(complex *);
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, k, ii, ki, is;
+    static real ulp;
+    static logical allv;
+    static real unfl, ovfl, smin;
+    static logical over;
+    static real scale;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int cgemv_(char *, integer *, integer *, complex *
+	    , complex *, integer *, complex *, integer *, complex *, complex *
+	    , integer *);
+    static real remax;
+    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *,
+	    complex *, integer *);
+    static logical leftv, bothv, somev;
+    extern /* Subroutine */ int slabad_(real *, real *);
+    extern integer icamax_(integer *, complex *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
+	    *), xerbla_(char *, integer *), clatrs_(char *, char *,
+	    char *, char *, integer *, complex *, integer *, complex *, real *
+	    , real *, integer *);
+    extern doublereal scasum_(integer *, complex *, integer *);
+    static logical rightv;
+    static real smlnum;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CTREVC computes some or all of the right and/or left eigenvectors of
+    a complex upper triangular matrix T.
+
+    The right eigenvector x and the left eigenvector y of T corresponding
+    to an eigenvalue w are defined by:
+
+                 T*x = w*x,     y'*T = w*y'
+
+    where y' denotes the conjugate transpose of the vector y.
+
+    If all eigenvectors are requested, the routine may either return the
+    matrices X and/or Y of right or left eigenvectors of T, or the
+    products Q*X and/or Q*Y, where Q is an input unitary
+    matrix. If T was obtained from the Schur factorization of an
+    original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+    right or left eigenvectors of A.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  compute right eigenvectors only;
+            = 'L':  compute left eigenvectors only;
+            = 'B':  compute both right and left eigenvectors.
+
+    HOWMNY  (input) CHARACTER*1
+            = 'A':  compute all right and/or left eigenvectors;
+            = 'B':  compute all right and/or left eigenvectors,
+                    and backtransform them using the input matrices
+                    supplied in VR and/or VL;
+            = 'S':  compute selected right and/or left eigenvectors,
+                    specified by the logical array SELECT.
+
+    SELECT  (input) LOGICAL array, dimension (N)
+            If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+            computed.
+            If HOWMNY = 'A' or 'B', SELECT is not referenced.
+            To select the eigenvector corresponding to the j-th
+            eigenvalue, SELECT(j) must be set to .TRUE..
+
+    N       (input) INTEGER
+            The order of the matrix T. N >= 0.
+
+    T       (input/output) COMPLEX array, dimension (LDT,N)
+            The upper triangular matrix T.  T is modified, but restored
+            on exit.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= max(1,N).
+
+    VL      (input/output) COMPLEX array, dimension (LDVL,MM)
+            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+            contain an N-by-N matrix Q (usually the unitary matrix Q of
+            Schur vectors returned by CHSEQR).
+            On exit, if SIDE = 'L' or 'B', VL contains:
+            if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+                             VL is lower triangular. The i-th column
+                             VL(i) of VL is the eigenvector corresponding
+                             to T(i,i).
+            if HOWMNY = 'B', the matrix Q*Y;
+            if HOWMNY = 'S', the left eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VL, in the same order as their
+                             eigenvalues.
+            If SIDE = 'R', VL is not referenced.
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= max(1,N) if
+            SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+
+    VR      (input/output) COMPLEX array, dimension (LDVR,MM)
+            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+            contain an N-by-N matrix Q (usually the unitary matrix Q of
+            Schur vectors returned by CHSEQR).
+            On exit, if SIDE = 'R' or 'B', VR contains:
+            if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+                             VR is upper triangular. The i-th column
+                             VR(i) of VR is the eigenvector corresponding
+                             to T(i,i).
+            if HOWMNY = 'B', the matrix Q*X;
+            if HOWMNY = 'S', the right eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VR, in the same order as their
+                             eigenvalues.
+            If SIDE = 'L', VR is not referenced.
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= max(1,N) if
+             SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+
+    MM      (input) INTEGER
+            The number of columns in the arrays VL and/or VR. MM >= M.
+
+    M       (output) INTEGER
+            The number of columns in the arrays VL and/or VR actually
+            used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+            is set to N.  Each selected eigenvector occupies one
+            column.
+
+    WORK    (workspace) COMPLEX array, dimension (2*N)
+
+    RWORK   (workspace) REAL array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The algorithm used in this program is basically backward (forward)
+    substitution, with scaling to make the the code robust against
+    possible overflow.
+
+    Each eigenvector is normalized so that the element of largest
+    magnitude has magnitude 1; here the magnitude of a complex number
+    (x,y) is taken to be |x| + |y|.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --select;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+    --rwork;
+
+    /* Function Body */
+    bothv = lsame_(side, "B");
+    rightv = (lsame_(side, "R")) || (bothv);
+    leftv = (lsame_(side, "L")) || (bothv);
+
+    allv = lsame_(howmny, "A");
+    over = lsame_(howmny, "B");
+    somev = lsame_(howmny, "S");
+
+/*
+       Set M to the number of columns required to store the selected
+       eigenvectors.
+*/
+
+    if (somev) {
+	*m = 0;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (select[j]) {
+		++(*m);
+	    }
+/* L10: */
+	}
+    } else {
+	*m = *n;
+    }
+
+    *info = 0;
+    if (! rightv && ! leftv) {
+	*info = -1;
+    } else if (! allv && ! over && ! somev) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*ldt < max(1,*n)) {
+	*info = -6;
+    } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) {
+	*info = -8;
+    } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) {
+	*info = -10;
+    } else if (*mm < *m) {
+	*info = -11;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CTREVC", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set the constants to control overflow. */
+
+    unfl = slamch_("Safe minimum");
+    ovfl = 1.f / unfl;
+    slabad_(&unfl, &ovfl);
+    ulp = slamch_("Precision");
+    smlnum = unfl * (*n / ulp);
+
+/*     Store the diagonal elements of T in working array WORK. */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__ + *n;
+	i__3 = i__ + i__ * t_dim1;
+	work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i;
+/* L20: */
+    }
+
+/*
+       Compute 1-norm of each column of strictly upper triangular
+       part of T to control overflow in triangular solver.
+*/
+
+    rwork[1] = 0.f;
+    i__1 = *n;
+    for (j = 2; j <= i__1; ++j) {
+	i__2 = j - 1;
+	rwork[j] = scasum_(&i__2, &t[j * t_dim1 + 1], &c__1);
+/* L30: */
+    }
+
+    if (rightv) {
+
+/*        Compute right eigenvectors. */
+
+	is = *m;
+	for (ki = *n; ki >= 1; --ki) {
+
+	    if (somev) {
+		if (! select[ki]) {
+		    goto L80;
+		}
+	    }
+/* Computing MAX */
+	    i__1 = ki + ki * t_dim1;
+	    r__3 = ulp * ((r__1 = t[i__1].r, dabs(r__1)) + (r__2 = r_imag(&t[
+		    ki + ki * t_dim1]), dabs(r__2)));
+	    smin = dmax(r__3,smlnum);
+
+	    work[1].r = 1.f, work[1].i = 0.f;
+
+/*           Form right-hand side. */
+
+	    i__1 = ki - 1;
+	    for (k = 1; k <= i__1; ++k) {
+		i__2 = k;
+		i__3 = k + ki * t_dim1;
+		q__1.r = -t[i__3].r, q__1.i = -t[i__3].i;
+		work[i__2].r = q__1.r, work[i__2].i = q__1.i;
+/* L40: */
+	    }
+
+/*
+             Solve the triangular system:
+                (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK.
+*/
+
+	    i__1 = ki - 1;
+	    for (k = 1; k <= i__1; ++k) {
+		i__2 = k + k * t_dim1;
+		i__3 = k + k * t_dim1;
+		i__4 = ki + ki * t_dim1;
+		q__1.r = t[i__3].r - t[i__4].r, q__1.i = t[i__3].i - t[i__4]
+			.i;
+		t[i__2].r = q__1.r, t[i__2].i = q__1.i;
+		i__2 = k + k * t_dim1;
+		if ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k *
+			 t_dim1]), dabs(r__2)) < smin) {
+		    i__3 = k + k * t_dim1;
+		    t[i__3].r = smin, t[i__3].i = 0.f;
+		}
+/* L50: */
+	    }
+
+	    if (ki > 1) {
+		i__1 = ki - 1;
+		clatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[
+			t_offset], ldt, &work[1], &scale, &rwork[1], info);
+		i__1 = ki;
+		work[i__1].r = scale, work[i__1].i = 0.f;
+	    }
+
+/*           Copy the vector x or Q*x to VR and normalize. */
+
+	    if (! over) {
+		ccopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1);
+
+		ii = icamax_(&ki, &vr[is * vr_dim1 + 1], &c__1);
+		i__1 = ii + is * vr_dim1;
+		remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 =
+			r_imag(&vr[ii + is * vr_dim1]), dabs(r__2)));
+		csscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);
+
+		i__1 = *n;
+		for (k = ki + 1; k <= i__1; ++k) {
+		    i__2 = k + is * vr_dim1;
+		    vr[i__2].r = 0.f, vr[i__2].i = 0.f;
+/* L60: */
+		}
+	    } else {
+		if (ki > 1) {
+		    i__1 = ki - 1;
+		    q__1.r = scale, q__1.i = 0.f;
+		    cgemv_("N", n, &i__1, &c_b56, &vr[vr_offset], ldvr, &work[
+			    1], &c__1, &q__1, &vr[ki * vr_dim1 + 1], &c__1);
+		}
+
+		ii = icamax_(n, &vr[ki * vr_dim1 + 1], &c__1);
+		i__1 = ii + ki * vr_dim1;
+		remax = 1.f / ((r__1 = vr[i__1].r, dabs(r__1)) + (r__2 =
+			r_imag(&vr[ii + ki * vr_dim1]), dabs(r__2)));
+		csscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
+	    }
+
+/*           Set back the original diagonal elements of T. */
+
+	    i__1 = ki - 1;
+	    for (k = 1; k <= i__1; ++k) {
+		i__2 = k + k * t_dim1;
+		i__3 = k + *n;
+		t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i;
+/* L70: */
+	    }
+
+	    --is;
+L80:
+	    ;
+	}
+    }
+
+    if (leftv) {
+
+/*        Compute left eigenvectors. */
+
+	is = 1;
+	i__1 = *n;
+	for (ki = 1; ki <= i__1; ++ki) {
+
+	    if (somev) {
+		if (! select[ki]) {
+		    goto L130;
+		}
+	    }
+/* Computing MAX */
+	    i__2 = ki + ki * t_dim1;
+	    r__3 = ulp * ((r__1 = t[i__2].r, dabs(r__1)) + (r__2 = r_imag(&t[
+		    ki + ki * t_dim1]), dabs(r__2)));
+	    smin = dmax(r__3,smlnum);
+
+	    i__2 = *n;
+	    work[i__2].r = 1.f, work[i__2].i = 0.f;
+
+/*           Form right-hand side. */
+
+	    i__2 = *n;
+	    for (k = ki + 1; k <= i__2; ++k) {
+		i__3 = k;
+		r_cnjg(&q__2, &t[ki + k * t_dim1]);
+		q__1.r = -q__2.r, q__1.i = -q__2.i;
+		work[i__3].r = q__1.r, work[i__3].i = q__1.i;
+/* L90: */
+	    }
+
+/*
+             Solve the triangular system:
+                (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK.
+*/
+
+	    i__2 = *n;
+	    for (k = ki + 1; k <= i__2; ++k) {
+		i__3 = k + k * t_dim1;
+		i__4 = k + k * t_dim1;
+		i__5 = ki + ki * t_dim1;
+		q__1.r = t[i__4].r - t[i__5].r, q__1.i = t[i__4].i - t[i__5]
+			.i;
+		t[i__3].r = q__1.r, t[i__3].i = q__1.i;
+		i__3 = k + k * t_dim1;
+		if ((r__1 = t[i__3].r, dabs(r__1)) + (r__2 = r_imag(&t[k + k *
+			 t_dim1]), dabs(r__2)) < smin) {
+		    i__4 = k + k * t_dim1;
+		    t[i__4].r = smin, t[i__4].i = 0.f;
+		}
+/* L100: */
+	    }
+
+	    if (ki < *n) {
+		i__2 = *n - ki;
+		clatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", &
+			i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki +
+			1], &scale, &rwork[1], info);
+		i__2 = ki;
+		work[i__2].r = scale, work[i__2].i = 0.f;
+	    }
+
+/*           Copy the vector x or Q*x to VL and normalize. */
+
+	    if (! over) {
+		i__2 = *n - ki + 1;
+		ccopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1)
+			;
+
+		i__2 = *n - ki + 1;
+		ii = icamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1;
+		i__2 = ii + is * vl_dim1;
+		remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 =
+			r_imag(&vl[ii + is * vl_dim1]), dabs(r__2)));
+		i__2 = *n - ki + 1;
+		csscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
+
+		i__2 = ki - 1;
+		for (k = 1; k <= i__2; ++k) {
+		    i__3 = k + is * vl_dim1;
+		    vl[i__3].r = 0.f, vl[i__3].i = 0.f;
+/* L110: */
+		}
+	    } else {
+		if (ki < *n) {
+		    i__2 = *n - ki;
+		    q__1.r = scale, q__1.i = 0.f;
+		    cgemv_("N", n, &i__2, &c_b56, &vl[(ki + 1) * vl_dim1 + 1],
+			     ldvl, &work[ki + 1], &c__1, &q__1, &vl[ki *
+			    vl_dim1 + 1], &c__1);
+		}
+
+		ii = icamax_(n, &vl[ki * vl_dim1 + 1], &c__1);
+		i__2 = ii + ki * vl_dim1;
+		remax = 1.f / ((r__1 = vl[i__2].r, dabs(r__1)) + (r__2 =
+			r_imag(&vl[ii + ki * vl_dim1]), dabs(r__2)));
+		csscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
+	    }
+
+/*           Set back the original diagonal elements of T. */
+
+	    i__2 = *n;
+	    for (k = ki + 1; k <= i__2; ++k) {
+		i__3 = k + k * t_dim1;
+		i__4 = k + *n;
+		t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i;
+/* L120: */
+	    }
+
+	    ++is;
+L130:
+	    ;
+	}
+    }
+
+    return 0;
+
+/*     End of CTREVC */
+
+} /* ctrevc_ */
+
+/* Subroutine */ int ctrti2_(char *uplo, char *diag, integer *n, complex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    complex q__1;
+
+    /* Builtin functions */
+    void c_div(complex *, complex *, complex *);
+
+    /* Local variables */
+    static integer j;
+    static complex ajj;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int ctrmv_(char *, char *, char *, integer *,
+	    complex *, integer *, complex *, integer *), xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CTRTI2 computes the inverse of a complex upper or lower triangular
+    matrix.
+
+    This is the Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the matrix A is upper or lower triangular.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    DIAG    (input) CHARACTER*1
+            Specifies whether or not the matrix A is unit triangular.
+            = 'N':  Non-unit triangular
+            = 'U':  Unit triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading n by n upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CTRTI2", &i__1);
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute inverse of upper triangular matrix. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (nounit) {
+		i__2 = j + j * a_dim1;
+		c_div(&q__1, &c_b56, &a[j + j * a_dim1]);
+		a[i__2].r = q__1.r, a[i__2].i = q__1.i;
+		i__2 = j + j * a_dim1;
+		q__1.r = -a[i__2].r, q__1.i = -a[i__2].i;
+		ajj.r = q__1.r, ajj.i = q__1.i;
+	    } else {
+		q__1.r = -1.f, q__1.i = -0.f;
+		ajj.r = q__1.r, ajj.i = q__1.i;
+	    }
+
+/*           Compute elements 1:j-1 of j-th column. */
+
+	    i__2 = j - 1;
+	    ctrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, &
+		    a[j * a_dim1 + 1], &c__1);
+	    i__2 = j - 1;
+	    cscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1);
+/* L10: */
+	}
+    } else {
+
+/*        Compute inverse of lower triangular matrix. */
+
+	for (j = *n; j >= 1; --j) {
+	    if (nounit) {
+		i__1 = j + j * a_dim1;
+		c_div(&q__1, &c_b56, &a[j + j * a_dim1]);
+		a[i__1].r = q__1.r, a[i__1].i = q__1.i;
+		i__1 = j + j * a_dim1;
+		q__1.r = -a[i__1].r, q__1.i = -a[i__1].i;
+		ajj.r = q__1.r, ajj.i = q__1.i;
+	    } else {
+		q__1.r = -1.f, q__1.i = -0.f;
+		ajj.r = q__1.r, ajj.i = q__1.i;
+	    }
+	    if (j < *n) {
+
+/*              Compute elements j+1:n of j-th column. */
+
+		i__1 = *n - j;
+		ctrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j +
+			1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1);
+		i__1 = *n - j;
+		cscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of CTRTI2 */
+
+} /* ctrti2_ */
+
+/* Subroutine */ int ctrtri_(char *uplo, char *diag, integer *n, complex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5;
+    complex q__1;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer j, jb, nb, nn;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ctrmm_(char *, char *, char *, char *,
+	    integer *, integer *, complex *, complex *, integer *, complex *,
+	    integer *), ctrsm_(char *, char *,
+	     char *, char *, integer *, integer *, complex *, complex *,
+	    integer *, complex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int ctrti2_(char *, char *, integer *, complex *,
+	    integer *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CTRTRI computes the inverse of a complex upper or lower triangular
+    matrix A.
+
+    This is the Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  A is upper triangular;
+            = 'L':  A is lower triangular.
+
+    DIAG    (input) CHARACTER*1
+            = 'N':  A is non-unit triangular;
+            = 'U':  A is unit triangular.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+                 matrix is singular and its inverse can not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CTRTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Check for singularity if non-unit. */
+
+    if (nounit) {
+	i__1 = *n;
+	for (*info = 1; *info <= i__1; ++(*info)) {
+	    i__2 = *info + *info * a_dim1;
+	    if (a[i__2].r == 0.f && a[i__2].i == 0.f) {
+		return 0;
+	    }
+/* L10: */
+	}
+	*info = 0;
+    }
+
+/*
+       Determine the block size for this environment.
+
+   Writing concatenation
+*/
+    i__3[0] = 1, a__1[0] = uplo;
+    i__3[1] = 1, a__1[1] = diag;
+    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+    nb = ilaenv_(&c__1, "CTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	ctrti2_(uplo, diag, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute inverse of upper triangular matrix */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+		i__4 = nb, i__5 = *n - j + 1;
+		jb = min(i__4,i__5);
+
+/*              Compute rows 1:j-1 of current block column */
+
+		i__4 = j - 1;
+		ctrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
+			c_b56, &a[a_offset], lda, &a[j * a_dim1 + 1], lda);
+		i__4 = j - 1;
+		q__1.r = -1.f, q__1.i = -0.f;
+		ctrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
+			q__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1],
+			lda);
+
+/*              Compute inverse of current diagonal block */
+
+		ctrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L20: */
+	    }
+	} else {
+
+/*           Compute inverse of lower triangular matrix */
+
+	    nn = (*n - 1) / nb * nb + 1;
+	    i__2 = -nb;
+	    for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) {
+/* Computing MIN */
+		i__1 = nb, i__4 = *n - j + 1;
+		jb = min(i__1,i__4);
+		if (j + jb <= *n) {
+
+/*                 Compute rows j+jb:n of current block column */
+
+		    i__1 = *n - j - jb + 1;
+		    ctrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb,
+			    &c_b56, &a[j + jb + (j + jb) * a_dim1], lda, &a[j
+			    + jb + j * a_dim1], lda);
+		    i__1 = *n - j - jb + 1;
+		    q__1.r = -1.f, q__1.i = -0.f;
+		    ctrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
+			     &q__1, &a[j + j * a_dim1], lda, &a[j + jb + j *
+			    a_dim1], lda);
+		}
+
+/*              Compute inverse of current diagonal block */
+
+		ctrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L30: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of CTRTRI */
+
+} /* ctrtri_ */
+
+/* Subroutine */ int cung2r_(integer *m, integer *n, integer *k, complex *a,
+	integer *lda, complex *tau, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *), clarf_(char *, integer *, integer *, complex *,
+	    integer *, complex *, complex *, integer *, complex *),
+	    xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CUNG2R generates an m by n complex matrix Q with orthonormal columns,
+    which is defined as the first n columns of a product of k elementary
+    reflectors of order m
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by CGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by CGEQRF in the first k columns of its array
+            argument A.
+            On exit, the m by n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEQRF.
+
+    WORK    (workspace) COMPLEX array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNG2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+/*     Initialise columns k+1:n to columns of the unit matrix */
+
+    i__1 = *n;
+    for (j = *k + 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (l = 1; l <= i__2; ++l) {
+	    i__3 = l + j * a_dim1;
+	    a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L10: */
+	}
+	i__2 = j + j * a_dim1;
+	a[i__2].r = 1.f, a[i__2].i = 0.f;
+/* L20: */
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i) to A(i:m,i:n) from the left */
+
+	if (i__ < *n) {
+	    i__1 = i__ + i__ * a_dim1;
+	    a[i__1].r = 1.f, a[i__1].i = 0.f;
+	    i__1 = *m - i__ + 1;
+	    i__2 = *n - i__;
+	    clarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	}
+	if (i__ < *m) {
+	    i__1 = *m - i__;
+	    i__2 = i__;
+	    q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i;
+	    cscal_(&i__1, &q__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
+	}
+	i__1 = i__ + i__ * a_dim1;
+	i__2 = i__;
+	q__1.r = 1.f - tau[i__2].r, q__1.i = 0.f - tau[i__2].i;
+	a[i__1].r = q__1.r, a[i__1].i = q__1.i;
+
+/*        Set A(1:i-1,i) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    i__2 = l + i__ * a_dim1;
+	    a[i__2].r = 0.f, a[i__2].i = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of CUNG2R */
+
+} /* cung2r_ */
+
+/* Subroutine */ int cungbr_(char *vect, integer *m, integer *n, integer *k,
+	complex *a, integer *lda, complex *tau, complex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, nb, mn;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical wantq;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int cunglq_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *, integer *),
+	     cungqr_(integer *, integer *, integer *, complex *, integer *,
+	    complex *, complex *, integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNGBR generates one of the complex unitary matrices Q or P**H
+    determined by CGEBRD when reducing a complex matrix A to bidiagonal
+    form: A = Q * B * P**H.  Q and P**H are defined as products of
+    elementary reflectors H(i) or G(i) respectively.
+
+    If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+    is of order M:
+    if m >= k, Q = H(1) H(2) . . . H(k) and CUNGBR returns the first n
+    columns of Q, where m >= n >= k;
+    if m < k, Q = H(1) H(2) . . . H(m-1) and CUNGBR returns Q as an
+    M-by-M matrix.
+
+    If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
+    is of order N:
+    if k < n, P**H = G(k) . . . G(2) G(1) and CUNGBR returns the first m
+    rows of P**H, where n >= m >= k;
+    if k >= n, P**H = G(n-1) . . . G(2) G(1) and CUNGBR returns P**H as
+    an N-by-N matrix.
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            Specifies whether the matrix Q or the matrix P**H is
+            required, as defined in the transformation applied by CGEBRD:
+            = 'Q':  generate Q;
+            = 'P':  generate P**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q or P**H to be returned.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q or P**H to be returned.
+            N >= 0.
+            If VECT = 'Q', M >= N >= min(M,K);
+            if VECT = 'P', N >= M >= min(N,K).
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original M-by-K
+            matrix reduced by CGEBRD.
+            If VECT = 'P', the number of rows in the original K-by-N
+            matrix reduced by CGEBRD.
+            K >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by CGEBRD.
+            On exit, the M-by-N matrix Q or P**H.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= M.
+
+    TAU     (input) COMPLEX array, dimension
+                                  (min(M,K)) if VECT = 'Q'
+                                  (min(N,K)) if VECT = 'P'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i), which determines Q or P**H, as
+            returned by CGEBRD in its array argument TAUQ or TAUP.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+            For optimum performance LWORK >= min(M,N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    wantq = lsame_(vect, "Q");
+    mn = min(*m,*n);
+    lquery = *lwork == -1;
+    if (! wantq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (!
+	     wantq && ((*m > *n) || (*m < min(*n,*k))))) {
+	*info = -3;
+    } else if (*k < 0) {
+	*info = -4;
+    } else if (*lda < max(1,*m)) {
+	*info = -6;
+    } else if (*lwork < max(1,mn) && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+	if (wantq) {
+	    nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	} else {
+	    nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	}
+	lwkopt = max(1,mn) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNGBR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    if (wantq) {
+
+/*
+          Form Q, determined by a call to CGEBRD to reduce an m-by-k
+          matrix
+*/
+
+	if (*m >= *k) {
+
+/*           If m >= k, assume m >= n >= k */
+
+	    cungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If m < k, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             column to the right, and set the first row and column of Q
+             to those of the unit matrix
+*/
+
+	    for (j = *m; j >= 2; --j) {
+		i__1 = j * a_dim1 + 1;
+		a[i__1].r = 0.f, a[i__1].i = 0.f;
+		i__1 = *m;
+		for (i__ = j + 1; i__ <= i__1; ++i__) {
+		    i__2 = i__ + j * a_dim1;
+		    i__3 = i__ + (j - 1) * a_dim1;
+		    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
+/* L10: */
+		}
+/* L20: */
+	    }
+	    i__1 = a_dim1 + 1;
+	    a[i__1].r = 1.f, a[i__1].i = 0.f;
+	    i__1 = *m;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		i__2 = i__ + a_dim1;
+		a[i__2].r = 0.f, a[i__2].i = 0.f;
+/* L30: */
+	    }
+	    if (*m > 1) {
+
+/*              Form Q(2:m,2:m) */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		cungqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    } else {
+
+/*
+          Form P', determined by a call to CGEBRD to reduce a k-by-n
+          matrix
+*/
+
+	if (*k < *n) {
+
+/*           If k < n, assume k <= m <= n */
+
+	    cunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If k >= n, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             row downward, and set the first row and column of P' to
+             those of the unit matrix
+*/
+
+	    i__1 = a_dim1 + 1;
+	    a[i__1].r = 1.f, a[i__1].i = 0.f;
+	    i__1 = *n;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		i__2 = i__ + a_dim1;
+		a[i__2].r = 0.f, a[i__2].i = 0.f;
+/* L40: */
+	    }
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		for (i__ = j - 1; i__ >= 2; --i__) {
+		    i__2 = i__ + j * a_dim1;
+		    i__3 = i__ - 1 + j * a_dim1;
+		    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
+/* L50: */
+		}
+		i__2 = j * a_dim1 + 1;
+		a[i__2].r = 0.f, a[i__2].i = 0.f;
+/* L60: */
+	    }
+	    if (*n > 1) {
+
+/*              Form P'(2:n,2:n) */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		cunglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNGBR */
+
+} /* cungbr_ */
+
+/* Subroutine */ int cunghr_(integer *n, integer *ilo, integer *ihi, complex *
+	a, integer *lda, complex *tau, complex *work, integer *lwork, integer
+	*info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, nb, nh, iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int cungqr_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNGHR generates a complex unitary matrix Q which is defined as the
+    product of IHI-ILO elementary reflectors of order N, as returned by
+    CGEHRD:
+
+    Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix Q. N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            ILO and IHI must have the same values as in the previous call
+            of CGEHRD. Q is equal to the unit matrix except in the
+            submatrix Q(ilo+1:ihi,ilo+1:ihi).
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by CGEHRD.
+            On exit, the N-by-N unitary matrix Q.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,N).
+
+    TAU     (input) COMPLEX array, dimension (N-1)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEHRD.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= IHI-ILO.
+            For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nh = *ihi - *ilo;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,nh) && ! lquery) {
+	*info = -8;
+    }
+
+    if (*info == 0) {
+	nb = ilaenv_(&c__1, "CUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	lwkopt = max(1,nh) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNGHR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+/*
+       Shift the vectors which define the elementary reflectors one
+       column to the right, and set the first ilo and the last n-ihi
+       rows and columns to those of the unit matrix
+*/
+
+    i__1 = *ilo + 1;
+    for (j = *ihi; j >= i__1; --j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L10: */
+	}
+	i__2 = *ihi;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    i__4 = i__ + (j - 1) * a_dim1;
+	    a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i;
+/* L20: */
+	}
+	i__2 = *n;
+	for (i__ = *ihi + 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    i__1 = *ilo;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L50: */
+	}
+	i__2 = j + j * a_dim1;
+	a[i__2].r = 1.f, a[i__2].i = 0.f;
+/* L60: */
+    }
+    i__1 = *n;
+    for (j = *ihi + 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L70: */
+	}
+	i__2 = j + j * a_dim1;
+	a[i__2].r = 1.f, a[i__2].i = 0.f;
+/* L80: */
+    }
+
+    if (nh > 0) {
+
+/*        Generate Q(ilo+1:ihi,ilo+1:ihi) */
+
+	cungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*
+		ilo], &work[1], lwork, &iinfo);
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNGHR */
+
+} /* cunghr_ */
+
+/* Subroutine */ int cungl2_(integer *m, integer *n, integer *k, complex *a,
+	integer *lda, complex *tau, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    complex q__1, q__2;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
+	    integer *), clarf_(char *, integer *, integer *, complex *,
+	    integer *, complex *, complex *, integer *, complex *),
+	    clacgv_(integer *, complex *, integer *), xerbla_(char *, integer
+	    *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNGL2 generates an m-by-n complex matrix Q with orthonormal rows,
+    which is defined as the first m rows of a product of k elementary
+    reflectors of order n
+
+          Q  =  H(k)' . . . H(2)' H(1)'
+
+    as returned by CGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by CGELQF in the first k rows of its array argument A.
+            On exit, the m by n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGELQF.
+
+    WORK    (workspace) COMPLEX array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNGL2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	return 0;
+    }
+
+    if (*k < *m) {
+
+/*        Initialise rows k+1:m to rows of the unit matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (l = *k + 1; l <= i__2; ++l) {
+		i__3 = l + j * a_dim1;
+		a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L10: */
+	    }
+	    if (j > *k && j <= *m) {
+		i__2 = j + j * a_dim1;
+		a[i__2].r = 1.f, a[i__2].i = 0.f;
+	    }
+/* L20: */
+	}
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i)' to A(i:m,i:n) from the right */
+
+	if (i__ < *n) {
+	    i__1 = *n - i__;
+	    clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    if (i__ < *m) {
+		i__1 = i__ + i__ * a_dim1;
+		a[i__1].r = 1.f, a[i__1].i = 0.f;
+		i__1 = *m - i__;
+		i__2 = *n - i__ + 1;
+		r_cnjg(&q__1, &tau[i__]);
+		clarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &
+			q__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    }
+	    i__1 = *n - i__;
+	    i__2 = i__;
+	    q__1.r = -tau[i__2].r, q__1.i = -tau[i__2].i;
+	    cscal_(&i__1, &q__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    i__1 = *n - i__;
+	    clacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+	i__1 = i__ + i__ * a_dim1;
+	r_cnjg(&q__2, &tau[i__]);
+	q__1.r = 1.f - q__2.r, q__1.i = 0.f - q__2.i;
+	a[i__1].r = q__1.r, a[i__1].i = q__1.i;
+
+/*        Set A(i,1:i-1,i) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    i__2 = i__ + l * a_dim1;
+	    a[i__2].r = 0.f, a[i__2].i = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of CUNGL2 */
+
+} /* cungl2_ */
+
+/* Subroutine */ int cunglq_(integer *m, integer *n, integer *k, complex *a,
+	integer *lda, complex *tau, complex *work, integer *lwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int cungl2_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *), clarfb_(
+	    char *, char *, char *, char *, integer *, integer *, integer *,
+	    complex *, integer *, complex *, integer *, complex *, integer *,
+	    complex *, integer *), clarft_(
+	    char *, char *, integer *, integer *, complex *, integer *,
+	    complex *, complex *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNGLQ generates an M-by-N complex matrix Q with orthonormal rows,
+    which is defined as the first M rows of a product of K elementary
+    reflectors of order N
+
+          Q  =  H(k)' . . . H(2)' H(1)'
+
+    as returned by CGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by CGELQF in the first k rows of its array argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGELQF.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit;
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "CUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*m) * nb;
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNGLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGLQ", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGLQ", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk rows are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(kk+1:m,1:kk) to zero. */
+
+	i__1 = kk;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = kk + 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *m) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	cungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *n - i__ + 1;
+		clarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i+ib:m,i:n) from the right */
+
+		i__2 = *m - i__ - ib + 1;
+		i__3 = *n - i__ + 1;
+		clarfb_("Right", "Conjugate transpose", "Forward", "Rowwise",
+			&i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[
+			1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[
+			ib + 1], &ldwork);
+	    }
+
+/*           Apply H' to columns i:n of current block */
+
+	    i__2 = *n - i__ + 1;
+	    cungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set columns 1:i-1 of current block to zero */
+
+	    i__2 = i__ - 1;
+	    for (j = 1; j <= i__2; ++j) {
+		i__3 = i__ + ib - 1;
+		for (l = i__; l <= i__3; ++l) {
+		    i__4 = l + j * a_dim1;
+		    a[i__4].r = 0.f, a[i__4].i = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1].r = (real) iws, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNGLQ */
+
+} /* cunglq_ */
+
+/* Subroutine */ int cungqr_(integer *m, integer *n, integer *k, complex *a,
+	integer *lda, complex *tau, complex *work, integer *lwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int cung2r_(integer *, integer *, integer *,
+	    complex *, integer *, complex *, complex *, integer *), clarfb_(
+	    char *, char *, char *, char *, integer *, integer *, integer *,
+	    complex *, integer *, complex *, integer *, complex *, integer *,
+	    complex *, integer *), clarft_(
+	    char *, char *, integer *, integer *, complex *, integer *,
+	    complex *, complex *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
+    which is defined as the first N columns of a product of K elementary
+    reflectors of order M
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by CGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) COMPLEX array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by CGEQRF in the first k columns of its array
+            argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEQRF.
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "CUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*n) * nb;
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNGQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGQR", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGQR", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk columns are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(1:kk,kk+1:n) to zero. */
+
+	i__1 = *n;
+	for (j = kk + 1; j <= i__1; ++j) {
+	    i__2 = kk;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = 0.f, a[i__3].i = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *n) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	cung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *m - i__ + 1;
+		clarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i:m,i+ib:n) from the left */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__ - ib + 1;
+		clarfb_("Left", "No transpose", "Forward", "Columnwise", &
+			i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[
+			1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &
+			work[ib + 1], &ldwork);
+	    }
+
+/*           Apply H to rows i:m of current block */
+
+	    i__2 = *m - i__ + 1;
+	    cung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set rows 1:i-1 of current block to zero */
+
+	    i__2 = i__ + ib - 1;
+	    for (j = i__; j <= i__2; ++j) {
+		i__3 = i__ - 1;
+		for (l = 1; l <= i__3; ++l) {
+		    i__4 = l + j * a_dim1;
+		    a[i__4].r = 0.f, a[i__4].i = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1].r = (real) iws, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNGQR */
+
+} /* cungqr_ */
+
+/* Subroutine */ int cunm2l_(char *side, char *trans, integer *m, integer *n,
+	integer *k, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, mi, ni, nq;
+    static complex aii;
+    static logical left;
+    static complex taui;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CUNM2L overwrites the general complex m-by-n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'C', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'C',
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by CGEQLF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'C': apply Q' (Conjugate transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            CGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEQLF.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the m-by-n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNM2L", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+    } else {
+	mi = *m;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) or H(i)' is applied to C(1:m-k+i,1:n) */
+
+	    mi = *m - *k + i__;
+	} else {
+
+/*           H(i) or H(i)' is applied to C(1:m,1:n-k+i) */
+
+	    ni = *n - *k + i__;
+	}
+
+/*        Apply H(i) or H(i)' */
+
+	if (notran) {
+	    i__3 = i__;
+	    taui.r = tau[i__3].r, taui.i = tau[i__3].i;
+	} else {
+	    r_cnjg(&q__1, &tau[i__]);
+	    taui.r = q__1.r, taui.i = q__1.i;
+	}
+	i__3 = nq - *k + i__ + i__ * a_dim1;
+	aii.r = a[i__3].r, aii.i = a[i__3].i;
+	i__3 = nq - *k + i__ + i__ * a_dim1;
+	a[i__3].r = 1.f, a[i__3].i = 0.f;
+	clarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[
+		c_offset], ldc, &work[1]);
+	i__3 = nq - *k + i__ + i__ * a_dim1;
+	a[i__3].r = aii.r, a[i__3].i = aii.i;
+/* L10: */
+    }
+    return 0;
+
+/*     End of CUNM2L */
+
+} /* cunm2l_ */
+
+/* Subroutine */ int cunm2r_(char *side, char *trans, integer *m, integer *n,
+	integer *k, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static complex aii;
+    static logical left;
+    static complex taui;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CUNM2R overwrites the general complex m-by-n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'C', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'C',
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by CGEQRF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'C': apply Q' (Conjugate transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            CGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEQRF.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the m-by-n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNM2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && ! notran) || (! left && notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) or H(i)' is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) or H(i)' is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) or H(i)' */
+
+	if (notran) {
+	    i__3 = i__;
+	    taui.r = tau[i__3].r, taui.i = tau[i__3].i;
+	} else {
+	    r_cnjg(&q__1, &tau[i__]);
+	    taui.r = q__1.r, taui.i = q__1.i;
+	}
+	i__3 = i__ + i__ * a_dim1;
+	aii.r = a[i__3].r, aii.i = a[i__3].i;
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = 1.f, a[i__3].i = 0.f;
+	clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic
+		+ jc * c_dim1], ldc, &work[1]);
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = aii.r, a[i__3].i = aii.i;
+/* L10: */
+    }
+    return 0;
+
+/*     End of CUNM2R */
+
+} /* cunm2r_ */
+
+/* Subroutine */ int cunmbr_(char *vect, char *side, char *trans, integer *m,
+	integer *n, integer *k, complex *a, integer *lda, complex *tau,
+	complex *c__, integer *ldc, complex *work, integer *lwork, integer *
+	info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2];
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int cunmlq_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *);
+    static logical notran;
+    extern /* Subroutine */ int cunmqr_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *);
+    static logical applyq;
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    If VECT = 'Q', CUNMBR overwrites the general complex M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    If VECT = 'P', CUNMBR overwrites the general complex M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      P * C          C * P
+    TRANS = 'C':      P**H * C       C * P**H
+
+    Here Q and P**H are the unitary matrices determined by CGEBRD when
+    reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
+    and P**H are defined as products of elementary reflectors H(i) and
+    G(i) respectively.
+
+    Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+    order of the unitary matrix Q or P**H that is applied.
+
+    If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+    if nq >= k, Q = H(1) H(2) . . . H(k);
+    if nq < k, Q = H(1) H(2) . . . H(nq-1).
+
+    If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+    if k < nq, P = G(1) G(2) . . . G(k);
+    if k >= nq, P = G(1) G(2) . . . G(nq-1).
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            = 'Q': apply Q or Q**H;
+            = 'P': apply P or P**H.
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q, Q**H, P or P**H from the Left;
+            = 'R': apply Q, Q**H, P or P**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q or P;
+            = 'C':  Conjugate transpose, apply Q**H or P**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original
+            matrix reduced by CGEBRD.
+            If VECT = 'P', the number of rows in the original
+            matrix reduced by CGEBRD.
+            K >= 0.
+
+    A       (input) COMPLEX array, dimension
+                                  (LDA,min(nq,K)) if VECT = 'Q'
+                                  (LDA,nq)        if VECT = 'P'
+            The vectors which define the elementary reflectors H(i) and
+            G(i), whose products determine the matrices Q and P, as
+            returned by CGEBRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If VECT = 'Q', LDA >= max(1,nq);
+            if VECT = 'P', LDA >= max(1,min(nq,K)).
+
+    TAU     (input) COMPLEX array, dimension (min(nq,K))
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i) which determines Q or P, as returned
+            by CGEBRD in the array argument TAUQ or TAUP.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q
+            or P*C or P**H*C or C*P or C*P**H.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    applyq = lsame_(vect, "Q");
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q or P and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! applyq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (! left && ! lsame_(side, "R")) {
+	*info = -2;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*k < 0) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = min(nq,*k);
+	if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2)
+		)) {
+	    *info = -8;
+	} else if (*ldc < max(1,*m)) {
+	    *info = -11;
+	} else if (*lwork < max(1,nw) && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info == 0) {
+	if (applyq) {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "CUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "CUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNMBR", &i__1);
+	return 0;
+    } else if (lquery) {
+    }
+
+/*     Quick return if possible */
+
+    work[1].r = 1.f, work[1].i = 0.f;
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    if (applyq) {
+
+/*        Apply Q */
+
+	if (nq >= *k) {
+
+/*           Q was determined by a call to CGEBRD with nq >= k */
+
+	    cunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           Q was determined by a call to CGEBRD with nq < k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    cunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1]
+		    , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+	}
+    } else {
+
+/*        Apply P */
+
+	if (notran) {
+	    *(unsigned char *)transt = 'C';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+	if (nq > *k) {
+
+/*           P was determined by a call to CGEBRD with nq > k */
+
+	    cunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           P was determined by a call to CGEBRD with nq <= k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    cunmlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1],
+		    lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1],
+		    lwork, &iinfo);
+	}
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNMBR */
+
+} /* cunmbr_ */
+
+/* Subroutine */ int cunml2_(char *side, char *trans, integer *m, integer *n,
+	integer *k, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    complex q__1;
+
+    /* Builtin functions */
+    void r_cnjg(complex *, complex *);
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static complex aii;
+    static logical left;
+    static complex taui;
+    extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex *
+	    , integer *, complex *, complex *, integer *, complex *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int clacgv_(integer *, complex *, integer *),
+	    xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    CUNML2 overwrites the general complex m-by-n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'C', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'C',
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k)' . . . H(2)' H(1)'
+
+    as returned by CGELQF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'C': apply Q' (Conjugate transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            CGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGELQF.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the m-by-n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNML2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) or H(i)' is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) or H(i)' is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) or H(i)' */
+
+	if (notran) {
+	    r_cnjg(&q__1, &tau[i__]);
+	    taui.r = q__1.r, taui.i = q__1.i;
+	} else {
+	    i__3 = i__;
+	    taui.r = tau[i__3].r, taui.i = tau[i__3].i;
+	}
+	if (i__ < nq) {
+	    i__3 = nq - i__;
+	    clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+	i__3 = i__ + i__ * a_dim1;
+	aii.r = a[i__3].r, aii.i = a[i__3].i;
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = 1.f, a[i__3].i = 0.f;
+	clarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic +
+		jc * c_dim1], ldc, &work[1]);
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = aii.r, a[i__3].i = aii.i;
+	if (i__ < nq) {
+	    i__3 = nq - i__;
+	    clacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of CUNML2 */
+
+} /* cunml2_ */
+
+/* Subroutine */ int cunmlq_(char *side, char *trans, integer *m, integer *n,
+	integer *k, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static complex t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int cunml2_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *), clarfb_(char *, char *,
+	    char *, char *, integer *, integer *, integer *, complex *,
+	    integer *, complex *, integer *, complex *, integer *, complex *,
+	    integer *), clarft_(char *, char *
+	    , integer *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran;
+    static integer ldwork;
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNMLQ overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k)' . . . H(2)' H(1)'
+
+    as returned by CGELQF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Conjugate transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            CGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGELQF.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMLQ", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNMLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMLQ", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	cunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	if (notran) {
+	    *(unsigned char *)transt = 'C';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    clarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1],
+		    lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    clarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__
+		    + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1],
+		    ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNMLQ */
+
+} /* cunmlq_ */
+
+/* Subroutine */ int cunmql_(char *side, char *trans, integer *m, integer *n,
+	integer *k, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static complex t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int cunm2l_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *), clarfb_(char *, char *,
+	    char *, char *, integer *, integer *, integer *, complex *,
+	    integer *, complex *, integer *, complex *, integer *, complex *,
+	    integer *), clarft_(char *, char *
+	    , integer *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran;
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNMQL overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by CGEQLF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            CGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEQLF.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQL", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNMQL", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQL", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	cunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	} else {
+	    mi = *m;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i+ib-1) . . . H(i+1) H(i)
+*/
+
+	    i__4 = nq - *k + i__ + ib - 1;
+	    clarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1]
+		    , lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(1:m-k+i+ib-1,1:n) */
+
+		mi = *m - *k + i__ + ib - 1;
+	    } else {
+
+/*              H or H' is applied to C(1:m,1:n-k+i+ib-1) */
+
+		ni = *n - *k + i__ + ib - 1;
+	    }
+
+/*           Apply H or H' */
+
+	    clarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, &
+		    work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNMQL */
+
+} /* cunmql_ */
+
+/* Subroutine */ int cunmqr_(char *side, char *trans, integer *m, integer *n,
+	integer *k, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static complex t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int cunm2r_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *), clarfb_(char *, char *,
+	    char *, char *, integer *, integer *, integer *, complex *,
+	    integer *, complex *, integer *, complex *, integer *, complex *,
+	    integer *), clarft_(char *, char *
+	    , integer *, integer *, complex *, integer *, complex *, complex *
+	    , integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran;
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNMQR overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by CGEQRF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Conjugate transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            CGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CGEQRF.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "CUNMQR", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("CUNMQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "CUNMQR", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	cunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && ! notran) || (! left && notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    clarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ *
+		    a_dim1], lda, &tau[i__], t, &c__65)
+		    ;
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    clarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc *
+		    c_dim1], ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNMQR */
+
+} /* cunmqr_ */
+
+/* Subroutine */ int cunmtr_(char *side, char *uplo, char *trans, integer *m,
+	integer *n, complex *a, integer *lda, complex *tau, complex *c__,
+	integer *ldc, complex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int cunmql_(char *, char *, integer *, integer *,
+	    integer *, complex *, integer *, complex *, complex *, integer *,
+	    complex *, integer *, integer *), cunmqr_(char *,
+	    char *, integer *, integer *, integer *, complex *, integer *,
+	    complex *, complex *, integer *, complex *, integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    CUNMTR overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix of order nq, with nq = m if
+    SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+    nq-1 elementary reflectors, as returned by CHETRD:
+
+    if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+
+    if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    UPLO    (input) CHARACTER*1
+            = 'U': Upper triangle of A contains elementary reflectors
+                   from CHETRD;
+            = 'L': Lower triangle of A contains elementary reflectors
+                   from CHETRD.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Conjugate transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    A       (input) COMPLEX array, dimension
+                                 (LDA,M) if SIDE = 'L'
+                                 (LDA,N) if SIDE = 'R'
+            The vectors which define the elementary reflectors, as
+            returned by CHETRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+
+    TAU     (input) COMPLEX array, dimension
+                                 (M-1) if SIDE = 'L'
+                                 (N-1) if SIDE = 'R'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by CHETRD.
+
+    C       (input/output) COMPLEX array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >=M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	*info = -2;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "C")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	if (upper) {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "CUNMQL", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "CUNMQL", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "CUNMQR", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "CUNMQR", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (real) lwkopt, work[1].i = 0.f;
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("CUNMTR", &i__2);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (nq == 1)) {
+	work[1].r = 1.f, work[1].i = 0.f;
+	return 0;
+    }
+
+    if (left) {
+	mi = *m - 1;
+	ni = *n;
+    } else {
+	mi = *m;
+	ni = *n - 1;
+    }
+
+    if (upper) {
+
+/*        Q was determined by a call to CHETRD with UPLO = 'U' */
+
+	i__2 = nq - 1;
+	cunmql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda,
+		&tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo);
+    } else {
+
+/*        Q was determined by a call to CHETRD with UPLO = 'L' */
+
+	if (left) {
+	    i1 = 2;
+	    i2 = 1;
+	} else {
+	    i1 = 1;
+	    i2 = 2;
+	}
+	i__2 = nq - 1;
+	cunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], &
+		c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+    }
+    work[1].r = (real) lwkopt, work[1].i = 0.f;
+    return 0;
+
+/*     End of CUNMTR */
+
+} /* cunmtr_ */
+
+/* Subroutine */ int dbdsdc_(char *uplo, char *compq, integer *n, doublereal *
+	d__, doublereal *e, doublereal *u, integer *ldu, doublereal *vt,
+	integer *ldvt, doublereal *q, integer *iq, doublereal *work, integer *
+	iwork, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double d_sign(doublereal *, doublereal *), log(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k;
+    static doublereal p, r__;
+    static integer z__, ic, ii, kk;
+    static doublereal cs;
+    static integer is, iu;
+    static doublereal sn;
+    static integer nm1;
+    static doublereal eps;
+    static integer ivt, difl, difr, ierr, perm, mlvl, sqre;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *
+	    , doublereal *, integer *), dswap_(integer *, doublereal *,
+	    integer *, doublereal *, integer *);
+    static integer poles, iuplo, nsize, start;
+    extern /* Subroutine */ int dlasd0_(integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    integer *, integer *, doublereal *, integer *);
+
+    extern /* Subroutine */ int dlasda_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *), dlascl_(char *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, integer *, doublereal *, integer *,
+	    integer *), dlasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlaset_(char *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static integer givcol;
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    static integer icompq;
+    static doublereal orgnrm;
+    static integer givnum, givptr, qstart, smlsiz, wstart, smlszp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 1, 1999
+
+
+    Purpose
+    =======
+
+    DBDSDC computes the singular value decomposition (SVD) of a real
+    N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT,
+    using a divide and conquer method, where S is a diagonal matrix
+    with non-negative diagonal elements (the singular values of B), and
+    U and VT are orthogonal matrices of left and right singular vectors,
+    respectively. DBDSDC can be used to compute all singular values,
+    and optionally, singular vectors or singular vectors in compact form.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.  See DLASD3 for details.
+
+    The code currently call DLASDQ if singular values only are desired.
+    However, it can be slightly modified to compute singular values
+    using the divide and conquer method.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  B is upper bidiagonal.
+            = 'L':  B is lower bidiagonal.
+
+    COMPQ   (input) CHARACTER*1
+            Specifies whether singular vectors are to be computed
+            as follows:
+            = 'N':  Compute singular values only;
+            = 'P':  Compute singular values and compute singular
+                    vectors in compact form;
+            = 'I':  Compute singular values and singular vectors.
+
+    N       (input) INTEGER
+            The order of the matrix B.  N >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the n diagonal elements of the bidiagonal matrix B.
+            On exit, if INFO=0, the singular values of B.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the elements of E contain the offdiagonal
+            elements of the bidiagonal matrix whose SVD is desired.
+            On exit, E has been destroyed.
+
+    U       (output) DOUBLE PRECISION array, dimension (LDU,N)
+            If  COMPQ = 'I', then:
+               On exit, if INFO = 0, U contains the left singular vectors
+               of the bidiagonal matrix.
+            For other values of COMPQ, U is not referenced.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= 1.
+            If singular vectors are desired, then LDU >= max( 1, N ).
+
+    VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
+            If  COMPQ = 'I', then:
+               On exit, if INFO = 0, VT' contains the right singular
+               vectors of the bidiagonal matrix.
+            For other values of COMPQ, VT is not referenced.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.  LDVT >= 1.
+            If singular vectors are desired, then LDVT >= max( 1, N ).
+
+    Q       (output) DOUBLE PRECISION array, dimension (LDQ)
+            If  COMPQ = 'P', then:
+               On exit, if INFO = 0, Q and IQ contain the left
+               and right singular vectors in a compact form,
+               requiring O(N log N) space instead of 2*N**2.
+               In particular, Q contains all the DOUBLE PRECISION data in
+               LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1))))
+               words of memory, where SMLSIZ is returned by ILAENV and
+               is equal to the maximum size of the subproblems at the
+               bottom of the computation tree (usually about 25).
+            For other values of COMPQ, Q is not referenced.
+
+    IQ      (output) INTEGER array, dimension (LDIQ)
+            If  COMPQ = 'P', then:
+               On exit, if INFO = 0, Q and IQ contain the left
+               and right singular vectors in a compact form,
+               requiring O(N log N) space instead of 2*N**2.
+               In particular, IQ contains all INTEGER data in
+               LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1))))
+               words of memory, where SMLSIZ is returned by ILAENV and
+               is equal to the maximum size of the subproblems at the
+               bottom of the computation tree (usually about 25).
+            For other values of COMPQ, IQ is not referenced.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
+            If COMPQ = 'N' then LWORK >= (4 * N).
+            If COMPQ = 'P' then LWORK >= (6 * N).
+            If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N).
+
+    IWORK   (workspace) INTEGER array, dimension (8*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an singular value.
+                  The update process of divide and conquer failed.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --q;
+    --iq;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    iuplo = 0;
+    if (lsame_(uplo, "U")) {
+	iuplo = 1;
+    }
+    if (lsame_(uplo, "L")) {
+	iuplo = 2;
+    }
+    if (lsame_(compq, "N")) {
+	icompq = 0;
+    } else if (lsame_(compq, "P")) {
+	icompq = 1;
+    } else if (lsame_(compq, "I")) {
+	icompq = 2;
+    } else {
+	icompq = -1;
+    }
+    if (iuplo == 0) {
+	*info = -1;
+    } else if (icompq < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ldu < 1) || (icompq == 2 && *ldu < *n)) {
+	*info = -7;
+    } else if ((*ldvt < 1) || (icompq == 2 && *ldvt < *n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DBDSDC", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    smlsiz = ilaenv_(&c__9, "DBDSDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+    if (*n == 1) {
+	if (icompq == 1) {
+	    q[1] = d_sign(&c_b2865, &d__[1]);
+	    q[smlsiz * *n + 1] = 1.;
+	} else if (icompq == 2) {
+	    u[u_dim1 + 1] = d_sign(&c_b2865, &d__[1]);
+	    vt[vt_dim1 + 1] = 1.;
+	}
+	d__[1] = abs(d__[1]);
+	return 0;
+    }
+    nm1 = *n - 1;
+
+/*
+       If matrix lower bidiagonal, rotate to be upper bidiagonal
+       by applying Givens rotations on the left
+*/
+
+    wstart = 1;
+    qstart = 3;
+    if (icompq == 1) {
+	dcopy_(n, &d__[1], &c__1, &q[1], &c__1);
+	i__1 = *n - 1;
+	dcopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1);
+    }
+    if (iuplo == 2) {
+	qstart = 5;
+	wstart = ((*n) << (1)) - 1;
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (icompq == 1) {
+		q[i__ + ((*n) << (1))] = cs;
+		q[i__ + *n * 3] = sn;
+	    } else if (icompq == 2) {
+		work[i__] = cs;
+		work[nm1 + i__] = -sn;
+	    }
+/* L10: */
+	}
+    }
+
+/*     If ICOMPQ = 0, use DLASDQ to compute the singular values. */
+
+    if (icompq == 0) {
+	dlasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
+		vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
+		wstart], info);
+	goto L40;
+    }
+
+/*
+       If N is smaller than the minimum divide size SMLSIZ, then solve
+       the problem with another solver.
+*/
+
+    if (*n <= smlsiz) {
+	if (icompq == 2) {
+	    dlaset_("A", n, n, &c_b2879, &c_b2865, &u[u_offset], ldu);
+	    dlaset_("A", n, n, &c_b2879, &c_b2865, &vt[vt_offset], ldvt);
+	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
+		    , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
+		    wstart], info);
+	} else if (icompq == 1) {
+	    iu = 1;
+	    ivt = iu + *n;
+	    dlaset_("A", n, n, &c_b2879, &c_b2865, &q[iu + (qstart - 1) * *n],
+		     n);
+	    dlaset_("A", n, n, &c_b2879, &c_b2865, &q[ivt + (qstart - 1) * *n]
+		    , n);
+	    dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (
+		    qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[
+		    iu + (qstart - 1) * *n], n, &work[wstart], info);
+	}
+	goto L40;
+    }
+
+    if (icompq == 2) {
+	dlaset_("A", n, n, &c_b2879, &c_b2865, &u[u_offset], ldu);
+	dlaset_("A", n, n, &c_b2879, &c_b2865, &vt[vt_offset], ldvt);
+    }
+
+/*     Scale. */
+
+    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.) {
+	return 0;
+    }
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, &c__1, &d__[1], n, &ierr);
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &nm1, &c__1, &e[1], &nm1, &
+	    ierr);
+
+    eps = EPSILON;
+
+    mlvl = (integer) (log((doublereal) (*n) / (doublereal) (smlsiz + 1)) /
+	    log(2.)) + 1;
+    smlszp = smlsiz + 1;
+
+    if (icompq == 1) {
+	iu = 1;
+	ivt = smlsiz + 1;
+	difl = ivt + smlszp;
+	difr = difl + mlvl;
+	z__ = difr + ((mlvl) << (1));
+	ic = z__ + mlvl;
+	is = ic + 1;
+	poles = is + 1;
+	givnum = poles + ((mlvl) << (1));
+
+	k = 1;
+	givptr = 2;
+	perm = 3;
+	givcol = perm + mlvl;
+    }
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) < eps) {
+	    d__[i__] = d_sign(&eps, &d__[i__]);
+	}
+/* L20: */
+    }
+
+    start = 1;
+    sqre = 0;
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (((d__1 = e[i__], abs(d__1)) < eps) || (i__ == nm1)) {
+
+/*
+          Subproblem found. First determine its size and then
+          apply divide and conquer on it.
+*/
+
+	    if (i__ < nm1) {
+
+/*        A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - start + 1;
+	    } else if ((d__1 = e[i__], abs(d__1)) >= eps) {
+
+/*        A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - start + 1;
+	    } else {
+
+/*
+          A subproblem with E(NM1) small. This implies an
+          1-by-1 subproblem at D(N). Solve this 1-by-1 problem
+          first.
+*/
+
+		nsize = i__ - start + 1;
+		if (icompq == 2) {
+		    u[*n + *n * u_dim1] = d_sign(&c_b2865, &d__[*n]);
+		    vt[*n + *n * vt_dim1] = 1.;
+		} else if (icompq == 1) {
+		    q[*n + (qstart - 1) * *n] = d_sign(&c_b2865, &d__[*n]);
+		    q[*n + (smlsiz + qstart - 1) * *n] = 1.;
+		}
+		d__[*n] = (d__1 = d__[*n], abs(d__1));
+	    }
+	    if (icompq == 2) {
+		dlasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start +
+			start * u_dim1], ldu, &vt[start + start * vt_dim1],
+			ldvt, &smlsiz, &iwork[1], &work[wstart], info);
+	    } else {
+		dlasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[
+			start], &q[start + (iu + qstart - 2) * *n], n, &q[
+			start + (ivt + qstart - 2) * *n], &iq[start + k * *n],
+			 &q[start + (difl + qstart - 2) * *n], &q[start + (
+			difr + qstart - 2) * *n], &q[start + (z__ + qstart -
+			2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[
+			start + givptr * *n], &iq[start + givcol * *n], n, &
+			iq[start + perm * *n], &q[start + (givnum + qstart -
+			2) * *n], &q[start + (ic + qstart - 2) * *n], &q[
+			start + (is + qstart - 2) * *n], &work[wstart], &
+			iwork[1], info);
+		if (*info != 0) {
+		    return 0;
+		}
+	    }
+	    start = i__ + 1;
+	}
+/* L30: */
+    }
+
+/*     Unscale */
+
+    dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, n, &c__1, &d__[1], n, &ierr);
+L40:
+
+/*     Use Selection Sort to minimize swaps of singular vectors */
+
+    i__1 = *n;
+    for (ii = 2; ii <= i__1; ++ii) {
+	i__ = ii - 1;
+	kk = i__;
+	p = d__[i__];
+	i__2 = *n;
+	for (j = ii; j <= i__2; ++j) {
+	    if (d__[j] > p) {
+		kk = j;
+		p = d__[j];
+	    }
+/* L50: */
+	}
+	if (kk != i__) {
+	    d__[kk] = d__[i__];
+	    d__[i__] = p;
+	    if (icompq == 1) {
+		iq[i__] = kk;
+	    } else if (icompq == 2) {
+		dswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &
+			c__1);
+		dswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt);
+	    }
+	} else if (icompq == 1) {
+	    iq[i__] = i__;
+	}
+/* L60: */
+    }
+
+/*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */
+
+    if (icompq == 1) {
+	if (iuplo == 1) {
+	    iq[*n] = 1;
+	} else {
+	    iq[*n] = 0;
+	}
+    }
+
+/*
+       If B is lower bidiagonal, update U by those Givens rotations
+       which rotated B to be upper bidiagonal
+*/
+
+    if (iuplo == 2 && icompq == 2) {
+	dlasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu);
+    }
+
+    return 0;
+
+/*     End of DBDSDC */
+
+} /* dbdsdc_ */
+
+/* Subroutine */ int dbdsqr_(char *uplo, integer *n, integer *ncvt, integer *
+	nru, integer *ncc, doublereal *d__, doublereal *e, doublereal *vt,
+	integer *ldvt, doublereal *u, integer *ldu, doublereal *c__, integer *
+	ldc, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Builtin functions */
+    double pow_dd(doublereal *, doublereal *), sqrt(doublereal), d_sign(
+	    doublereal *, doublereal *);
+
+    /* Local variables */
+    static doublereal f, g, h__;
+    static integer i__, j, m;
+    static doublereal r__, cs;
+    static integer ll;
+    static doublereal sn, mu;
+    static integer nm1, nm12, nm13, lll;
+    static doublereal eps, sll, tol, abse;
+    static integer idir;
+    static doublereal abss;
+    static integer oldm;
+    static doublereal cosl;
+    static integer isub, iter;
+    static doublereal unfl, sinl, cosr, smin, smax, sinr;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *), dlas2_(
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *), dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    static doublereal oldcs;
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *);
+    static integer oldll;
+    static doublereal shift, sigmn, oldsn;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer maxit;
+    static doublereal sminl, sigmx;
+    static logical lower;
+    extern /* Subroutine */ int dlasq1_(integer *, doublereal *, doublereal *,
+	     doublereal *, integer *), dlasv2_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *);
+
+    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *), xerbla_(char *,
+	    integer *);
+    static doublereal sminoa, thresh;
+    static logical rotate;
+    static doublereal sminlo, tolmul;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DBDSQR computes the singular value decomposition (SVD) of a real
+    N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
+    denotes the transpose of P), where S is a diagonal matrix with
+    non-negative diagonal elements (the singular values of B), and Q
+    and P are orthogonal matrices.
+
+    The routine computes S, and optionally computes U * Q, P' * VT,
+    or Q' * C, for given real input matrices U, VT, and C.
+
+    See "Computing  Small Singular Values of Bidiagonal Matrices With
+    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+    LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
+    no. 5, pp. 873-912, Sept 1990) and
+    "Accurate singular values and differential qd algorithms," by
+    B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
+    Department, University of California at Berkeley, July 1992
+    for a detailed description of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  B is upper bidiagonal;
+            = 'L':  B is lower bidiagonal.
+
+    N       (input) INTEGER
+            The order of the matrix B.  N >= 0.
+
+    NCVT    (input) INTEGER
+            The number of columns of the matrix VT. NCVT >= 0.
+
+    NRU     (input) INTEGER
+            The number of rows of the matrix U. NRU >= 0.
+
+    NCC     (input) INTEGER
+            The number of columns of the matrix C. NCC >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the n diagonal elements of the bidiagonal matrix B.
+            On exit, if INFO=0, the singular values of B in decreasing
+            order.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the elements of E contain the
+            offdiagonal elements of the bidiagonal matrix whose SVD
+            is desired. On normal exit (INFO = 0), E is destroyed.
+            If the algorithm does not converge (INFO > 0), D and E
+            will contain the diagonal and superdiagonal elements of a
+            bidiagonal matrix orthogonally equivalent to the one given
+            as input. E(N) is used for workspace.
+
+    VT      (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)
+            On entry, an N-by-NCVT matrix VT.
+            On exit, VT is overwritten by P' * VT.
+            VT is not referenced if NCVT = 0.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.
+            LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
+
+    U       (input/output) DOUBLE PRECISION array, dimension (LDU, N)
+            On entry, an NRU-by-N matrix U.
+            On exit, U is overwritten by U * Q.
+            U is not referenced if NRU = 0.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= max(1,NRU).
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)
+            On entry, an N-by-NCC matrix C.
+            On exit, C is overwritten by Q' * C.
+            C is not referenced if NCC = 0.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C.
+            LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (4*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  If INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm did not converge; D and E contain the
+                  elements of a bidiagonal matrix which is orthogonally
+                  similar to the input matrix B;  if INFO = i, i
+                  elements of E have not converged to zero.
+
+    Internal Parameters
+    ===================
+
+    TOLMUL  DOUBLE PRECISION, default = max(10,min(100,EPS**(-1/8)))
+            TOLMUL controls the convergence criterion of the QR loop.
+            If it is positive, TOLMUL*EPS is the desired relative
+               precision in the computed singular values.
+            If it is negative, abs(TOLMUL*EPS*sigma_max) is the
+               desired absolute accuracy in the computed singular
+               values (corresponds to relative accuracy
+               abs(TOLMUL*EPS) in the largest singular value.
+            abs(TOLMUL) should be between 1 and 1/EPS, and preferably
+               between 10 (for fast convergence) and .1/EPS
+               (for there to be some accuracy in the results).
+            Default is to lose at either one eighth or 2 of the
+               available decimal digits in each computed singular value
+               (whichever is smaller).
+
+    MAXITR  INTEGER, default = 6
+            MAXITR controls the maximum number of passes of the
+            algorithm through its inner loop. The algorithms stops
+            (and so fails to converge) if the number of passes
+            through the inner loop exceeds MAXITR*N**2.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lower = lsame_(uplo, "L");
+    if (! lsame_(uplo, "U") && ! lower) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*ncvt < 0) {
+	*info = -3;
+    } else if (*nru < 0) {
+	*info = -4;
+    } else if (*ncc < 0) {
+	*info = -5;
+    } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n)))
+	    {
+	*info = -9;
+    } else if (*ldu < max(1,*nru)) {
+	*info = -11;
+    } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DBDSQR", &i__1);
+	return 0;
+    }
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	goto L160;
+    }
+
+/*     ROTATE is true if any singular vectors desired, false otherwise */
+
+    rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0);
+
+/*     If no singular vectors desired, use qd algorithm */
+
+    if (! rotate) {
+	dlasq1_(n, &d__[1], &e[1], &work[1], info);
+	return 0;
+    }
+
+    nm1 = *n - 1;
+    nm12 = nm1 + nm1;
+    nm13 = nm12 + nm1;
+    idir = 0;
+
+/*     Get machine constants */
+
+    eps = EPSILON;
+    unfl = SAFEMINIMUM;
+
+/*
+       If matrix lower bidiagonal, rotate to be upper bidiagonal
+       by applying Givens rotations on the left
+*/
+
+    if (lower) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    work[i__] = cs;
+	    work[nm1 + i__] = sn;
+/* L10: */
+	}
+
+/*        Update singular vectors if desired */
+
+	if (*nru > 0) {
+	    dlasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset],
+		    ldu);
+	}
+	if (*ncc > 0) {
+	    dlasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
+		     ldc);
+	}
+    }
+
+/*
+       Compute singular values to relative accuracy TOL
+       (By setting TOL to be negative, algorithm will compute
+       singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+
+   Computing MAX
+   Computing MIN
+*/
+    d__3 = 100., d__4 = pow_dd(&eps, &c_b2944);
+    d__1 = 10., d__2 = min(d__3,d__4);
+    tolmul = max(d__1,d__2);
+    tol = tolmul * eps;
+
+/*     Compute approximate maximum, minimum singular values */
+
+    smax = 0.;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__2 = smax, d__3 = (d__1 = d__[i__], abs(d__1));
+	smax = max(d__2,d__3);
+/* L20: */
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__2 = smax, d__3 = (d__1 = e[i__], abs(d__1));
+	smax = max(d__2,d__3);
+/* L30: */
+    }
+    sminl = 0.;
+    if (tol >= 0.) {
+
+/*        Relative accuracy desired */
+
+	sminoa = abs(d__[1]);
+	if (sminoa == 0.) {
+	    goto L50;
+	}
+	mu = sminoa;
+	i__1 = *n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    mu = (d__2 = d__[i__], abs(d__2)) * (mu / (mu + (d__1 = e[i__ - 1]
+		    , abs(d__1))));
+	    sminoa = min(sminoa,mu);
+	    if (sminoa == 0.) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	sminoa /= sqrt((doublereal) (*n));
+/* Computing MAX */
+	d__1 = tol * sminoa, d__2 = *n * 6 * *n * unfl;
+	thresh = max(d__1,d__2);
+    } else {
+
+/*
+          Absolute accuracy desired
+
+   Computing MAX
+*/
+	d__1 = abs(tol) * smax, d__2 = *n * 6 * *n * unfl;
+	thresh = max(d__1,d__2);
+    }
+
+/*
+       Prepare for main iteration loop for the singular values
+       (MAXIT is the maximum number of passes through the inner
+       loop permitted before nonconvergence signalled.)
+*/
+
+    maxit = *n * 6 * *n;
+    iter = 0;
+    oldll = -1;
+    oldm = -1;
+
+/*     M points to last element of unconverged part of matrix */
+
+    m = *n;
+
+/*     Begin main iteration loop */
+
+L60:
+
+/*     Check for convergence or exceeding iteration count */
+
+    if (m <= 1) {
+	goto L160;
+    }
+    if (iter > maxit) {
+	goto L200;
+    }
+
+/*     Find diagonal block of matrix to work on */
+
+    if (tol < 0. && (d__1 = d__[m], abs(d__1)) <= thresh) {
+	d__[m] = 0.;
+    }
+    smax = (d__1 = d__[m], abs(d__1));
+    smin = smax;
+    i__1 = m - 1;
+    for (lll = 1; lll <= i__1; ++lll) {
+	ll = m - lll;
+	abss = (d__1 = d__[ll], abs(d__1));
+	abse = (d__1 = e[ll], abs(d__1));
+	if (tol < 0. && abss <= thresh) {
+	    d__[ll] = 0.;
+	}
+	if (abse <= thresh) {
+	    goto L80;
+	}
+	smin = min(smin,abss);
+/* Computing MAX */
+	d__1 = max(smax,abss);
+	smax = max(d__1,abse);
+/* L70: */
+    }
+    ll = 0;
+    goto L90;
+L80:
+    e[ll] = 0.;
+
+/*     Matrix splits since E(LL) = 0 */
+
+    if (ll == m - 1) {
+
+/*        Convergence of bottom singular value, return to top of loop */
+
+	--m;
+	goto L60;
+    }
+L90:
+    ++ll;
+
+/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */
+
+    if (ll == m - 1) {
+
+/*        2 by 2 block, handle separately */
+
+	dlasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
+		 &sinl, &cosl);
+	d__[m - 1] = sigmx;
+	e[m - 1] = 0.;
+	d__[m] = sigmn;
+
+/*        Compute singular vectors, if desired */
+
+	if (*ncvt > 0) {
+	    drot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &
+		    cosr, &sinr);
+	}
+	if (*nru > 0) {
+	    drot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &
+		    c__1, &cosl, &sinl);
+	}
+	if (*ncc > 0) {
+	    drot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &
+		    cosl, &sinl);
+	}
+	m += -2;
+	goto L60;
+    }
+
+/*
+       If working on new submatrix, choose shift direction
+       (from larger end diagonal element towards smaller)
+*/
+
+    if ((ll > oldm) || (m < oldll)) {
+	if ((d__1 = d__[ll], abs(d__1)) >= (d__2 = d__[m], abs(d__2))) {
+
+/*           Chase bulge from top (big end) to bottom (small end) */
+
+	    idir = 1;
+	} else {
+
+/*           Chase bulge from bottom (big end) to top (small end) */
+
+	    idir = 2;
+	}
+    }
+
+/*     Apply convergence tests */
+
+    if (idir == 1) {
+
+/*
+          Run convergence test in forward direction
+          First apply standard test to bottom of matrix
+*/
+
+	if (((d__2 = e[m - 1], abs(d__2)) <= abs(tol) * (d__1 = d__[m], abs(
+		d__1))) || (tol < 0. && (d__3 = e[m - 1], abs(d__3)) <=
+		thresh)) {
+	    e[m - 1] = 0.;
+	    goto L60;
+	}
+
+	if (tol >= 0.) {
+
+/*
+             If relative accuracy desired,
+             apply convergence criterion forward
+*/
+
+	    mu = (d__1 = d__[ll], abs(d__1));
+	    sminl = mu;
+	    i__1 = m - 1;
+	    for (lll = ll; lll <= i__1; ++lll) {
+		if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
+		    e[lll] = 0.;
+		    goto L60;
+		}
+		sminlo = sminl;
+		mu = (d__2 = d__[lll + 1], abs(d__2)) * (mu / (mu + (d__1 = e[
+			lll], abs(d__1))));
+		sminl = min(sminl,mu);
+/* L100: */
+	    }
+	}
+
+    } else {
+
+/*
+          Run convergence test in backward direction
+          First apply standard test to top of matrix
+*/
+
+	if (((d__2 = e[ll], abs(d__2)) <= abs(tol) * (d__1 = d__[ll], abs(
+		d__1))) || (tol < 0. && (d__3 = e[ll], abs(d__3)) <= thresh))
+		{
+	    e[ll] = 0.;
+	    goto L60;
+	}
+
+	if (tol >= 0.) {
+
+/*
+             If relative accuracy desired,
+             apply convergence criterion backward
+*/
+
+	    mu = (d__1 = d__[m], abs(d__1));
+	    sminl = mu;
+	    i__1 = ll;
+	    for (lll = m - 1; lll >= i__1; --lll) {
+		if ((d__1 = e[lll], abs(d__1)) <= tol * mu) {
+		    e[lll] = 0.;
+		    goto L60;
+		}
+		sminlo = sminl;
+		mu = (d__2 = d__[lll], abs(d__2)) * (mu / (mu + (d__1 = e[lll]
+			, abs(d__1))));
+		sminl = min(sminl,mu);
+/* L110: */
+	    }
+	}
+    }
+    oldll = ll;
+    oldm = m;
+
+/*
+       Compute shift.  First, test if shifting would ruin relative
+       accuracy, and if so set the shift to zero.
+
+   Computing MAX
+*/
+    d__1 = eps, d__2 = tol * .01;
+    if (tol >= 0. && *n * tol * (sminl / smax) <= max(d__1,d__2)) {
+
+/*        Use a zero shift to avoid loss of relative accuracy */
+
+	shift = 0.;
+    } else {
+
+/*        Compute the shift from 2-by-2 block at end of matrix */
+
+	if (idir == 1) {
+	    sll = (d__1 = d__[ll], abs(d__1));
+	    dlas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
+	} else {
+	    sll = (d__1 = d__[m], abs(d__1));
+	    dlas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
+	}
+
+/*        Test if shift negligible, and if so set to zero */
+
+	if (sll > 0.) {
+/* Computing 2nd power */
+	    d__1 = shift / sll;
+	    if (d__1 * d__1 < eps) {
+		shift = 0.;
+	    }
+	}
+    }
+
+/*     Increment iteration count */
+
+    iter = iter + m - ll;
+
+/*     If SHIFT = 0, do simplified QR iteration */
+
+    if (shift == 0.) {
+	if (idir == 1) {
+
+/*
+             Chase bulge from top to bottom
+             Save cosines and sines for later singular vector updates
+*/
+
+	    cs = 1.;
+	    oldcs = 1.;
+	    i__1 = m - 1;
+	    for (i__ = ll; i__ <= i__1; ++i__) {
+		d__1 = d__[i__] * cs;
+		dlartg_(&d__1, &e[i__], &cs, &sn, &r__);
+		if (i__ > ll) {
+		    e[i__ - 1] = oldsn * r__;
+		}
+		d__1 = oldcs * r__;
+		d__2 = d__[i__ + 1] * sn;
+		dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
+		work[i__ - ll + 1] = cs;
+		work[i__ - ll + 1 + nm1] = sn;
+		work[i__ - ll + 1 + nm12] = oldcs;
+		work[i__ - ll + 1 + nm13] = oldsn;
+/* L120: */
+	    }
+	    h__ = d__[m] * cs;
+	    d__[m] = h__ * oldcs;
+	    e[m - 1] = h__ * oldsn;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
+			ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
+			+ 1], &u[ll * u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
+			+ 1], &c__[ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
+		e[m - 1] = 0.;
+	    }
+
+	} else {
+
+/*
+             Chase bulge from bottom to top
+             Save cosines and sines for later singular vector updates
+*/
+
+	    cs = 1.;
+	    oldcs = 1.;
+	    i__1 = ll + 1;
+	    for (i__ = m; i__ >= i__1; --i__) {
+		d__1 = d__[i__] * cs;
+		dlartg_(&d__1, &e[i__ - 1], &cs, &sn, &r__);
+		if (i__ < m) {
+		    e[i__] = oldsn * r__;
+		}
+		d__1 = oldcs * r__;
+		d__2 = d__[i__ - 1] * sn;
+		dlartg_(&d__1, &d__2, &oldcs, &oldsn, &d__[i__]);
+		work[i__ - ll] = cs;
+		work[i__ - ll + nm1] = -sn;
+		work[i__ - ll + nm12] = oldcs;
+		work[i__ - ll + nm13] = -oldsn;
+/* L130: */
+	    }
+	    h__ = d__[ll] * cs;
+	    d__[ll] = h__ * oldcs;
+	    e[ll] = h__ * oldsn;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
+			nm13 + 1], &vt[ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
+			 u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
+			ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((d__1 = e[ll], abs(d__1)) <= thresh) {
+		e[ll] = 0.;
+	    }
+	}
+    } else {
+
+/*        Use nonzero shift */
+
+	if (idir == 1) {
+
+/*
+             Chase bulge from top to bottom
+             Save cosines and sines for later singular vector updates
+*/
+
+	    f = ((d__1 = d__[ll], abs(d__1)) - shift) * (d_sign(&c_b2865, &
+		    d__[ll]) + shift / d__[ll]);
+	    g = e[ll];
+	    i__1 = m - 1;
+	    for (i__ = ll; i__ <= i__1; ++i__) {
+		dlartg_(&f, &g, &cosr, &sinr, &r__);
+		if (i__ > ll) {
+		    e[i__ - 1] = r__;
+		}
+		f = cosr * d__[i__] + sinr * e[i__];
+		e[i__] = cosr * e[i__] - sinr * d__[i__];
+		g = sinr * d__[i__ + 1];
+		d__[i__ + 1] = cosr * d__[i__ + 1];
+		dlartg_(&f, &g, &cosl, &sinl, &r__);
+		d__[i__] = r__;
+		f = cosl * e[i__] + sinl * d__[i__ + 1];
+		d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
+		if (i__ < m - 1) {
+		    g = sinl * e[i__ + 1];
+		    e[i__ + 1] = cosl * e[i__ + 1];
+		}
+		work[i__ - ll + 1] = cosr;
+		work[i__ - ll + 1 + nm1] = sinr;
+		work[i__ - ll + 1 + nm12] = cosl;
+		work[i__ - ll + 1 + nm13] = sinl;
+/* L140: */
+	    }
+	    e[m - 1] = f;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
+			ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
+			+ 1], &u[ll * u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
+			+ 1], &c__[ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((d__1 = e[m - 1], abs(d__1)) <= thresh) {
+		e[m - 1] = 0.;
+	    }
+
+	} else {
+
+/*
+             Chase bulge from bottom to top
+             Save cosines and sines for later singular vector updates
+*/
+
+	    f = ((d__1 = d__[m], abs(d__1)) - shift) * (d_sign(&c_b2865, &d__[
+		    m]) + shift / d__[m]);
+	    g = e[m - 1];
+	    i__1 = ll + 1;
+	    for (i__ = m; i__ >= i__1; --i__) {
+		dlartg_(&f, &g, &cosr, &sinr, &r__);
+		if (i__ < m) {
+		    e[i__] = r__;
+		}
+		f = cosr * d__[i__] + sinr * e[i__ - 1];
+		e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
+		g = sinr * d__[i__ - 1];
+		d__[i__ - 1] = cosr * d__[i__ - 1];
+		dlartg_(&f, &g, &cosl, &sinl, &r__);
+		d__[i__] = r__;
+		f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
+		d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
+		if (i__ > ll + 1) {
+		    g = sinl * e[i__ - 2];
+		    e[i__ - 2] = cosl * e[i__ - 2];
+		}
+		work[i__ - ll] = cosr;
+		work[i__ - ll + nm1] = -sinr;
+		work[i__ - ll + nm12] = cosl;
+		work[i__ - ll + nm13] = -sinl;
+/* L150: */
+	    }
+	    e[ll] = f;
+
+/*           Test convergence */
+
+	    if ((d__1 = e[ll], abs(d__1)) <= thresh) {
+		e[ll] = 0.;
+	    }
+
+/*           Update singular vectors if desired */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
+			nm13 + 1], &vt[ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
+			 u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		dlasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
+			ll + c_dim1], ldc);
+	    }
+	}
+    }
+
+/*     QR iteration finished, go back and check convergence */
+
+    goto L60;
+
+/*     All singular values converged, so make them positive */
+
+L160:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (d__[i__] < 0.) {
+	    d__[i__] = -d__[i__];
+
+/*           Change sign of singular vectors, if desired */
+
+	    if (*ncvt > 0) {
+		dscal_(ncvt, &c_b3001, &vt[i__ + vt_dim1], ldvt);
+	    }
+	}
+/* L170: */
+    }
+
+/*
+       Sort the singular values into decreasing order (insertion sort on
+       singular values, but only one transposition per singular vector)
+*/
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Scan for smallest D(I) */
+
+	isub = 1;
+	smin = d__[1];
+	i__2 = *n + 1 - i__;
+	for (j = 2; j <= i__2; ++j) {
+	    if (d__[j] <= smin) {
+		isub = j;
+		smin = d__[j];
+	    }
+/* L180: */
+	}
+	if (isub != *n + 1 - i__) {
+
+/*           Swap singular values and vectors */
+
+	    d__[isub] = d__[*n + 1 - i__];
+	    d__[*n + 1 - i__] = smin;
+	    if (*ncvt > 0) {
+		dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ +
+			vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) *
+			u_dim1 + 1], &c__1);
+	    }
+	    if (*ncc > 0) {
+		dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ +
+			c_dim1], ldc);
+	    }
+	}
+/* L190: */
+    }
+    goto L220;
+
+/*     Maximum number of iterations exceeded, failure to converge */
+
+L200:
+    *info = 0;
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.) {
+	    ++(*info);
+	}
+/* L210: */
+    }
+L220:
+    return 0;
+
+/*     End of DBDSQR */
+
+} /* dbdsqr_ */
+
+/* Subroutine */ int dgebak_(char *job, char *side, integer *n, integer *ilo,
+	integer *ihi, doublereal *scale, integer *m, doublereal *v, integer *
+	ldv, integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    static integer i__, k;
+    static doublereal s;
+    static integer ii;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static logical leftv;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical rightv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DGEBAK forms the right or left eigenvectors of a real general matrix
+    by backward transformation on the computed eigenvectors of the
+    balanced matrix output by DGEBAL.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the type of backward transformation required:
+            = 'N', do nothing, return immediately;
+            = 'P', do backward transformation for permutation only;
+            = 'S', do backward transformation for scaling only;
+            = 'B', do backward transformations for both permutation and
+                   scaling.
+            JOB must be the same as the argument JOB supplied to DGEBAL.
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  V contains right eigenvectors;
+            = 'L':  V contains left eigenvectors.
+
+    N       (input) INTEGER
+            The number of rows of the matrix V.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            The integers ILO and IHI determined by DGEBAL.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    SCALE   (input) DOUBLE PRECISION array, dimension (N)
+            Details of the permutation and scaling factors, as returned
+            by DGEBAL.
+
+    M       (input) INTEGER
+            The number of columns of the matrix V.  M >= 0.
+
+    V       (input/output) DOUBLE PRECISION array, dimension (LDV,M)
+            On entry, the matrix of right or left eigenvectors to be
+            transformed, as returned by DHSEIN or DTREVC.
+            On exit, V is overwritten by the transformed eigenvectors.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V. LDV >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Decode and Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --scale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+
+    /* Function Body */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*ldv < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBAK", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if ((lsame_(job, "S")) || (lsame_(job, "B"))) {
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = scale[i__];
+		dscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L10: */
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = 1. / scale[i__];
+		dscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L20: */
+	    }
+	}
+
+    }
+
+/*
+       Backward permutation
+
+       For  I = ILO-1 step -1 until 1,
+                IHI+1 step 1 until N do --
+*/
+
+L30:
+    if ((lsame_(job, "P")) || (lsame_(job, "B"))) {
+	if (rightv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L40;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = (integer) scale[i__];
+		if (k == i__) {
+		    goto L40;
+		}
+		dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L50;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = (integer) scale[i__];
+		if (k == i__) {
+		    goto L50;
+		}
+		dswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L50:
+		;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DGEBAK */
+
+} /* dgebak_ */
+
+/* Subroutine */ int dgebal_(char *job, integer *n, doublereal *a, integer *
+	lda, integer *ilo, integer *ihi, doublereal *scale, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static doublereal c__, f, g;
+    static integer i__, j, k, l, m;
+    static doublereal r__, s, ca, ra;
+    static integer ica, ira, iexc;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static doublereal sfmin1, sfmin2, sfmax1, sfmax2;
+
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical noconv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DGEBAL balances a general real matrix A.  This involves, first,
+    permuting A by a similarity transformation to isolate eigenvalues
+    in the first 1 to ILO-1 and last IHI+1 to N elements on the
+    diagonal; and second, applying a diagonal similarity transformation
+    to rows and columns ILO to IHI to make the rows and columns as
+    close in norm as possible.  Both steps are optional.
+
+    Balancing may reduce the 1-norm of the matrix, and improve the
+    accuracy of the computed eigenvalues and/or eigenvectors.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the operations to be performed on A:
+            = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+                    for i = 1,...,N;
+            = 'P':  permute only;
+            = 'S':  scale only;
+            = 'B':  both permute and scale.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the input matrix A.
+            On exit,  A is overwritten by the balanced matrix.
+            If JOB = 'N', A is not referenced.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    ILO     (output) INTEGER
+    IHI     (output) INTEGER
+            ILO and IHI are set to integers such that on exit
+            A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+            If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+
+    SCALE   (output) DOUBLE PRECISION array, dimension (N)
+            Details of the permutations and scaling factors applied to
+            A.  If P(j) is the index of the row and column interchanged
+            with row and column j and D(j) is the scaling factor
+            applied to row and column j, then
+            SCALE(j) = P(j)    for j = 1,...,ILO-1
+                     = D(j)    for j = ILO,...,IHI
+                     = P(j)    for j = IHI+1,...,N.
+            The order in which the interchanges are made is N to IHI+1,
+            then 1 to ILO-1.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The permutations consist of row and column interchanges which put
+    the matrix in the form
+
+               ( T1   X   Y  )
+       P A P = (  0   B   Z  )
+               (  0   0   T2 )
+
+    where T1 and T2 are upper triangular matrices whose eigenvalues lie
+    along the diagonal.  The column indices ILO and IHI mark the starting
+    and ending columns of the submatrix B. Balancing consists of applying
+    a diagonal similarity transformation inv(D) * B * D to make the
+    1-norms of each row of B and its corresponding column nearly equal.
+    The output matrix is
+
+       ( T1     X*D          Y    )
+       (  0  inv(D)*B*D  inv(D)*Z ).
+       (  0      0           T2   )
+
+    Information about the permutations P and the diagonal matrix D is
+    returned in the vector SCALE.
+
+    This subroutine is based on the EISPACK routine BALANC.
+
+    Modified by Tzu-Yi Chen, Computer Science Division, University of
+      California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --scale;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBAL", &i__1);
+	return 0;
+    }
+
+    k = 1;
+    l = *n;
+
+    if (*n == 0) {
+	goto L210;
+    }
+
+    if (lsame_(job, "N")) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scale[i__] = 1.;
+/* L10: */
+	}
+	goto L210;
+    }
+
+    if (lsame_(job, "S")) {
+	goto L120;
+    }
+
+/*     Permutation to isolate eigenvalues if possible */
+
+    goto L50;
+
+/*     Row and column exchange. */
+
+L20:
+    scale[m] = (doublereal) j;
+    if (j == m) {
+	goto L30;
+    }
+
+    dswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
+    i__1 = *n - k + 1;
+    dswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
+
+L30:
+    switch (iexc) {
+	case 1:  goto L40;
+	case 2:  goto L80;
+    }
+
+/*     Search for rows isolating an eigenvalue and push them down. */
+
+L40:
+    if (l == 1) {
+	goto L210;
+    }
+    --l;
+
+L50:
+    for (j = l; j >= 1; --j) {
+
+	i__1 = l;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (i__ == j) {
+		goto L60;
+	    }
+	    if (a[j + i__ * a_dim1] != 0.) {
+		goto L70;
+	    }
+L60:
+	    ;
+	}
+
+	m = l;
+	iexc = 1;
+	goto L20;
+L70:
+	;
+    }
+
+    goto L90;
+
+/*     Search for columns isolating an eigenvalue and push them left. */
+
+L80:
+    ++k;
+
+L90:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+
+	i__2 = l;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    if (i__ == j) {
+		goto L100;
+	    }
+	    if (a[i__ + j * a_dim1] != 0.) {
+		goto L110;
+	    }
+L100:
+	    ;
+	}
+
+	m = k;
+	iexc = 2;
+	goto L20;
+L110:
+	;
+    }
+
+L120:
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	scale[i__] = 1.;
+/* L130: */
+    }
+
+    if (lsame_(job, "P")) {
+	goto L210;
+    }
+
+/*
+       Balance the submatrix in rows K to L.
+
+       Iterative loop for norm reduction
+*/
+
+    sfmin1 = SAFEMINIMUM / PRECISION;
+    sfmax1 = 1. / sfmin1;
+    sfmin2 = sfmin1 * 8.;
+    sfmax2 = 1. / sfmin2;
+L140:
+    noconv = FALSE_;
+
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	c__ = 0.;
+	r__ = 0.;
+
+	i__2 = l;
+	for (j = k; j <= i__2; ++j) {
+	    if (j == i__) {
+		goto L150;
+	    }
+	    c__ += (d__1 = a[j + i__ * a_dim1], abs(d__1));
+	    r__ += (d__1 = a[i__ + j * a_dim1], abs(d__1));
+L150:
+	    ;
+	}
+	ica = idamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
+	ca = (d__1 = a[ica + i__ * a_dim1], abs(d__1));
+	i__2 = *n - k + 1;
+	ira = idamax_(&i__2, &a[i__ + k * a_dim1], lda);
+	ra = (d__1 = a[i__ + (ira + k - 1) * a_dim1], abs(d__1));
+
+/*        Guard against zero C or R due to underflow. */
+
+	if ((c__ == 0.) || (r__ == 0.)) {
+	    goto L200;
+	}
+	g = r__ / 8.;
+	f = 1.;
+	s = c__ + r__;
+L160:
+/* Computing MAX */
+	d__1 = max(f,c__);
+/* Computing MIN */
+	d__2 = min(r__,g);
+	if (((c__ >= g) || (max(d__1,ca) >= sfmax2)) || (min(d__2,ra) <=
+		sfmin2)) {
+	    goto L170;
+	}
+	f *= 8.;
+	c__ *= 8.;
+	ca *= 8.;
+	r__ /= 8.;
+	g /= 8.;
+	ra /= 8.;
+	goto L160;
+
+L170:
+	g = c__ / 8.;
+L180:
+/* Computing MIN */
+	d__1 = min(f,c__), d__1 = min(d__1,g);
+	if (((g < r__) || (max(r__,ra) >= sfmax2)) || (min(d__1,ca) <= sfmin2)
+		) {
+	    goto L190;
+	}
+	f /= 8.;
+	c__ /= 8.;
+	g /= 8.;
+	ca /= 8.;
+	r__ *= 8.;
+	ra *= 8.;
+	goto L180;
+
+/*        Now balance. */
+
+L190:
+	if (c__ + r__ >= s * .95) {
+	    goto L200;
+	}
+	if (f < 1. && scale[i__] < 1.) {
+	    if (f * scale[i__] <= sfmin1) {
+		goto L200;
+	    }
+	}
+	if (f > 1. && scale[i__] > 1.) {
+	    if (scale[i__] >= sfmax1 / f) {
+		goto L200;
+	    }
+	}
+	g = 1. / f;
+	scale[i__] *= f;
+	noconv = TRUE_;
+
+	i__2 = *n - k + 1;
+	dscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
+	dscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
+
+L200:
+	;
+    }
+
+    if (noconv) {
+	goto L140;
+    }
+
+L210:
+    *ilo = k;
+    *ihi = l;
+
+    return 0;
+
+/*     End of DGEBAL */
+
+} /* dgebal_ */
+
+/* Subroutine */ int dgebd2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *
+	taup, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *), dlarfg_(integer *, doublereal *,
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DGEBD2 reduces a real general m by n matrix A to upper or lower
+    bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the orthogonal matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the orthogonal matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) DOUBLE PRECISION array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix Q. See Further Details.
+
+    TAUP    (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix P. See Further Details.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (max(M,N))
+
+    INFO    (output) INTEGER
+            = 0: successful exit.
+            < 0: if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBD2", &i__1);
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ *
+		    a_dim1], &c__1, &tauq[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = d__[i__];
+
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector G(i) to annihilate
+                A(i,i+2:n)
+*/
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(
+			i__3,*n) * a_dim1], lda, &taup[i__]);
+		e[i__] = a[i__ + (i__ + 1) * a_dim1];
+		a[i__ + (i__ + 1) * a_dim1] = 1.;
+
+/*              Apply G(i) to A(i+1:m,i+1:n) from the right */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		dlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1],
+			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &work[1]);
+		a[i__ + (i__ + 1) * a_dim1] = e[i__];
+	    } else {
+		taup[i__] = 0.;
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) *
+		    a_dim1], lda, &taup[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+
+/*           Apply G(i) to A(i+1:m,i:n) from the right */
+
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+/* Computing MIN */
+	    i__4 = i__ + 1;
+	    dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[
+		    i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = d__[i__];
+
+	    if (i__ < *m) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:m,i)
+*/
+
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) +
+			i__ * a_dim1], &c__1, &tauq[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.;
+
+/*              Apply H(i) to A(i+1:m,i+1:n) from the left */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &work[1]);
+		a[i__ + 1 + i__ * a_dim1] = e[i__];
+	    } else {
+		tauq[i__] = 0.;
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of DGEBD2 */
+
+} /* dgebd2_ */
+
+/* Subroutine */ int dgebrd_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *d__, doublereal *e, doublereal *tauq, doublereal *
+	taup, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, nb, nx;
+    static doublereal ws;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer nbmin, iinfo, minmn;
+    extern /* Subroutine */ int dgebd2_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *), dlabrd_(integer *, integer *, integer *
+	    , doublereal *, integer *, doublereal *, doublereal *, doublereal
+	    *, doublereal *, doublereal *, integer *, doublereal *, integer *)
+	    , xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwrkx, ldwrky, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DGEBRD reduces a general real M-by-N matrix A to upper or lower
+    bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the M-by-N general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the orthogonal matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the orthogonal matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) DOUBLE PRECISION array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix Q. See Further Details.
+
+    TAUP    (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix P. See Further Details.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,M,N).
+            For optimum performance LWORK >= (M+N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = ilaenv_(&c__1, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = max(i__1,i__2);
+    lwkopt = (*m + *n) * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = max(1,*m);
+	if (*lwork < max(i__1,*n) && ! lquery) {
+	    *info = -10;
+	}
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("DGEBRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    minmn = min(*m,*n);
+    if (minmn == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    ws = (doublereal) max(*m,*n);
+    ldwrkx = *m;
+    ldwrky = *n;
+
+    if (nb > 1 && nb < minmn) {
+
+/*
+          Set the crossover point NX.
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+
+/*        Determine when to switch from blocked to unblocked code. */
+
+	if (nx < minmn) {
+	    ws = (doublereal) ((*m + *n) * nb);
+	    if ((doublereal) (*lwork) < ws) {
+
+/*
+                Not enough work space for the optimal NB, consider using
+                a smaller block size.
+*/
+
+		nbmin = ilaenv_(&c__2, "DGEBRD", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		if (*lwork >= (*m + *n) * nbmin) {
+		    nb = *lwork / (*m + *n);
+		} else {
+		    nb = 1;
+		    nx = minmn;
+		}
+	    }
+	}
+    } else {
+	nx = minmn;
+    }
+
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+
+/*
+          Reduce rows and columns i:i+nb-1 to bidiagonal form and return
+          the matrices X and Y which are needed to update the unreduced
+          part of the matrix
+*/
+
+	i__3 = *m - i__ + 1;
+	i__4 = *n - i__ + 1;
+	dlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
+		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx
+		* nb + 1], &ldwrky);
+
+/*
+          Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
+          of the form  A := A - V*Y' - X*U'
+*/
+
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	dgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b3001, &a[
+		i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], &
+		ldwrky, &c_b2865, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	dgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b3001, &
+		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
+		c_b2865, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*        Copy diagonal and off-diagonal elements of B back into A */
+
+	if (*m >= *n) {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + (j + 1) * a_dim1] = e[j];
+/* L10: */
+	    }
+	} else {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + 1 + j * a_dim1] = e[j];
+/* L20: */
+	    }
+	}
+/* L30: */
+    }
+
+/*     Use unblocked code to reduce the remainder of the matrix */
+
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    dgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
+	    tauq[i__], &taup[i__], &work[1], &iinfo);
+    work[1] = ws;
+    return 0;
+
+/*     End of DGEBRD */
+
+} /* dgebrd_ */
+
+/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *
+	a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl,
+	integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work,
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3, i__4;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, k;
+    static doublereal r__, cs, sn;
+    static integer ihi;
+    static doublereal scl;
+    static integer ilo;
+    static doublereal dum[1], eps;
+    static integer ibal;
+    static char side[1];
+    static integer maxb;
+    static doublereal anrm;
+    static integer ierr, itau;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static integer iwrk, nout;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern doublereal dlapy2_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebak_(
+	    char *, char *, integer *, integer *, integer *, doublereal *,
+	    integer *, doublereal *, integer *, integer *),
+	    dgebal_(char *, integer *, doublereal *, integer *, integer *,
+	    integer *, doublereal *, integer *);
+    static logical scalea;
+
+    static doublereal cscale;
+    extern doublereal dlange_(char *, integer *, integer *, doublereal *,
+	    integer *, doublereal *);
+    extern /* Subroutine */ int dgehrd_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    integer *), dlascl_(char *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, integer *, doublereal *, integer *,
+	    integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *),
+	    dlartg_(doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *), xerbla_(char *, integer *);
+    static logical select[1];
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static doublereal bignum;
+    extern /* Subroutine */ int dorghr_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    integer *), dhseqr_(char *, char *, integer *, integer *, integer
+	    *, doublereal *, integer *, doublereal *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, integer *), dtrevc_(char *, char *, logical *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, integer *, integer *, doublereal *, integer *);
+    static integer minwrk, maxwrk;
+    static logical wantvl;
+    static doublereal smlnum;
+    static integer hswork;
+    static logical lquery, wantvr;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 8, 1999
+
+
+    Purpose
+    =======
+
+    DGEEV computes for an N-by-N real nonsymmetric matrix A, the
+    eigenvalues and, optionally, the left and/or right eigenvectors.
+
+    The right eigenvector v(j) of A satisfies
+                     A * v(j) = lambda(j) * v(j)
+    where lambda(j) is its eigenvalue.
+    The left eigenvector u(j) of A satisfies
+                  u(j)**H * A = lambda(j) * u(j)**H
+    where u(j)**H denotes the conjugate transpose of u(j).
+
+    The computed eigenvectors are normalized to have Euclidean norm
+    equal to 1 and largest component real.
+
+    Arguments
+    =========
+
+    JOBVL   (input) CHARACTER*1
+            = 'N': left eigenvectors of A are not computed;
+            = 'V': left eigenvectors of A are computed.
+
+    JOBVR   (input) CHARACTER*1
+            = 'N': right eigenvectors of A are not computed;
+            = 'V': right eigenvectors of A are computed.
+
+    N       (input) INTEGER
+            The order of the matrix A. N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the N-by-N matrix A.
+            On exit, A has been overwritten.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    WR      (output) DOUBLE PRECISION array, dimension (N)
+    WI      (output) DOUBLE PRECISION array, dimension (N)
+            WR and WI contain the real and imaginary parts,
+            respectively, of the computed eigenvalues.  Complex
+            conjugate pairs of eigenvalues appear consecutively
+            with the eigenvalue having the positive imaginary part
+            first.
+
+    VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
+            If JOBVL = 'V', the left eigenvectors u(j) are stored one
+            after another in the columns of VL, in the same order
+            as their eigenvalues.
+            If JOBVL = 'N', VL is not referenced.
+            If the j-th eigenvalue is real, then u(j) = VL(:,j),
+            the j-th column of VL.
+            If the j-th and (j+1)-st eigenvalues form a complex
+            conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
+            u(j+1) = VL(:,j) - i*VL(:,j+1).
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= 1; if
+            JOBVL = 'V', LDVL >= N.
+
+    VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
+            If JOBVR = 'V', the right eigenvectors v(j) are stored one
+            after another in the columns of VR, in the same order
+            as their eigenvalues.
+            If JOBVR = 'N', VR is not referenced.
+            If the j-th eigenvalue is real, then v(j) = VR(:,j),
+            the j-th column of VR.
+            If the j-th and (j+1)-st eigenvalues form a complex
+            conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
+            v(j+1) = VR(:,j) - i*VR(:,j+1).
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= 1; if
+            JOBVR = 'V', LDVR >= N.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,3*N), and
+            if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good
+            performance, LWORK must generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = i, the QR algorithm failed to compute all the
+                  eigenvalues, and no eigenvectors have been computed;
+                  elements i+1:N of WR and WI contain eigenvalues which
+                  have converged.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -1;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) {
+	*info = -9;
+    } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) {
+	*info = -11;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         NB refers to the optimal block size for the immediately
+         following subroutine, as returned by ILAENV.
+         HSWORK refers to the workspace preferred by DHSEQR, as
+         calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+         the worst case.)
+*/
+
+    minwrk = 1;
+    if (*info == 0 && ((*lwork >= 1) || (lquery))) {
+	maxwrk = ((*n) << (1)) + *n * ilaenv_(&c__1, "DGEHRD", " ", n, &c__1,
+		n, &c__0, (ftnlen)6, (ftnlen)1);
+	if (! wantvl && ! wantvr) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n * 3;
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "DHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "EN", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n +
+		    hswork;
+	    maxwrk = max(i__1,i__2);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = (*n) << (2);
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1,
+		    "DORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "DHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "DHSEQR", "SV", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n +
+		    hswork;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = (*n) << (2);
+	    maxwrk = max(i__1,i__2);
+	}
+	work[1] = (doublereal) maxwrk;
+    }
+    if (*lwork < minwrk && ! lquery) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEEV ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = PRECISION;
+    smlnum = SAFEMINIMUM;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0. && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	dlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*
+       Balance the matrix
+       (Workspace: need N)
+*/
+
+    ibal = 1;
+    dgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*
+       Reduce to upper Hessenberg form
+       (Workspace: need 3*N, prefer 2*N+N*NB)
+*/
+
+    itau = ibal + *n;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    dgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvl) {
+
+/*
+          Want left eigenvectors
+          Copy Householder vectors to VL
+*/
+
+	*(unsigned char *)side = 'L';
+	dlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*
+          Generate orthogonal matrix in VL
+          (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VL
+          (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vl[vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*
+             Want left and right eigenvectors
+             Copy Schur vectors to VR
+*/
+
+	    *(unsigned char *)side = 'B';
+	    dlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*
+          Want right eigenvectors
+          Copy Householder vectors to VR
+*/
+
+	*(unsigned char *)side = 'R';
+	dlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*
+          Generate orthogonal matrix in VR
+          (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	dorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VR
+          (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*
+          Compute eigenvalues only
+          (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	dhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO > 0 from DHSEQR, then quit */
+
+    if (*info > 0) {
+	goto L50;
+    }
+
+    if ((wantvl) || (wantvr)) {
+
+/*
+          Compute left and/or right eigenvectors
+          (Workspace: need 4*N)
+*/
+
+	dtrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl,
+		 &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr);
+    }
+
+    if (wantvl) {
+
+/*
+          Undo balancing of left eigenvectors
+          (Workspace: need N)
+*/
+
+	dgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl,
+		 &ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.) {
+		scl = 1. / dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.) {
+		d__1 = dnrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		d__2 = dnrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		scl = 1. / dlapy2_(&d__1, &d__2);
+		dscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    d__1 = vl[k + i__ * vl_dim1];
+/* Computing 2nd power */
+		    d__2 = vl[k + (i__ + 1) * vl_dim1];
+		    work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2;
+/* L10: */
+		}
+		k = idamax_(n, &work[iwrk], &c__1);
+		dlartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1],
+			&cs, &sn, &r__);
+		drot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) *
+			vl_dim1 + 1], &c__1, &cs, &sn);
+		vl[k + (i__ + 1) * vl_dim1] = 0.;
+	    }
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*
+          Undo balancing of right eigenvectors
+          (Workspace: need N)
+*/
+
+	dgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr,
+		 &ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.) {
+		scl = 1. / dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.) {
+		d__1 = dnrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		d__2 = dnrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		scl = 1. / dlapy2_(&d__1, &d__2);
+		dscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+		dscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    d__1 = vr[k + i__ * vr_dim1];
+/* Computing 2nd power */
+		    d__2 = vr[k + (i__ + 1) * vr_dim1];
+		    work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2;
+/* L30: */
+		}
+		k = idamax_(n, &work[iwrk], &c__1);
+		dlartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1],
+			&cs, &sn, &r__);
+		drot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) *
+			vr_dim1 + 1], &c__1, &cs, &sn);
+		vr[k + (i__ + 1) * vr_dim1] = 0.;
+	    }
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = max(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info +
+		1], &i__2, &ierr);
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = max(i__3,1);
+	dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info +
+		1], &i__2, &ierr);
+	if (*info > 0) {
+	    i__1 = ilo - 1;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1],
+		    n, &ierr);
+	    i__1 = ilo - 1;
+	    dlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1],
+		    n, &ierr);
+	}
+    }
+
+    work[1] = (doublereal) maxwrk;
+    return 0;
+
+/*     End of DGEEV */
+
+} /* dgeev_ */
+
+/* Subroutine */ int dgehd2_(integer *n, integer *ilo, integer *ihi,
+	doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__;
+    static doublereal aii;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *), dlarfg_(integer *, doublereal *,
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DGEHD2 reduces a real general matrix A to upper Hessenberg form H by
+    an orthogonal similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to DGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= max(1,N).
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the n by n general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the orthogonal matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEHD2", &i__1);
+	return 0;
+    }
+
+    i__1 = *ihi - 1;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+
+/*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */
+
+	i__2 = *ihi - i__;
+/* Computing MIN */
+	i__3 = i__ + 2;
+	dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ *
+		a_dim1], &c__1, &tau[i__]);
+	aii = a[i__ + 1 + i__ * a_dim1];
+	a[i__ + 1 + i__ * a_dim1] = 1.;
+
+/*        Apply H(i) to A(1:ihi,i+1:ihi) from the right */
+
+	i__2 = *ihi - i__;
+	dlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]);
+
+/*        Apply H(i) to A(i+1:ihi,i+1:n) from the left */
+
+	i__2 = *ihi - i__;
+	i__3 = *n - i__;
+	dlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]);
+
+	a[i__ + 1 + i__ * a_dim1] = aii;
+/* L10: */
+    }
+
+    return 0;
+
+/*     End of DGEHD2 */
+
+} /* dgehd2_ */
+
+/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi,
+	doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__;
+    static doublereal t[4160]	/* was [65][64] */;
+    static integer ib;
+    static doublereal ei;
+    static integer nb, nh, nx, iws;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int dgehd2_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *),
+	    dlarfb_(char *, char *, char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, integer *), dlahrd_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DGEHRD reduces a real general matrix A to upper Hessenberg form H by
+    an orthogonal similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to DGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the N-by-N general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the orthogonal matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+            zero.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MIN */
+    i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = min(i__1,i__2);
+    lwkopt = *n * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEHRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	tau[i__] = 0.;
+/* L10: */
+    }
+    i__1 = *n - 1;
+    for (i__ = max(1,*ihi); i__ <= i__1; ++i__) {
+	tau[i__] = 0.;
+/* L20: */
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1] = 1.;
+	return 0;
+    }
+
+/*
+       Determine the block size.
+
+   Computing MIN
+*/
+    i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = min(i__1,i__2);
+    nbmin = 2;
+    iws = 1;
+    if (nb > 1 && nb < nh) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    iws = *n * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code.
+
+   Computing MAX
+*/
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+		if (*lwork >= *n * nbmin) {
+		    nb = *lwork / *n;
+		} else {
+		    nb = 1;
+		}
+	    }
+	}
+    }
+    ldwork = *n;
+
+    if ((nb < nbmin) || (nb >= nh)) {
+
+/*        Use unblocked code below */
+
+	i__ = *ilo;
+
+    } else {
+
+/*        Use blocked code */
+
+	i__1 = *ihi - 1 - nx;
+	i__2 = nb;
+	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - i__;
+	    ib = min(i__3,i__4);
+
+/*
+             Reduce columns i:i+ib-1 to Hessenberg form, returning the
+             matrices V and T of the block reflector H = I - V*T*V'
+             which performs the reduction, and also the matrix Y = A*V*T
+*/
+
+	    dlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
+		    c__65, &work[1], &ldwork);
+
+/*
+             Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+             right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+             to 1.
+*/
+
+	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.;
+	    i__3 = *ihi - i__ - ib + 1;
+	    dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b3001, &
+		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
+		    c_b2865, &a[(i__ + ib) * a_dim1 + 1], lda);
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;
+
+/*
+             Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+             left
+*/
+
+	    i__3 = *ihi - i__;
+	    i__4 = *n - i__ - ib + 1;
+	    dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[
+		    i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork);
+/* L30: */
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+
+    dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    work[1] = (doublereal) iws;
+
+    return 0;
+
+/*     End of DGEHRD */
+
+} /* dgehrd_ */
+
+/* Subroutine */ int dgelq2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, k;
+    static doublereal aii;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *), dlarfg_(integer *, doublereal *,
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DGELQ2 computes an LQ factorization of a real m by n matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m by min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k) . . . H(2) H(1), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELQ2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
+
+	i__2 = *n - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1]
+		, lda, &tau[i__]);
+	if (i__ < *m) {
+
+/*           Apply H(i) to A(i+1:m,i:n) from the right */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+	    dlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
+		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of DGELQ2 */
+
+} /* dgelq2_ */
+
+/* Subroutine */ int dgelqf_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int dgelq2_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *), dlarfb_(char *,
+	     char *, char *, char *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DGELQF computes an LQ factorization of a real M-by-N matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k) . . . H(2) H(1), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *m * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELQF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGELQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGELQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the LQ factorization of the current block
+             A(i:i+ib-1,i:n)
+*/
+
+	    i__3 = *n - i__ + 1;
+	    dgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *n - i__ + 1;
+		dlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i+ib:m,i:n) from the right */
+
+		i__3 = *m - i__ - ib + 1;
+		i__4 = *n - i__ + 1;
+		dlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3,
+			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib +
+			1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	dgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGELQF */
+
+} /* dgelqf_ */
+
+/* Subroutine */ int dgelsd_(integer *m, integer *n, integer *nrhs,
+	doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
+	s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
+	 integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Builtin functions */
+    double log(doublereal);
+
+    /* Local variables */
+    static integer ie, il, mm;
+    static doublereal eps, anrm, bnrm;
+    static integer itau, nlvl, iascl, ibscl;
+    static doublereal sfmin;
+    static integer minmn, maxmn, itaup, itauq, mnthr, nwork;
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *), dgebrd_(
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *);
+    extern doublereal dlamch_(char *), dlange_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *);
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *, integer *),
+	    dlalsd_(char *, integer *, integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *, integer *), dlascl_(char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    integer *, doublereal *, integer *, integer *), dgeqrf_(
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *, integer *), dlacpy_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static doublereal bignum;
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *,
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, integer *);
+    static integer wlalsd;
+    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, integer *);
+    static integer minwrk, maxwrk;
+    static doublereal smlnum;
+    static logical lquery;
+    static integer smlsiz;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DGELSD computes the minimum-norm solution to a real linear least
+    squares problem:
+        minimize 2-norm(| b - A*x |)
+    using the singular value decomposition (SVD) of A. A is an M-by-N
+    matrix which may be rank-deficient.
+
+    Several right hand side vectors b and solution vectors x can be
+    handled in a single call; they are stored as the columns of the
+    M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+    matrix X.
+
+    The problem is solved in three steps:
+    (1) Reduce the coefficient matrix A to bidiagonal form with
+        Householder transformations, reducing the original problem
+        into a "bidiagonal least squares problem" (BLS)
+    (2) Solve the BLS using a divide and conquer approach.
+    (3) Apply back all the Householder tranformations to solve
+        the original least squares problem.
+
+    The effective rank of A is determined by treating as zero those
+    singular values which are less than RCOND times the largest singular
+    value.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of A. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of A. N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrices B and X. NRHS >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, A has been destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+            On entry, the M-by-NRHS right hand side matrix B.
+            On exit, B is overwritten by the N-by-NRHS solution
+            matrix X.  If m >= n and RANK = n, the residual
+            sum-of-squares for the solution in the i-th column is given
+            by the sum of squares of elements n+1:m in that column.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B. LDB >= max(1,max(M,N)).
+
+    S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The singular values of A in decreasing order.
+            The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+
+    RCOND   (input) DOUBLE PRECISION
+            RCOND is used to determine the effective rank of A.
+            Singular values S(i) <= RCOND*S(1) are treated as zero.
+            If RCOND < 0, machine precision is used instead.
+
+    RANK    (output) INTEGER
+            The effective rank of A, i.e., the number of singular values
+            which are greater than RCOND*S(1).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK must be at least 1.
+            The exact minimum amount of workspace needed depends on M,
+            N and NRHS. As long as LWORK is at least
+                12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
+            if M is greater than or equal to N or
+                12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
+            if M is less than N, the code will execute correctly.
+            SMLSIZ is returned by ILAENV and is equal to the maximum
+            size of the subproblems at the bottom of the computation
+            tree (usually about 25), and
+               NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+            For good performance, LWORK should generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    IWORK   (workspace) INTEGER array, dimension (LIWORK)
+            LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+            where MINMN = MIN( M,N ).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  the algorithm for computing the SVD failed to converge;
+                  if INFO = i, i off-diagonal elements of an intermediate
+                  bidiagonal form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input arguments.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    maxmn = max(*m,*n);
+    mnthr = ilaenv_(&c__6, "DGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*ldb < max(1,maxmn)) {
+	*info = -7;
+    }
+
+    smlsiz = ilaenv_(&c__9, "DGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Compute workspace.
+       (Note: Comments in the code beginning "Workspace:" describe the
+       minimal amount of workspace needed at that point in the code,
+       as well as the preferred amount for good performance.
+       NB refers to the optimal block size for the immediately
+       following subroutine, as returned by ILAENV.)
+*/
+
+    minwrk = 1;
+    minmn = max(1,minmn);
+/* Computing MAX */
+    i__1 = (integer) (log((doublereal) minmn / (doublereal) (smlsiz + 1)) /
+	    log(2.)) + 1;
+    nlvl = max(i__1,0);
+
+    if (*info == 0) {
+	maxwrk = 0;
+	mm = *m;
+	if (*m >= *n && *m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m,
+		    n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "DORMQR", "LT",
+		    m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
+	    maxwrk = max(i__1,i__2);
+	}
+	if (*m >= *n) {
+
+/*
+             Path 1 - overdetermined or exactly determined.
+
+   Computing MAX
+*/
+	    i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "DGEBRD"
+		    , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "DORMBR",
+		    "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "DORMBR",
+		     "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = max(i__1,i__2);
+/* Computing 2nd power */
+	    i__1 = smlsiz + 1;
+	    wlalsd = *n * 9 + ((*n) << (1)) * smlsiz + ((*n) << (3)) * nlvl +
+		    *n * *nrhs + i__1 * i__1;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2),
+		    i__2 = *n * 3 + wlalsd;
+	    minwrk = max(i__1,i__2);
+	}
+	if (*n > *m) {
+/* Computing 2nd power */
+	    i__1 = smlsiz + 1;
+	    wlalsd = *m * 9 + ((*m) << (1)) * smlsiz + ((*m) << (3)) * nlvl +
+		    *m * *nrhs + i__1 * i__1;
+	    if (*n >= mnthr) {
+
+/*
+                Path 2a - underdetermined, with many more columns
+                than rows.
+*/
+
+		maxwrk = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1,
+			&c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1))
+			* ilaenv_(&c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs *
+			ilaenv_(&c__1, "DORMBR", "QLT", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) *
+			ilaenv_(&c__1, "DORMBR", "PLN", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+		if (*nrhs > 1) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+		    maxwrk = max(i__1,i__2);
+		} else {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1));
+		    maxwrk = max(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "DORMLQ",
+			"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + wlalsd;
+		maxwrk = max(i__1,i__2);
+	    } else {
+
+/*              Path 2 - remaining underdetermined cases. */
+
+		maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "DGEBRD", " ", m,
+			 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "DORMBR"
+			, "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR",
+			"PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
+		maxwrk = max(i__1,i__2);
+	    }
+/* Computing MAX */
+	    i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2),
+		    i__2 = *m * 3 + wlalsd;
+	    minwrk = max(i__1,i__2);
+	}
+	minwrk = min(minwrk,maxwrk);
+	work[1] = (doublereal) maxwrk;
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGELSD", &i__1);
+	return 0;
+    } else if (lquery) {
+	goto L10;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*m == 0) || (*n == 0)) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters. */
+
+    eps = PRECISION;
+    sfmin = SAFEMINIMUM;
+    smlnum = sfmin / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A if max entry outside range [SMLNUM,BIGNUM]. */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = max(*m,*n);
+	dlaset_("F", &i__1, nrhs, &c_b2879, &c_b2879, &b[b_offset], ldb);
+	dlaset_("F", &minmn, &c__1, &c_b2879, &c_b2879, &s[1], &c__1);
+	*rank = 0;
+	goto L10;
+    }
+
+/*     Scale B if max entry outside range [SMLNUM,BIGNUM]. */
+
+    bnrm = dlange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     If M < N make sure certain entries of B are zero. */
+
+    if (*m < *n) {
+	i__1 = *n - *m;
+	dlaset_("F", &i__1, nrhs, &c_b2879, &c_b2879, &b[*m + 1 + b_dim1],
+		ldb);
+    }
+
+/*     Overdetermined case. */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined. */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+	    itau = 1;
+	    nwork = itau + *n;
+
+/*
+             Compute A=Q*R.
+             (Workspace: need 2*N, prefer N+N*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+
+/*
+             Multiply B by transpose(Q).
+             (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+/*           Zero out below R. */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b2879, &c_b2879, &a[a_dim1 + 2],
+			 lda);
+	    }
+	}
+
+	ie = 1;
+	itauq = ie + *n;
+	itaup = itauq + *n;
+	nwork = itaup + *n;
+
+/*
+          Bidiagonalize R in A.
+          (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	dgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of R.
+          (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	dormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq],
+		&b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	dlalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb,
+		rcond, rank, &work[nwork], &iwork[1], info);
+	if (*info != 0) {
+	    goto L10;
+	}
+
+/*        Multiply B by right bidiagonalizing vectors of R. */
+
+	i__1 = *lwork - nwork + 1;
+	dormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
+		b[b_offset], ldb, &work[nwork], &i__1, info);
+
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 =
+		max(i__1,*nrhs), i__2 = *n - *m * 3;
+	if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2))
+		 {
+
+/*
+          Path 2a - underdetermined, with many more columns than rows
+          and sufficient workspace for an efficient algorithm.
+*/
+
+	    ldwork = *m;
+/*
+   Computing MAX
+   Computing MAX
+*/
+	    i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 =
+		     max(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * *
+		    lda + *m + *m * *nrhs;
+	    if (*lwork >= max(i__1,i__2)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    nwork = *m + 1;
+
+/*
+          Compute A=L*Q.
+          (Workspace: need 2*M, prefer M+M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+	    il = nwork;
+
+/*        Copy L to WORK(IL), zeroing out above its diagonal. */
+
+	    dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__1 = *m - 1;
+	    i__2 = *m - 1;
+	    dlaset_("U", &i__1, &i__2, &c_b2879, &c_b2879, &work[il + ldwork],
+		     &ldwork);
+	    ie = il + ldwork * *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+          Bidiagonalize L in WORK(IL).
+          (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of L.
+          (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    dlalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of L. */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
+		    itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Zero out below first M rows of B. */
+
+	    i__1 = *n - *m;
+	    dlaset_("F", &i__1, nrhs, &c_b2879, &c_b2879, &b[*m + 1 + b_dim1],
+		     ldb);
+	    nwork = itau + *m;
+
+/*
+          Multiply transpose(Q) by B.
+          (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases. */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+          Bidiagonalize A.
+          (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors.
+          (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    dlalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of A. */
+
+	    i__1 = *lwork - nwork + 1;
+	    dormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+	}
+    }
+
+/*     Undo scaling. */
+
+    if (iascl == 1) {
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L10:
+    work[1] = (doublereal) maxwrk;
+    return 0;
+
+/*     End of DGELSD */
+
+} /* dgelsd_ */
+
+/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, k;
+    static doublereal aii;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *), dlarfg_(integer *, doublereal *,
+	    doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DGEQR2 computes a QR factorization of a real m by n matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(m,n) by n upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQR2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1]
+		, &c__1, &tau[i__]);
+	if (i__ < *n) {
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.;
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    dlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of DGEQR2 */
+
+} /* dgeqr2_ */
+
+/* Subroutine */ int dgeqrf_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublereal *tau, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int dgeqr2_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *), dlarfb_(char *,
+	     char *, char *, char *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DGEQRF computes a QR factorization of a real M-by-N matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of min(m,n) elementary reflectors (see Further
+            Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *n * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGEQRF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the QR factorization of the current block
+             A(i:m,i:i+ib-1)
+*/
+
+	    i__3 = *m - i__ + 1;
+	    dgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *m - i__ + 1;
+		dlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib
+			+ 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	dgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DGEQRF */
+
+} /* dgeqrf_ */
+
+/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal *
+	a, integer *lda, doublereal *s, doublereal *u, integer *ldu,
+	doublereal *vt, integer *ldvt, doublereal *work, integer *lwork,
+	integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2, i__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, ie, il, ir, iu, blk;
+    static doublereal dum[1], eps;
+    static integer ivt, iscl;
+    static doublereal anrm;
+    static integer idum[1], ierr, itau;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    static integer chunk, minmn, wrkbl, itaup, itauq, mnthr;
+    static logical wntqa;
+    static integer nwork;
+    static logical wntqn, wntqo, wntqs;
+    extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal
+	    *, doublereal *, doublereal *, integer *, doublereal *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, integer *), dgebrd_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    extern doublereal dlamch_(char *), dlange_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *);
+    static integer bdspac;
+    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *, integer *),
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *,
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dgeqrf_(integer *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *, integer *), dlacpy_(char *,
+	     integer *, integer *, doublereal *, integer *, doublereal *,
+	    integer *), dlaset_(char *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, integer *),
+	    xerbla_(char *, integer *), dorgbr_(char *, integer *,
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static doublereal bignum;
+    extern /* Subroutine */ int dormbr_(char *, char *, char *, integer *,
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, integer *), dorglq_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    integer *), dorgqr_(integer *, integer *, integer *, doublereal *,
+	     integer *, doublereal *, doublereal *, integer *, integer *);
+    static integer ldwrkl, ldwrkr, minwrk, ldwrku, maxwrk, ldwkvt;
+    static doublereal smlnum;
+    static logical wntqas, lquery;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DGESDD computes the singular value decomposition (SVD) of a real
+    M-by-N matrix A, optionally computing the left and right singular
+    vectors.  If singular vectors are desired, it uses a
+    divide-and-conquer algorithm.
+
+    The SVD is written
+
+         A = U * SIGMA * transpose(V)
+
+    where SIGMA is an M-by-N matrix which is zero except for its
+    min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+    V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+    are the singular values of A; they are real and non-negative, and
+    are returned in descending order.  The first min(m,n) columns of
+    U and V are the left and right singular vectors of A.
+
+    Note that the routine returns VT = V**T, not V.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            Specifies options for computing all or part of the matrix U:
+            = 'A':  all M columns of U and all N rows of V**T are
+                    returned in the arrays U and VT;
+            = 'S':  the first min(M,N) columns of U and the first
+                    min(M,N) rows of V**T are returned in the arrays U
+                    and VT;
+            = 'O':  If M >= N, the first N columns of U are overwritten
+                    on the array A and all rows of V**T are returned in
+                    the array VT;
+                    otherwise, all columns of U are returned in the
+                    array U and the first M rows of V**T are overwritten
+                    in the array VT;
+            = 'N':  no columns of U or rows of V**T are computed.
+
+    M       (input) INTEGER
+            The number of rows of the input matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the input matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit,
+            if JOBZ = 'O',  A is overwritten with the first N columns
+                            of U (the left singular vectors, stored
+                            columnwise) if M >= N;
+                            A is overwritten with the first M rows
+                            of V**T (the right singular vectors, stored
+                            rowwise) otherwise.
+            if JOBZ .ne. 'O', the contents of A are destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The singular values of A, sorted so that S(i) >= S(i+1).
+
+    U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
+            UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+            UCOL = min(M,N) if JOBZ = 'S'.
+            If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+            orthogonal matrix U;
+            if JOBZ = 'S', U contains the first min(M,N) columns of U
+            (the left singular vectors, stored columnwise);
+            if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= 1; if
+            JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+
+    VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
+            If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+            N-by-N orthogonal matrix V**T;
+            if JOBZ = 'S', VT contains the first min(M,N) rows of
+            V**T (the right singular vectors, stored rowwise);
+            if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.  LDVT >= 1; if
+            JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+            if JOBZ = 'S', LDVT >= min(M,N).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= 1.
+            If JOBZ = 'N',
+              LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)).
+            If JOBZ = 'O',
+              LWORK >= 3*min(M,N)*min(M,N) +
+                       max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)).
+            If JOBZ = 'S' or 'A'
+              LWORK >= 3*min(M,N)*min(M,N) +
+                       max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)).
+            For good performance, LWORK should generally be larger.
+            If LWORK < 0 but other input arguments are legal, WORK(1)
+            returns the optimal LWORK.
+
+    IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  DBDSDC did not converge, updating process failed.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    mnthr = (integer) (minmn * 11. / 6.);
+    wntqa = lsame_(jobz, "A");
+    wntqs = lsame_(jobz, "S");
+    wntqas = (wntqa) || (wntqs);
+    wntqo = lsame_(jobz, "O");
+    wntqn = lsame_(jobz, "N");
+    minwrk = 1;
+    maxwrk = 1;
+    lquery = *lwork == -1;
+
+    if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && *
+	    ldu < *m)) {
+	*info = -8;
+    } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt <
+	    minmn)) || (wntqo && *m >= *n && *ldvt < *n)) {
+	*info = -10;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         NB refers to the optimal block size for the immediately
+         following subroutine, as returned by ILAENV.)
+*/
+
+    if (*info == 0 && *m > 0 && *n > 0) {
+	if (*m >= *n) {
+
+/*           Compute space needed for DBDSDC */
+
+	    if (wntqn) {
+		bdspac = *n * 7;
+	    } else {
+		bdspac = *n * 3 * *n + ((*n) << (2));
+	    }
+	    if (*m >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1 (M much larger than N, JOBZ='N') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = bdspac + *n;
+		} else if (wntqo) {
+
+/*                 Path 2 (M much larger than N, JOBZ='O') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + ((*n) << (1)) * *n;
+		    minwrk = bdspac + ((*n) << (1)) * *n + *n * 3;
+		} else if (wntqs) {
+
+/*                 Path 3 (M much larger than N, JOBZ='S') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "DORGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+		    minwrk = bdspac + *n * *n + *n * 3;
+		} else if (wntqa) {
+
+/*                 Path 4 (M much larger than N, JOBZ='A') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "DGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "DORGQR",
+			    " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+		    minwrk = bdspac + *n * *n + *n * 3;
+		}
+	    } else {
+
+/*              Path 5 (M at least N, but not much larger) */
+
+		wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m,
+			n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		if (wntqn) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *n * 3 + max(*m,bdspac);
+		} else if (wntqo) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *m, i__2 = *n * *n + bdspac;
+		    minwrk = *n * 3 + max(i__1,i__2);
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *n * 3 + max(*m,bdspac);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = bdspac + *n * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *n * 3 + max(*m,bdspac);
+		}
+	    }
+	} else {
+
+/*           Compute space needed for DBDSDC */
+
+	    if (wntqn) {
+		bdspac = *m * 7;
+	    } else {
+		bdspac = *m * 3 * *m + ((*m) << (2));
+	    }
+	    if (*n >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1t (N much larger than M, JOBZ='N') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = bdspac + *m;
+		} else if (wntqo) {
+
+/*                 Path 2t (N much larger than M, JOBZ='O') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + ((*m) << (1)) * *m;
+		    minwrk = bdspac + ((*m) << (1)) * *m + *m * 3;
+		} else if (wntqs) {
+
+/*                 Path 3t (N much larger than M, JOBZ='S') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "DORGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+		    minwrk = bdspac + *m * *m + *m * 3;
+		} else if (wntqa) {
+
+/*                 Path 4t (N much larger than M, JOBZ='A') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "DGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "DORGLQ",
+			    " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "DGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+		    minwrk = bdspac + *m * *m + *m * 3;
+		}
+	    } else {
+
+/*              Path 5t (N greater than M, but not much larger) */
+
+		wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "DGEBRD", " ", m,
+			n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		if (wntqn) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3 + max(*n,bdspac);
+		} else if (wntqo) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *n, i__2 = *m * *m + bdspac;
+		    minwrk = *m * 3 + max(i__1,i__2);
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3 + max(*n,bdspac);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "DORMBR"
+			    , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3 + max(*n,bdspac);
+		}
+	    }
+	}
+	work[1] = (doublereal) maxwrk;
+    }
+
+    if (*lwork < minwrk && ! lquery) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGESDD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	if (*lwork >= 1) {
+	    work[1] = 1.;
+	}
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = PRECISION;
+    smlnum = sqrt(SAFEMINIMUM) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = dlange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*
+          A has at least as many rows as columns. If A has sufficiently
+          more rows than columns, first reduce using the QR
+          decomposition (if sufficient workspace available)
+*/
+
+	if (*m >= mnthr) {
+
+	    if (wntqn) {
+
+/*
+                Path 1 (M much larger than N, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (Workspace: need 2*N, prefer N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out below R */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b2879, &c_b2879, &a[a_dim1 + 2],
+			 lda);
+		ie = 1;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *n;
+
+/*
+                Perform bidiagonal SVD, computing singular values only
+                (Workspace: need N+BDSPAC)
+*/
+
+		dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2 (M much larger than N, JOBZ = 'O')
+                N left singular vectors to be overwritten on A and
+                N right singular vectors to be computed in VT
+*/
+
+		ir = 1;
+
+/*              WORK(IR) is LDWRKR by N */
+
+		if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) {
+		    ldwrkr = *lda;
+		} else {
+		    ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n;
+		}
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		dlaset_("L", &i__1, &i__2, &c_b2879, &c_b2879, &work[ir + 1],
+			&ldwrkr);
+
+/*
+                Generate Q in A
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in VT, copying result to WORK(IR)
+                (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*              WORK(IU) is N by N */
+
+		iu = nwork;
+		nwork = iu + *n * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in WORK(IU) and computing right
+                singular vectors of bidiagonal matrix in VT
+                (Workspace: need N+N*N+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite WORK(IU) by left singular vectors of R
+                and VT by right singular vectors of R
+                (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &work[iu], n, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IU), storing result in WORK(IR) and copying to A
+                (Workspace: need 2*N*N, prefer N*N+M*N)
+*/
+
+		i__1 = *m;
+		i__2 = ldwrkr;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = min(i__3,ldwrkr);
+		    dgemm_("N", "N", &chunk, n, n, &c_b2865, &a[i__ + a_dim1],
+			     lda, &work[iu], n, &c_b2879, &work[ir], &ldwrkr);
+		    dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ +
+			    a_dim1], lda);
+/* L10: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Path 3 (M much larger than N, JOBZ='S')
+                N left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		ir = 1;
+
+/*              WORK(IR) is N by N */
+
+		ldwrkr = *n;
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		dlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		dlaset_("L", &i__2, &i__1, &c_b2879, &c_b2879, &work[ir + 1],
+			&ldwrkr);
+
+/*
+                Generate Q in A
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in WORK(IR)
+                (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagoal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need N+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of R and VT
+                by right singular vectors of R
+                (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IR), storing result in U
+                (Workspace: need N*N)
+*/
+
+		dlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr);
+		dgemm_("N", "N", m, n, n, &c_b2865, &a[a_offset], lda, &work[
+			ir], &ldwrkr, &c_b2879, &u[u_offset], ldu);
+
+	    } else if (wntqa) {
+
+/*
+                Path 4 (M much larger than N, JOBZ='A')
+                M left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		itau = iu + ldwrku * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R, copying result to U
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		dlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Generate Q in U
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+		i__2 = *lwork - nwork + 1;
+		dorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+
+/*              Produce R in A, zeroing out other entries */
+
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		dlaset_("L", &i__2, &i__1, &c_b2879, &c_b2879, &a[a_dim1 + 2],
+			 lda);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in WORK(IU) and computing right
+                singular vectors of bidiagonal matrix in VT
+                (Workspace: need N+N*N+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite WORK(IU) by left singular vectors of R and VT
+                by right singular vectors of R
+                (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			ierr);
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in U by left singular vectors of R in
+                WORK(IU), storing result in A
+                (Workspace: need N*N)
+*/
+
+		dgemm_("N", "N", m, n, n, &c_b2865, &u[u_offset], ldu, &work[
+			iu], &ldwrku, &c_b2879, &a[a_offset], lda);
+
+/*              Copy left singular vectors of A from A to U */
+
+		dlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+	    }
+
+	} else {
+
+/*
+             M .LT. MNTHR
+
+             Path 5 (M at least N, but not much larger)
+             Reduce to bidiagonal form without QR decomposition
+*/
+
+	    ie = 1;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*
+             Bidiagonalize A
+             (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Perform bidiagonal SVD, only computing singular values
+                (Workspace: need N+BDSPAC)
+*/
+
+		dbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+		iu = nwork;
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		    nwork = iu + ldwrku * *n;
+		    dlaset_("F", m, n, &c_b2879, &c_b2879, &work[iu], &ldwrku);
+		} else {
+
+/*                 WORK( IU ) is N by N */
+
+		    ldwrku = *n;
+		    nwork = iu + ldwrku * *n;
+
+/*                 WORK(IR) is LDWRKR by N */
+
+		    ir = nwork;
+		    ldwrkr = (*lwork - *n * *n - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in WORK(IU) and computing right
+                singular vectors of bidiagonal matrix in VT
+                (Workspace: need N+N*N+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, &
+			vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[
+			1], info);
+
+/*
+                Overwrite VT by right singular vectors of A
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*
+                   Overwrite WORK(IU) by left singular vectors of A
+                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			    itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			    ierr);
+
+/*                 Copy left singular vectors of A from WORK(IU) to A */
+
+		    dlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate Q in A
+                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    dorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[nwork], &i__2, &ierr);
+
+/*
+                   Multiply Q in A by left singular vectors of
+                   bidiagonal matrix in WORK(IU), storing result in
+                   WORK(IR) and copying to A
+                   (Workspace: need 2*N*N, prefer N*N+M*N)
+*/
+
+		    i__2 = *m;
+		    i__1 = ldwrkr;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *m - i__ + 1;
+			chunk = min(i__3,ldwrkr);
+			dgemm_("N", "N", &chunk, n, n, &c_b2865, &a[i__ +
+				a_dim1], lda, &work[iu], &ldwrku, &c_b2879, &
+				work[ir], &ldwrkr);
+			dlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ +
+				a_dim1], lda);
+/* L20: */
+		    }
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need N+BDSPAC)
+*/
+
+		dlaset_("F", m, n, &c_b2879, &c_b2879, &u[u_offset], ldu);
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need 3*N, prefer 2*N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need N+BDSPAC)
+*/
+
+		dlaset_("F", m, m, &c_b2879, &c_b2879, &u[u_offset], ldu);
+		dbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*              Set the right corner of U to identity matrix */
+
+		i__1 = *m - *n;
+		i__2 = *m - *n;
+		dlaset_("F", &i__1, &i__2, &c_b2879, &c_b2865, &u[*n + 1 + (*
+			n + 1) * u_dim1], ldu);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    } else {
+
+/*
+          A has more columns than rows. If A has sufficiently more
+          columns than rows, first reduce using the LQ decomposition (if
+          sufficient workspace available)
+*/
+
+	if (*n >= mnthr) {
+
+	    if (wntqn) {
+
+/*
+                Path 1t (N much larger than M, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (Workspace: need 2*M, prefer M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out above L */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		dlaset_("U", &i__1, &i__2, &c_b2879, &c_b2879, &a[((a_dim1) <<
+			 (1)) + 1], lda);
+		ie = 1;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *m;
+
+/*
+                Perform bidiagonal SVD, computing singular values only
+                (Workspace: need M+BDSPAC)
+*/
+
+		dbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2t (N much larger than M, JOBZ='O')
+                M right singular vectors to be overwritten on A and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+
+/*              IVT is M by M */
+
+		il = ivt + *m * *m;
+		if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) {
+
+/*                 WORK(IL) is M by N */
+
+		    ldwrkl = *m;
+		    chunk = *n;
+		} else {
+		    ldwrkl = *m;
+		    chunk = (*lwork - *m * *m) / *m;
+		}
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy L to WORK(IL), zeroing about above it */
+
+		dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		dlaset_("U", &i__1, &i__2, &c_b2879, &c_b2879, &work[il +
+			ldwrkl], &ldwrkl);
+
+/*
+                Generate Q in A
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IL)
+                (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U, and computing right singular
+                vectors of bidiagonal matrix in WORK(IVT)
+                (Workspace: need M+M*M+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], m, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of L and WORK(IVT)
+                by right singular vectors of L
+                (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &work[ivt], m, &work[nwork], &i__1, &ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IVT) by Q
+                in A, storing result in WORK(IL) and copying to A
+                (Workspace: need 2*M*M, prefer M*M+M*N)
+*/
+
+		i__1 = *n;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = min(i__3,chunk);
+		    dgemm_("N", "N", m, &blk, m, &c_b2865, &work[ivt], m, &a[
+			    i__ * a_dim1 + 1], lda, &c_b2879, &work[il], &
+			    ldwrkl);
+		    dlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1
+			    + 1], lda);
+/* L30: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Path 3t (N much larger than M, JOBZ='S')
+                M right singular vectors to be computed in VT and
+                M left singular vectors to be computed in U
+*/
+
+		il = 1;
+
+/*              WORK(IL) is M by M */
+
+		ldwrkl = *m;
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		dlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		dlaset_("U", &i__2, &i__1, &c_b2879, &c_b2879, &work[il +
+			ldwrkl], &ldwrkl);
+
+/*
+                Generate Q in A
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IU), copying result to U
+                (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need M+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of L and VT
+                by right singular vectors of L
+                (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IL) by
+                Q in A, storing result in VT
+                (Workspace: need M*M)
+*/
+
+		dlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl);
+		dgemm_("N", "N", m, n, m, &c_b2865, &work[il], &ldwrkl, &a[
+			a_offset], lda, &c_b2879, &vt[vt_offset], ldvt);
+
+	    } else if (wntqa) {
+
+/*
+                Path 4t (N much larger than M, JOBZ='A')
+                N right singular vectors to be computed in VT and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+
+		ldwkvt = *m;
+		itau = ivt + ldwkvt * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q, copying result to VT
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		dlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Generate Q in VT
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
+			nwork], &i__2, &ierr);
+
+/*              Produce L in A, zeroing out other entries */
+
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		dlaset_("U", &i__2, &i__1, &c_b2879, &c_b2879, &a[((a_dim1) <<
+			 (1)) + 1], lda);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in WORK(IVT)
+                (Workspace: need M+M*M+BDSPAC)
+*/
+
+		dbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*
+                Overwrite U by left singular vectors of L and WORK(IVT)
+                by right singular vectors of L
+                (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IVT) by
+                Q in VT, storing result in A
+                (Workspace: need M*M)
+*/
+
+		dgemm_("N", "N", m, n, m, &c_b2865, &work[ivt], &ldwkvt, &vt[
+			vt_offset], ldvt, &c_b2879, &a[a_offset], lda);
+
+/*              Copy right singular vectors of A from A to VT */
+
+		dlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+	    }
+
+	} else {
+
+/*
+             N .LT. MNTHR
+
+             Path 5t (N greater than M, but not much larger)
+             Reduce to bidiagonal form without LQ decomposition
+*/
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+             Bidiagonalize A
+             (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    dgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Perform bidiagonal SVD, only computing singular values
+                (Workspace: need M+BDSPAC)
+*/
+
+		dbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+		ldwkvt = *m;
+		ivt = nwork;
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    dlaset_("F", m, n, &c_b2879, &c_b2879, &work[ivt], &
+			    ldwkvt);
+		    nwork = ivt + ldwkvt * *n;
+		} else {
+
+/*                 WORK( IVT ) is M by M */
+
+		    nwork = ivt + ldwkvt * *m;
+		    il = nwork;
+
+/*                 WORK(IL) is M by CHUNK */
+
+		    chunk = (*lwork - *m * *m - *m * 3) / *m;
+		}
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in WORK(IVT)
+                (Workspace: need M*M+BDSPAC)
+*/
+
+		dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*
+                Overwrite U by left singular vectors of A
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*
+                   Overwrite WORK(IVT) by left singular vectors of A
+                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			    itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2,
+			    &ierr);
+
+/*                 Copy right singular vectors of A from WORK(IVT) to A */
+
+		    dlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate P**T in A
+                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    dorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[nwork], &i__2, &ierr);
+
+/*
+                   Multiply Q in A by right singular vectors of
+                   bidiagonal matrix in WORK(IVT), storing result in
+                   WORK(IL) and copying to A
+                   (Workspace: need 2*M*M, prefer M*M+M*N)
+*/
+
+		    i__2 = *n;
+		    i__1 = chunk;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *n - i__ + 1;
+			blk = min(i__3,chunk);
+			dgemm_("N", "N", m, &blk, m, &c_b2865, &work[ivt], &
+				ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b2879, &
+				work[il], m);
+			dlacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 +
+				1], lda);
+/* L40: */
+		    }
+		}
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need M+BDSPAC)
+*/
+
+		dlaset_("F", m, n, &c_b2879, &c_b2879, &vt[vt_offset], ldvt);
+		dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need 3*M, prefer 2*M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need M+BDSPAC)
+*/
+
+		dlaset_("F", n, n, &c_b2879, &c_b2879, &vt[vt_offset], ldvt);
+		dbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*              Set the right corner of VT to identity matrix */
+
+		i__1 = *n - *m;
+		i__2 = *n - *m;
+		dlaset_("F", &i__1, &i__2, &c_b2879, &c_b2865, &vt[*m + 1 + (*
+			m + 1) * vt_dim1], ldvt);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need 2*M+N, prefer 2*M+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		dormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		dormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (doublereal) maxwrk;
+
+    return 0;
+
+/*     End of DGESDD */
+
+} /* dgesdd_ */
+
+/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer
+	*lda, integer *ipiv, doublereal *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int dgetrf_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *), dgetrs_(char *, integer *, integer *, doublereal *,
+	    integer *, integer *, doublereal *, integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DGESV computes the solution to a real system of linear equations
+       A * X = B,
+    where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+
+    The LU decomposition with partial pivoting and row interchanges is
+    used to factor A as
+       A = P * L * U,
+    where P is a permutation matrix, L is unit lower triangular, and U is
+    upper triangular.  The factored form of A is then used to solve the
+    system of equations A * X = B.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of linear equations, i.e., the order of the
+            matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the N-by-N coefficient matrix A.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (output) INTEGER array, dimension (N)
+            The pivot indices that define the permutation matrix P;
+            row i of the matrix was interchanged with row IPIV(i).
+
+    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+            On entry, the N-by-NRHS matrix of right hand side matrix B.
+            On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+                  has been completed, but the factor U is exactly
+                  singular, so the solution could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*nrhs < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGESV ", &i__1);
+	return 0;
+    }
+
+/*     Compute the LU factorization of A. */
+
+    dgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
+    if (*info == 0) {
+
+/*        Solve the system A*X = B, overwriting B with X. */
+
+	dgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
+		b_offset], ldb, info);
+    }
+    return 0;
+
+/*     End of DGESV */
+
+} /* dgesv_ */
+
+/* Subroutine */ int dgetf2_(integer *m, integer *n, doublereal *a, integer *
+	lda, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer j, jp;
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *), dscal_(integer *, doublereal *, doublereal *, integer
+	    *), dswap_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1992
+
+
+    Purpose
+    =======
+
+    DGETF2 computes an LU factorization of a general m-by-n matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the m by n matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+                 has been completed, but the factor U is exactly
+                 singular, and division by zero will occur if it is used
+                 to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGETF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = min(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Find pivot and test for singularity. */
+
+	i__2 = *m - j + 1;
+	jp = j - 1 + idamax_(&i__2, &a[j + j * a_dim1], &c__1);
+	ipiv[j] = jp;
+	if (a[jp + j * a_dim1] != 0.) {
+
+/*           Apply the interchange to columns 1:N. */
+
+	    if (jp != j) {
+		dswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+	    }
+
+/*           Compute elements J+1:M of J-th column. */
+
+	    if (j < *m) {
+		i__2 = *m - j;
+		d__1 = 1. / a[j + j * a_dim1];
+		dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+
+	} else if (*info == 0) {
+
+	    *info = j;
+	}
+
+	if (j < min(*m,*n)) {
+
+/*           Update trailing submatrix. */
+
+	    i__2 = *m - j;
+	    i__3 = *n - j;
+	    dger_(&i__2, &i__3, &c_b3001, &a[j + 1 + j * a_dim1], &c__1, &a[j
+		    + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1],
+		    lda);
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of DGETF2 */
+
+} /* dgetf2_ */
+
+/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer *
+	lda, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer i__, j, jb, nb;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer iinfo;
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *), dgetf2_(
+	    integer *, integer *, doublereal *, integer *, integer *, integer
+	    *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dlaswp_(integer *, doublereal *, integer *,
+	    integer *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DGETRF computes an LU factorization of a general M-by-N matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the M-by-N matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+                  has been completed, but the factor U is exactly
+                  singular, and division by zero will occur if it is used
+                  to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGETRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "DGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    if ((nb <= 1) || (nb >= min(*m,*n))) {
+
+/*        Use unblocked code. */
+
+	dgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code. */
+
+	i__1 = min(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = min(*m,*n) - j + 1;
+	    jb = min(i__3,nb);
+
+/*
+             Factor diagonal and subdiagonal blocks and test for exact
+             singularity.
+*/
+
+	    i__3 = *m - j + 1;
+	    dgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+
+/*           Adjust INFO and the pivot indices. */
+
+	    if (*info == 0 && iinfo > 0) {
+		*info = iinfo + j - 1;
+	    }
+/* Computing MIN */
+	    i__4 = *m, i__5 = j + jb - 1;
+	    i__3 = min(i__4,i__5);
+	    for (i__ = j; i__ <= i__3; ++i__) {
+		ipiv[i__] = j - 1 + ipiv[i__];
+/* L10: */
+	    }
+
+/*           Apply interchanges to columns 1:J-1. */
+
+	    i__3 = j - 1;
+	    i__4 = j + jb - 1;
+	    dlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+
+	    if (j + jb <= *n) {
+
+/*              Apply interchanges to columns J+JB:N. */
+
+		i__3 = *n - j - jb + 1;
+		i__4 = j + jb - 1;
+		dlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
+			ipiv[1], &c__1);
+
+/*              Compute block row of U. */
+
+		i__3 = *n - j - jb + 1;
+		dtrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
+			c_b2865, &a[j + j * a_dim1], lda, &a[j + (j + jb) *
+			a_dim1], lda);
+		if (j + jb <= *m) {
+
+/*                 Update trailing submatrix. */
+
+		    i__3 = *m - j - jb + 1;
+		    i__4 = *n - j - jb + 1;
+		    dgemm_("No transpose", "No transpose", &i__3, &i__4, &jb,
+			    &c_b3001, &a[j + jb + j * a_dim1], lda, &a[j + (j
+			    + jb) * a_dim1], lda, &c_b2865, &a[j + jb + (j +
+			    jb) * a_dim1], lda);
+		}
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of DGETRF */
+
+} /* dgetrf_ */
+
+/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs,
+	doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *
+	ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *), xerbla_(
+	    char *, integer *), dlaswp_(integer *, doublereal *,
+	    integer *, integer *, integer *, integer *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DGETRS solves a system of linear equations
+       A * X = B  or  A' * X = B
+    with a general N-by-N matrix A using the LU factorization computed
+    by DGETRF.
+
+    Arguments
+    =========
+
+    TRANS   (input) CHARACTER*1
+            Specifies the form of the system of equations:
+            = 'N':  A * X = B  (No transpose)
+            = 'T':  A'* X = B  (Transpose)
+            = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The factors L and U from the factorization A = P*L*U
+            as computed by DGETRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (input) INTEGER array, dimension (N)
+            The pivot indices from DGETRF; for 1<=i<=N, row i of the
+            matrix was interchanged with row IPIV(i).
+
+    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DGETRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (notran) {
+
+/*
+          Solve A * X = B.
+
+          Apply row interchanges to the right hand sides.
+*/
+
+	dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
+
+/*        Solve L*X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b2865, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2865,
+		 &a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A' * X = B.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2865, &
+		a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b2865, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Apply row interchanges to the solution vectors. */
+
+	dlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
+    }
+
+    return 0;
+
+/*     End of DGETRS */
+
+} /* dgetrs_ */
+
+/* Subroutine */ int dhseqr_(char *job, char *compz, integer *n, integer *ilo,
+	 integer *ihi, doublereal *h__, integer *ldh, doublereal *wr,
+	doublereal *wi, doublereal *z__, integer *ldz, doublereal *work,
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    doublereal d__1, d__2;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__, j, k, l;
+    static doublereal s[225]	/* was [15][15] */, v[16];
+    static integer i1, i2, ii, nh, nr, ns, nv;
+    static doublereal vv[16];
+    static integer itn;
+    static doublereal tau;
+    static integer its;
+    static doublereal ulp, tst1;
+    static integer maxb;
+    static doublereal absw;
+    static integer ierr;
+    static doublereal unfl, temp, ovfl;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *);
+    static integer itemp;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static logical initz, wantt, wantz;
+    extern doublereal dlapy2_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+
+    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
+	     integer *, doublereal *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern doublereal dlanhs_(char *, integer *, doublereal *, integer *,
+	    doublereal *);
+    extern /* Subroutine */ int dlahqr_(logical *, logical *, integer *,
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *, integer *, doublereal *, integer *,
+	    integer *), dlacpy_(char *, integer *, integer *, doublereal *,
+	    integer *, doublereal *, integer *), dlaset_(char *,
+	    integer *, integer *, doublereal *, doublereal *, doublereal *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int xerbla_(char *, integer *), dlarfx_(
+	    char *, integer *, integer *, doublereal *, doublereal *,
+	    doublereal *, integer *, doublereal *);
+    static doublereal smlnum;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DHSEQR computes the eigenvalues of a real upper Hessenberg matrix H
+    and, optionally, the matrices T and Z from the Schur decomposition
+    H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur
+    form), and Z is the orthogonal matrix of Schur vectors.
+
+    Optionally Z may be postmultiplied into an input orthogonal matrix Q,
+    so that this routine can give the Schur factorization of a matrix A
+    which has been reduced to the Hessenberg form H by the orthogonal
+    matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            = 'E':  compute eigenvalues only;
+            = 'S':  compute eigenvalues and the Schur form T.
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  no Schur vectors are computed;
+            = 'I':  Z is initialized to the unit matrix and the matrix Z
+                    of Schur vectors of H is returned;
+            = 'V':  Z must contain an orthogonal matrix Q on entry, and
+                    the product Q*Z is returned.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to DGEBAL, and then passed to SGEHRD
+            when the matrix output by DGEBAL is reduced to Hessenberg
+            form. Otherwise ILO and IHI should be set to 1 and N
+            respectively.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if JOB = 'S', H contains the upper quasi-triangular
+            matrix T from the Schur decomposition (the Schur form);
+            2-by-2 diagonal blocks (corresponding to complex conjugate
+            pairs of eigenvalues) are returned in standard form, with
+            H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E',
+            the contents of H are unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    WR      (output) DOUBLE PRECISION array, dimension (N)
+    WI      (output) DOUBLE PRECISION array, dimension (N)
+            The real and imaginary parts, respectively, of the computed
+            eigenvalues. If two eigenvalues are computed as a complex
+            conjugate pair, they are stored in consecutive elements of
+            WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and
+            WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the
+            same order as on the diagonal of the Schur form returned in
+            H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2
+            diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and
+            WI(i+1) = -WI(i).
+
+    Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+            If COMPZ = 'N': Z is not referenced.
+            If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+            contains the orthogonal matrix Z of the Schur vectors of H.
+            If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+            which is assumed to be equal to the unit matrix except for
+            the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+            Normally Q is the orthogonal matrix generated by DORGHR after
+            the call to DGEHRD which formed the Hessenberg matrix H.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.
+            LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, DHSEQR failed to compute all of the
+                  eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+                  elements 1:ilo-1 and i+1:n of WR and WI contain those
+                  eigenvalues which have been successfully computed.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --wr;
+    --wi;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    wantt = lsame_(job, "S");
+    initz = lsame_(compz, "I");
+    wantz = (initz) || (lsame_(compz, "V"));
+
+    *info = 0;
+    work[1] = (doublereal) max(1,*n);
+    lquery = *lwork == -1;
+    if (! lsame_(job, "E") && ! wantt) {
+	*info = -1;
+    } else if (! lsame_(compz, "N") && ! wantz) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*ldh < max(1,*n)) {
+	*info = -7;
+    } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) {
+	*info = -11;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DHSEQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Initialize Z, if necessary */
+
+    if (initz) {
+	dlaset_("Full", n, n, &c_b2879, &c_b2865, &z__[z_offset], ldz);
+    }
+
+/*     Store the eigenvalues isolated by DGEBAL. */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	wr[i__] = h__[i__ + i__ * h_dim1];
+	wi[i__] = 0.;
+/* L10: */
+    }
+    i__1 = *n;
+    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+	wr[i__] = h__[i__ + i__ * h_dim1];
+	wi[i__] = 0.;
+/* L20: */
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	wr[*ilo] = h__[*ilo + *ilo * h_dim1];
+	wi[*ilo] = 0.;
+	return 0;
+    }
+
+/*
+       Set rows and columns ILO to IHI to zero below the first
+       subdiagonal.
+*/
+
+    i__1 = *ihi - 2;
+    for (j = *ilo; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = j + 2; i__ <= i__2; ++i__) {
+	    h__[i__ + j * h_dim1] = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    nh = *ihi - *ilo + 1;
+
+/*
+       Determine the order of the multi-shift QR algorithm to be used.
+
+   Writing concatenation
+*/
+    i__3[0] = 1, a__1[0] = job;
+    i__3[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+    ns = ilaenv_(&c__4, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+/* Writing concatenation */
+    i__3[0] = 1, a__1[0] = job;
+    i__3[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+    maxb = ilaenv_(&c__8, "DHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (((ns <= 2) || (ns > nh)) || (maxb >= nh)) {
+
+/*        Use the standard double-shift algorithm */
+
+	dlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[
+		1], ilo, ihi, &z__[z_offset], ldz, info);
+	return 0;
+    }
+    maxb = max(3,maxb);
+/* Computing MIN */
+    i__1 = min(ns,maxb);
+    ns = min(i__1,15);
+
+/*
+       Now 2 < NS <= MAXB < NH.
+
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    unfl = SAFEMINIMUM;
+    ovfl = 1. / unfl;
+    dlabad_(&unfl, &ovfl);
+    ulp = PRECISION;
+    smlnum = unfl * (nh / ulp);
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are set inside the main loop.
+*/
+
+    if (wantt) {
+	i1 = 1;
+	i2 = *n;
+    }
+
+/*     ITN is the total number of multiple-shift QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of at most MAXB. Each iteration of the loop
+       works with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L50:
+    l = *ilo;
+    if (i__ < *ilo) {
+	goto L170;
+    }
+
+/*
+       Perform multiple-shift QR iterations on rows and columns ILO to I
+       until a submatrix of order at most MAXB splits off at the bottom
+       because a subdiagonal element has become negligible.
+*/
+
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 =
+		     h__[k + k * h_dim1], abs(d__2));
+	    if (tst1 == 0.) {
+		i__4 = i__ - l + 1;
+		tst1 = dlanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1]
+			);
+	    }
+/* Computing MAX */
+	    d__2 = ulp * tst1;
+	    if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2,
+		    smlnum)) {
+		goto L70;
+	    }
+/* L60: */
+	}
+L70:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible. */
+
+	    h__[l + (l - 1) * h_dim1] = 0.;
+	}
+
+/*        Exit from loop if a submatrix of order <= MAXB has split off. */
+
+	if (l >= i__ - maxb + 1) {
+	    goto L160;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! wantt) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 20) || (its == 30)) {
+
+/*           Exceptional shifts. */
+
+	    i__2 = i__;
+	    for (ii = i__ - ns + 1; ii <= i__2; ++ii) {
+		wr[ii] = ((d__1 = h__[ii + (ii - 1) * h_dim1], abs(d__1)) + (
+			d__2 = h__[ii + ii * h_dim1], abs(d__2))) * 1.5;
+		wi[ii] = 0.;
+/* L80: */
+	    }
+	} else {
+
+/*           Use eigenvalues of trailing submatrix of order NS as shifts. */
+
+	    dlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) *
+		    h_dim1], ldh, s, &c__15);
+	    dlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ -
+		    ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset],
+		    ldz, &ierr);
+	    if (ierr > 0) {
+
+/*
+                If DLAHQR failed to compute all NS eigenvalues, use the
+                unconverged diagonal elements as the remaining shifts.
+*/
+
+		i__2 = ierr;
+		for (ii = 1; ii <= i__2; ++ii) {
+		    wr[i__ - ns + ii] = s[ii + ii * 15 - 16];
+		    wi[i__ - ns + ii] = 0.;
+/* L90: */
+		}
+	    }
+	}
+
+/*
+          Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+          where G is the Hessenberg submatrix H(L:I,L:I) and w is
+          the vector of shifts (stored in WR and WI). The result is
+          stored in the local array V.
+*/
+
+	v[0] = 1.;
+	i__2 = ns + 1;
+	for (ii = 2; ii <= i__2; ++ii) {
+	    v[ii - 1] = 0.;
+/* L100: */
+	}
+	nv = 1;
+	i__2 = i__;
+	for (j = i__ - ns + 1; j <= i__2; ++j) {
+	    if (wi[j] >= 0.) {
+		if (wi[j] == 0.) {
+
+/*                 real shift */
+
+		    i__4 = nv + 1;
+		    dcopy_(&i__4, v, &c__1, vv, &c__1);
+		    i__4 = nv + 1;
+		    d__1 = -wr[j];
+		    dgemv_("No transpose", &i__4, &nv, &c_b2865, &h__[l + l *
+			    h_dim1], ldh, vv, &c__1, &d__1, v, &c__1);
+		    ++nv;
+		} else if (wi[j] > 0.) {
+
+/*                 complex conjugate pair of shifts */
+
+		    i__4 = nv + 1;
+		    dcopy_(&i__4, v, &c__1, vv, &c__1);
+		    i__4 = nv + 1;
+		    d__1 = wr[j] * -2.;
+		    dgemv_("No transpose", &i__4, &nv, &c_b2865, &h__[l + l *
+			    h_dim1], ldh, v, &c__1, &d__1, vv, &c__1);
+		    i__4 = nv + 1;
+		    itemp = idamax_(&i__4, vv, &c__1);
+/* Computing MAX */
+		    d__2 = (d__1 = vv[itemp - 1], abs(d__1));
+		    temp = 1. / max(d__2,smlnum);
+		    i__4 = nv + 1;
+		    dscal_(&i__4, &temp, vv, &c__1);
+		    absw = dlapy2_(&wr[j], &wi[j]);
+		    temp = temp * absw * absw;
+		    i__4 = nv + 2;
+		    i__5 = nv + 1;
+		    dgemv_("No transpose", &i__4, &i__5, &c_b2865, &h__[l + l
+			    * h_dim1], ldh, vv, &c__1, &temp, v, &c__1);
+		    nv += 2;
+		}
+
+/*
+                Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+                reset it to the unit vector.
+*/
+
+		itemp = idamax_(&nv, v, &c__1);
+		temp = (d__1 = v[itemp - 1], abs(d__1));
+		if (temp == 0.) {
+		    v[0] = 1.;
+		    i__4 = nv;
+		    for (ii = 2; ii <= i__4; ++ii) {
+			v[ii - 1] = 0.;
+/* L110: */
+		    }
+		} else {
+		    temp = max(temp,smlnum);
+		    d__1 = 1. / temp;
+		    dscal_(&nv, &d__1, v, &c__1);
+		}
+	    }
+/* L120: */
+	}
+
+/*        Multiple-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = l; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix. NR is the order of G.
+
+   Computing MIN
+*/
+	    i__4 = ns + 1, i__5 = i__ - k + 1;
+	    nr = min(i__4,i__5);
+	    if (k > l) {
+		dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    dlarfg_(&nr, v, &v[1], &c__1, &tau);
+	    if (k > l) {
+		h__[k + (k - 1) * h_dim1] = v[0];
+		i__4 = i__;
+		for (ii = k + 1; ii <= i__4; ++ii) {
+		    h__[ii + (k - 1) * h_dim1] = 0.;
+/* L130: */
+		}
+	    }
+	    v[0] = 1.;
+
+/*
+             Apply G from the left to transform the rows of the matrix in
+             columns K to I2.
+*/
+
+	    i__4 = i2 - k + 1;
+	    dlarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, &
+		    work[1]);
+
+/*
+             Apply G from the right to transform the columns of the
+             matrix in rows I1 to min(K+NR,I).
+
+   Computing MIN
+*/
+	    i__5 = k + nr;
+	    i__4 = min(i__5,i__) - i1 + 1;
+	    dlarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh,
+		    &work[1]);
+
+	    if (wantz) {
+
+/*              Accumulate transformations in the matrix Z */
+
+		dlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1],
+			ldz, &work[1]);
+	    }
+/* L140: */
+	}
+
+/* L150: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L160:
+
+/*
+       A submatrix of order <= MAXB in rows and columns L to I has split
+       off. Use the double-shift QR algorithm to handle it.
+*/
+
+    dlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1],
+	    ilo, ihi, &z__[z_offset], ldz, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with a new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L50;
+
+L170:
+    work[1] = (doublereal) max(1,*n);
+    return 0;
+
+/*     End of DHSEQR */
+
+} /* dhseqr_ */
+
+/* Subroutine */ int dlabad_(doublereal *small, doublereal *large)
+{
+    /* Builtin functions */
+    double d_lg10(doublereal *), sqrt(doublereal);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLABAD takes as input the values computed by DLAMCH for underflow and
+    overflow, and returns the square root of each of these values if the
+    log of LARGE is sufficiently large.  This subroutine is intended to
+    identify machines with a large exponent range, such as the Crays, and
+    redefine the underflow and overflow limits to be the square roots of
+    the values computed by DLAMCH.  This subroutine is needed because
+    DLAMCH does not compensate for poor arithmetic in the upper half of
+    the exponent range, as is found on a Cray.
+
+    Arguments
+    =========
+
+    SMALL   (input/output) DOUBLE PRECISION
+            On entry, the underflow threshold as computed by DLAMCH.
+            On exit, if LOG10(LARGE) is sufficiently large, the square
+            root of SMALL, otherwise unchanged.
+
+    LARGE   (input/output) DOUBLE PRECISION
+            On entry, the overflow threshold as computed by DLAMCH.
+            On exit, if LOG10(LARGE) is sufficiently large, the square
+            root of LARGE, otherwise unchanged.
+
+    =====================================================================
+
+
+       If it looks like we're on a Cray, take the square root of
+       SMALL and LARGE to avoid overflow and underflow problems.
+*/
+
+    if (d_lg10(large) > 2e3) {
+	*small = sqrt(*small);
+	*large = sqrt(*large);
+    }
+
+    return 0;
+
+/*     End of DLABAD */
+
+} /* dlabad_ */
+
+/* Subroutine */ int dlabrd_(integer *m, integer *n, integer *nb, doublereal *
+	a, integer *lda, doublereal *d__, doublereal *e, doublereal *tauq,
+	doublereal *taup, doublereal *x, integer *ldx, doublereal *y, integer
+	*ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+
+    /* Local variables */
+    static integer i__;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *), dgemv_(char *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *), dlarfg_(integer *, doublereal *,
+	     doublereal *, integer *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLABRD reduces the first NB rows and columns of a real general
+    m by n matrix A to upper or lower bidiagonal form by an orthogonal
+    transformation Q' * A * P, and returns the matrices X and Y which
+    are needed to apply the transformation to the unreduced part of A.
+
+    If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+    bidiagonal form.
+
+    This is an auxiliary routine called by DGEBRD
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.
+
+    NB      (input) INTEGER
+            The number of leading rows and columns of A to be reduced.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit, the first NB rows and columns of the matrix are
+            overwritten; the rest of the array is unchanged.
+            If m >= n, elements on and below the diagonal in the first NB
+              columns, with the array TAUQ, represent the orthogonal
+              matrix Q as a product of elementary reflectors; and
+              elements above the diagonal in the first NB rows, with the
+              array TAUP, represent the orthogonal matrix P as a product
+              of elementary reflectors.
+            If m < n, elements below the diagonal in the first NB
+              columns, with the array TAUQ, represent the orthogonal
+              matrix Q as a product of elementary reflectors, and
+              elements on and above the diagonal in the first NB rows,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) DOUBLE PRECISION array, dimension (NB)
+            The diagonal elements of the first NB rows and columns of
+            the reduced matrix.  D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (NB)
+            The off-diagonal elements of the first NB rows and columns of
+            the reduced matrix.
+
+    TAUQ    (output) DOUBLE PRECISION array dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix Q. See Further Details.
+
+    TAUP    (output) DOUBLE PRECISION array, dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix P. See Further Details.
+
+    X       (output) DOUBLE PRECISION array, dimension (LDX,NB)
+            The m-by-nb matrix X required to update the unreduced part
+            of A.
+
+    LDX     (input) INTEGER
+            The leading dimension of the array X. LDX >= M.
+
+    Y       (output) DOUBLE PRECISION array, dimension (LDY,NB)
+            The n-by-nb matrix Y required to update the unreduced part
+            of A.
+
+    LDY     (output) INTEGER
+            The leading dimension of the array Y. LDY >= N.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+       Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors.
+
+    If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+    A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The elements of the vectors v and u together form the m-by-nb matrix
+    V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+    the transformation to the unreduced part of the matrix, using a block
+    update of the form:  A := A - V*Y' - X*U'.
+
+    The contents of A on exit are illustrated by the following examples
+    with nb = 2:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+      (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+      (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )
+
+    where a denotes an element of the original matrix which is unchanged,
+    vi denotes an element of the vector defining H(i), and ui an element
+    of the vector defining G(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:m,i) */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + a_dim1],
+		    lda, &y[i__ + y_dim1], ldy, &c_b2865, &a[i__ + i__ *
+		    a_dim1], &c__1);
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &x[i__ + x_dim1],
+		    ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2865, &a[i__ + i__ *
+		     a_dim1], &c__1);
+
+/*           Generate reflection Q(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ *
+		    a_dim1], &c__1, &tauq[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    if (i__ < *n) {
+		a[i__ + i__ * a_dim1] = 1.;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + (i__ + 1)
+			 * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &
+			c_b2879, &y[i__ + 1 + i__ * y_dim1], &c__1)
+			;
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + a_dim1],
+			lda, &a[i__ + i__ * a_dim1], &c__1, &c_b2879, &y[i__ *
+			 y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2865, &
+			y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &x[i__ + x_dim1],
+			ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b2879, &y[i__ *
+			 y_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b2865, &y[i__ + 1 + i__ * y_dim1], &c__1)
+			;
+		i__2 = *n - i__;
+		dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+
+/*              Update A(i,i+1:n) */
+
+		i__2 = *n - i__;
+		dgemv_("No transpose", &i__2, &i__, &c_b3001, &y[i__ + 1 +
+			y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b2865, &a[i__
+			+ (i__ + 1) * a_dim1], lda);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b2865, &a[
+			i__ + (i__ + 1) * a_dim1], lda);
+
+/*              Generate reflection P(i) to annihilate A(i,i+2:n) */
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(
+			i__3,*n) * a_dim1], lda, &taup[i__]);
+		e[i__] = a[i__ + (i__ + 1) * a_dim1];
+		a[i__ + (i__ + 1) * a_dim1] = 1.;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + (
+			i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1],
+			 lda, &c_b2879, &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__;
+		dgemv_("Transpose", &i__2, &i__, &c_b2865, &y[i__ + 1 +
+			y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b2879, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		dgemv_("No transpose", &i__2, &i__, &c_b3001, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b2879, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i,i:n) */
+
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &y[i__ + y_dim1],
+		    ldy, &a[i__ + a_dim1], lda, &c_b2865, &a[i__ + i__ *
+		    a_dim1], lda);
+	    i__2 = i__ - 1;
+	    i__3 = *n - i__ + 1;
+	    dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[i__ * a_dim1 + 1],
+		    lda, &x[i__ + x_dim1], ldx, &c_b2865, &a[i__ + i__ *
+		    a_dim1], lda);
+
+/*           Generate reflection P(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    dlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) *
+		    a_dim1], lda, &taup[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    if (i__ < *m) {
+		a[i__ + i__ * a_dim1] = 1.;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__ + 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 +
+			i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &
+			c_b2879, &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__ + 1;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &y[i__ + y_dim1],
+			ldy, &a[i__ + i__ * a_dim1], lda, &c_b2879, &x[i__ *
+			x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__ + 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[i__ *
+			a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, &
+			c_b2879, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b2865, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		dscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+
+/*              Update A(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + 1 +
+			a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b2865, &a[i__
+			+ 1 + i__ * a_dim1], &c__1);
+		i__2 = *m - i__;
+		dgemv_("No transpose", &i__2, &i__, &c_b3001, &x[i__ + 1 +
+			x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b2865, &
+			a[i__ + 1 + i__ * a_dim1], &c__1);
+
+/*              Generate reflection Q(i) to annihilate A(i+2:m,i) */
+
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) +
+			i__ * a_dim1], &c__1, &tauq[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 + (i__
+			+ 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &c_b2879, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 +
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b2879, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b2865, &
+			y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__;
+		dgemv_("Transpose", &i__2, &i__, &c_b2865, &x[i__ + 1 +
+			x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b2879, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		dgemv_("Transpose", &i__, &i__2, &c_b3001, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b2865, &y[i__ + 1 + i__ * y_dim1], &c__1)
+			;
+		i__2 = *n - i__;
+		dscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of DLABRD */
+
+} /* dlabrd_ */
+
+/* Subroutine */ int dlacpy_(char *uplo, integer *m, integer *n, doublereal *
+	a, integer *lda, doublereal *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLACPY copies all or part of a two-dimensional matrix A to another
+    matrix B.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be copied to B.
+            = 'U':      Upper triangular part
+            = 'L':      Lower triangular part
+            Otherwise:  All of the matrix A
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The m by n matrix A.  If UPLO = 'U', only the upper triangle
+            or trapezoid is accessed; if UPLO = 'L', only the lower
+            triangle or trapezoid is accessed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (output) DOUBLE PRECISION array, dimension (LDB,N)
+            On exit, B = A in the locations specified by UPLO.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+    return 0;
+
+/*     End of DLACPY */
+
+} /* dlacpy_ */
+
+/* Subroutine */ int dladiv_(doublereal *a, doublereal *b, doublereal *c__,
+	doublereal *d__, doublereal *p, doublereal *q)
+{
+    static doublereal e, f;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLADIV performs complex division in  real arithmetic
+
+                          a + i*b
+               p + i*q = ---------
+                          c + i*d
+
+    The algorithm is due to Robert L. Smith and can be found
+    in D. Knuth, The art of Computer Programming, Vol.2, p.195
+
+    Arguments
+    =========
+
+    A       (input) DOUBLE PRECISION
+    B       (input) DOUBLE PRECISION
+    C       (input) DOUBLE PRECISION
+    D       (input) DOUBLE PRECISION
+            The scalars a, b, c, and d in the above expression.
+
+    P       (output) DOUBLE PRECISION
+    Q       (output) DOUBLE PRECISION
+            The scalars p and q in the above expression.
+
+    =====================================================================
+*/
+
+
+    if (abs(*d__) < abs(*c__)) {
+	e = *d__ / *c__;
+	f = *c__ + *d__ * e;
+	*p = (*a + *b * e) / f;
+	*q = (*b - *a * e) / f;
+    } else {
+	e = *c__ / *d__;
+	f = *d__ + *c__ * e;
+	*p = (*b + *a * e) / f;
+	*q = (-(*a) + *b * e) / f;
+    }
+
+    return 0;
+
+/*     End of DLADIV */
+
+} /* dladiv_ */
+
+/* Subroutine */ int dlae2_(doublereal *a, doublereal *b, doublereal *c__,
+	doublereal *rt1, doublereal *rt2)
+{
+    /* System generated locals */
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal ab, df, tb, sm, rt, adf, acmn, acmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
+       [  A   B  ]
+       [  B   C  ].
+    On return, RT1 is the eigenvalue of larger absolute value, and RT2
+    is the eigenvalue of smaller absolute value.
+
+    Arguments
+    =========
+
+    A       (input) DOUBLE PRECISION
+            The (1,1) element of the 2-by-2 matrix.
+
+    B       (input) DOUBLE PRECISION
+            The (1,2) and (2,1) elements of the 2-by-2 matrix.
+
+    C       (input) DOUBLE PRECISION
+            The (2,2) element of the 2-by-2 matrix.
+
+    RT1     (output) DOUBLE PRECISION
+            The eigenvalue of larger absolute value.
+
+    RT2     (output) DOUBLE PRECISION
+            The eigenvalue of smaller absolute value.
+
+    Further Details
+    ===============
+
+    RT1 is accurate to a few ulps barring over/underflow.
+
+    RT2 may be inaccurate if there is massive cancellation in the
+    determinant A*C-B*B; higher precision or correctly rounded or
+    correctly truncated arithmetic would be needed to compute RT2
+    accurately in all cases.
+
+    Overflow is possible only if RT1 is within a factor of 5 of overflow.
+    Underflow is harmless if the input data is 0 or exceeds
+       underflow_threshold / macheps.
+
+   =====================================================================
+
+
+       Compute the eigenvalues
+*/
+
+    sm = *a + *c__;
+    df = *a - *c__;
+    adf = abs(df);
+    tb = *b + *b;
+    ab = abs(tb);
+    if (abs(*a) > abs(*c__)) {
+	acmx = *a;
+	acmn = *c__;
+    } else {
+	acmx = *c__;
+	acmn = *a;
+    }
+    if (adf > ab) {
+/* Computing 2nd power */
+	d__1 = ab / adf;
+	rt = adf * sqrt(d__1 * d__1 + 1.);
+    } else if (adf < ab) {
+/* Computing 2nd power */
+	d__1 = adf / ab;
+	rt = ab * sqrt(d__1 * d__1 + 1.);
+    } else {
+
+/*        Includes case AB=ADF=0 */
+
+	rt = ab * sqrt(2.);
+    }
+    if (sm < 0.) {
+	*rt1 = (sm - rt) * .5;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else if (sm > 0.) {
+	*rt1 = (sm + rt) * .5;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else {
+
+/*        Includes case RT1 = RT2 = 0 */
+
+	*rt1 = rt * .5;
+	*rt2 = rt * -.5;
+    }
+    return 0;
+
+/*     End of DLAE2 */
+
+} /* dlae2_ */
+
+/* Subroutine */ int dlaed0_(integer *icompq, integer *qsiz, integer *n,
+	doublereal *d__, doublereal *e, doublereal *q, integer *ldq,
+	doublereal *qstore, integer *ldqs, doublereal *work, integer *iwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
+    static doublereal temp;
+    static integer curr;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer iperm;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer indxq, iwrem;
+    extern /* Subroutine */ int dlaed1_(integer *, doublereal *, doublereal *,
+	     integer *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, integer *);
+    static integer iqptr;
+    extern /* Subroutine */ int dlaed7_(integer *, integer *, integer *,
+	    integer *, integer *, integer *, doublereal *, doublereal *,
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, integer *, integer *, integer *, integer *, doublereal
+	    *, doublereal *, integer *, integer *);
+    static integer tlvls;
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *);
+    static integer igivcl;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer igivnm, submat, curprb, subpbs, igivpt;
+    extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *);
+    static integer curlvl, matsiz, iprmpt, smlsiz;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLAED0 computes all eigenvalues and corresponding eigenvectors of a
+    symmetric tridiagonal matrix using the divide and conquer method.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            = 0:  Compute eigenvalues only.
+            = 1:  Compute eigenvectors of original dense symmetric matrix
+                  also.  On entry, Q contains the orthogonal matrix used
+                  to reduce the original matrix to tridiagonal form.
+            = 2:  Compute eigenvalues and eigenvectors of tridiagonal
+                  matrix.
+
+    QSIZ   (input) INTEGER
+           The dimension of the orthogonal matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, the main diagonal of the tridiagonal matrix.
+           On exit, its eigenvalues.
+
+    E      (input) DOUBLE PRECISION array, dimension (N-1)
+           The off-diagonal elements of the tridiagonal matrix.
+           On exit, E has been destroyed.
+
+    Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+           On entry, Q must contain an N-by-N orthogonal matrix.
+           If ICOMPQ = 0    Q is not referenced.
+           If ICOMPQ = 1    On entry, Q is a subset of the columns of the
+                            orthogonal matrix used to reduce the full
+                            matrix to tridiagonal form corresponding to
+                            the subset of the full matrix which is being
+                            decomposed at this time.
+           If ICOMPQ = 2    On entry, Q will be the identity matrix.
+                            On exit, Q contains the eigenvectors of the
+                            tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  If eigenvectors are
+           desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.
+
+    QSTORE (workspace) DOUBLE PRECISION array, dimension (LDQS, N)
+           Referenced only when ICOMPQ = 1.  Used to store parts of
+           the eigenvector matrix when the updating matrix multiplies
+           take place.
+
+    LDQS   (input) INTEGER
+           The leading dimension of the array QSTORE.  If ICOMPQ = 1,
+           then  LDQS >= max(1,N).  In any case,  LDQS >= 1.
+
+    WORK   (workspace) DOUBLE PRECISION array,
+           If ICOMPQ = 0 or 1, the dimension of WORK must be at least
+                       1 + 3*N + 2*N*lg N + 2*N**2
+                       ( lg( N ) = smallest integer k
+                                   such that 2^k >= N )
+           If ICOMPQ = 2, the dimension of WORK must be at least
+                       4*N + N**2.
+
+    IWORK  (workspace) INTEGER array,
+           If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
+                          6 + 6*N + 5*N*lg N.
+                          ( lg( N ) = smallest integer k
+                                      such that 2^k >= N )
+           If ICOMPQ = 2, the dimension of IWORK must be at least
+                          3 + 5*N.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    qstore_dim1 = *ldqs;
+    qstore_offset = 1 + qstore_dim1;
+    qstore -= qstore_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 2)) {
+	*info = -1;
+    } else if (*icompq == 1 && *qsiz < max(0,*n)) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ldq < max(1,*n)) {
+	*info = -7;
+    } else if (*ldqs < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED0", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "DLAED0", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Determine the size and placement of the submatrices, and save in
+       the leading elements of IWORK.
+*/
+
+    iwork[1] = *n;
+    subpbs = 1;
+    tlvls = 0;
+L10:
+    if (iwork[subpbs] > smlsiz) {
+	for (j = subpbs; j >= 1; --j) {
+	    iwork[j * 2] = (iwork[j] + 1) / 2;
+	    iwork[((j) << (1)) - 1] = iwork[j] / 2;
+/* L20: */
+	}
+	++tlvls;
+	subpbs <<= 1;
+	goto L10;
+    }
+    i__1 = subpbs;
+    for (j = 2; j <= i__1; ++j) {
+	iwork[j] += iwork[j - 1];
+/* L30: */
+    }
+
+/*
+       Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+       using rank-1 modifications (cuts).
+*/
+
+    spm1 = subpbs - 1;
+    i__1 = spm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	submat = iwork[i__] + 1;
+	smm1 = submat - 1;
+	d__[smm1] -= (d__1 = e[smm1], abs(d__1));
+	d__[submat] -= (d__1 = e[smm1], abs(d__1));
+/* L40: */
+    }
+
+    indxq = ((*n) << (2)) + 3;
+    if (*icompq != 2) {
+
+/*
+          Set up workspaces for eigenvalues only/accumulate new vectors
+          routine
+*/
+
+	temp = log((doublereal) (*n)) / log(2.);
+	lgn = (integer) temp;
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	iprmpt = indxq + *n + 1;
+	iperm = iprmpt + *n * lgn;
+	iqptr = iperm + *n * lgn;
+	igivpt = iqptr + *n + 2;
+	igivcl = igivpt + *n * lgn;
+
+	igivnm = 1;
+	iq = igivnm + ((*n) << (1)) * lgn;
+/* Computing 2nd power */
+	i__1 = *n;
+	iwrem = iq + i__1 * i__1 + 1;
+
+/*        Initialize pointers */
+
+	i__1 = subpbs;
+	for (i__ = 0; i__ <= i__1; ++i__) {
+	    iwork[iprmpt + i__] = 1;
+	    iwork[igivpt + i__] = 1;
+/* L50: */
+	}
+	iwork[iqptr] = 1;
+    }
+
+/*
+       Solve each submatrix eigenproblem at the bottom of the divide and
+       conquer tree.
+*/
+
+    curr = 0;
+    i__1 = spm1;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+	if (i__ == 0) {
+	    submat = 1;
+	    matsiz = iwork[1];
+	} else {
+	    submat = iwork[i__] + 1;
+	    matsiz = iwork[i__ + 1] - iwork[i__];
+	}
+	if (*icompq == 2) {
+	    dsteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat +
+		    submat * q_dim1], ldq, &work[1], info);
+	    if (*info != 0) {
+		goto L130;
+	    }
+	} else {
+	    dsteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 +
+		    iwork[iqptr + curr]], &matsiz, &work[1], info);
+	    if (*info != 0) {
+		goto L130;
+	    }
+	    if (*icompq == 1) {
+		dgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b2865, &q[submat *
+			 q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]]
+			, &matsiz, &c_b2879, &qstore[submat * qstore_dim1 + 1]
+			, ldqs);
+	    }
+/* Computing 2nd power */
+	    i__2 = matsiz;
+	    iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
+	    ++curr;
+	}
+	k = 1;
+	i__2 = iwork[i__ + 1];
+	for (j = submat; j <= i__2; ++j) {
+	    iwork[indxq + j] = k;
+	    ++k;
+/* L60: */
+	}
+/* L70: */
+    }
+
+/*
+       Successively merge eigensystems of adjacent submatrices
+       into eigensystem for the corresponding larger matrix.
+
+       while ( SUBPBS > 1 )
+*/
+
+    curlvl = 1;
+L80:
+    if (subpbs > 1) {
+	spm2 = subpbs - 2;
+	i__1 = spm2;
+	for (i__ = 0; i__ <= i__1; i__ += 2) {
+	    if (i__ == 0) {
+		submat = 1;
+		matsiz = iwork[2];
+		msd2 = iwork[1];
+		curprb = 0;
+	    } else {
+		submat = iwork[i__] + 1;
+		matsiz = iwork[i__ + 2] - iwork[i__];
+		msd2 = matsiz / 2;
+		++curprb;
+	    }
+
+/*
+       Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+       into an eigensystem of size MATSIZ.
+       DLAED1 is used only for the full eigensystem of a tridiagonal
+       matrix.
+       DLAED7 handles the cases in which eigenvalues only or eigenvalues
+       and eigenvectors of a full symmetric matrix (which was reduced to
+       tridiagonal form) are desired.
+*/
+
+	    if (*icompq == 2) {
+		dlaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1],
+			ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], &
+			msd2, &work[1], &iwork[subpbs + 1], info);
+	    } else {
+		dlaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[
+			submat], &qstore[submat * qstore_dim1 + 1], ldqs, &
+			iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &
+			work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm]
+			, &iwork[igivpt], &iwork[igivcl], &work[igivnm], &
+			work[iwrem], &iwork[subpbs + 1], info);
+	    }
+	    if (*info != 0) {
+		goto L130;
+	    }
+	    iwork[i__ / 2 + 1] = iwork[i__ + 2];
+/* L90: */
+	}
+	subpbs /= 2;
+	++curlvl;
+	goto L80;
+    }
+
+/*
+       end while
+
+       Re-merge the eigenvalues/vectors which were deflated at the final
+       merge step.
+*/
+
+    if (*icompq == 1) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    j = iwork[indxq + i__];
+	    work[i__] = d__[j];
+	    dcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1
+		    + 1], &c__1);
+/* L100: */
+	}
+	dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
+    } else if (*icompq == 2) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    j = iwork[indxq + i__];
+	    work[i__] = d__[j];
+	    dcopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
+/* L110: */
+	}
+	dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
+	dlacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq);
+    } else {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    j = iwork[indxq + i__];
+	    work[i__] = d__[j];
+/* L120: */
+	}
+	dcopy_(n, &work[1], &c__1, &d__[1], &c__1);
+    }
+    goto L140;
+
+L130:
+    *info = submat * (*n + 1) + submat + matsiz - 1;
+
+L140:
+    return 0;
+
+/*     End of DLAED0 */
+
+} /* dlaed0_ */
+
+/* Subroutine */ int dlaed1_(integer *n, doublereal *d__, doublereal *q,
+	integer *ldq, integer *indxq, doublereal *rho, integer *cutpnt,
+	doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, k, n1, n2, is, iw, iz, iq2, zpp1, indx, indxc;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer indxp;
+    extern /* Subroutine */ int dlaed2_(integer *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *, integer *, integer *, integer *), dlaed3_(integer *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, doublereal *, doublereal *, integer *, integer *,
+	    doublereal *, doublereal *, integer *);
+    static integer idlmda;
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *);
+    static integer coltyp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLAED1 computes the updated eigensystem of a diagonal
+    matrix after modification by a rank-one symmetric matrix.  This
+    routine is used only for the eigenproblem which requires all
+    eigenvalues and eigenvectors of a tridiagonal matrix.  DLAED7 handles
+    the case in which eigenvalues only or eigenvalues and eigenvectors
+    of a full symmetric matrix (which was reduced to tridiagonal form)
+    are desired.
+
+      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+
+       where Z = Q'u, u is a vector of length N with ones in the
+       CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+
+       The eigenvectors of the original matrix are stored in Q, and the
+       eigenvalues are in D.  The algorithm consists of three stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple eigenvalues or if there is a zero in
+          the Z vector.  For each such occurence the dimension of the
+          secular equation problem is reduced by one.  This stage is
+          performed by the routine DLAED2.
+
+          The second stage consists of calculating the updated
+          eigenvalues. This is done by finding the roots of the secular
+          equation via the routine DLAED4 (as called by DLAED3).
+          This routine also calculates the eigenvectors of the current
+          problem.
+
+          The final stage consists of computing the updated eigenvectors
+          directly using the updated eigenvalues.  The eigenvectors for
+          the current problem are multiplied with the eigenvectors from
+          the overall problem.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, the eigenvalues of the rank-1-perturbed matrix.
+           On exit, the eigenvalues of the repaired matrix.
+
+    Q      (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+           On entry, the eigenvectors of the rank-1-perturbed matrix.
+           On exit, the eigenvectors of the repaired tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (input/output) INTEGER array, dimension (N)
+           On entry, the permutation which separately sorts the two
+           subproblems in D into ascending order.
+           On exit, the permutation which will reintegrate the
+           subproblems back into sorted order,
+           i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
+
+    RHO    (input) DOUBLE PRECISION
+           The subdiagonal entry used to create the rank-1 modification.
+
+    CUTPNT (input) INTEGER
+           The location of the last eigenvalue in the leading sub-matrix.
+           min(1,N) <= CUTPNT <= N/2.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension (4*N + N**2)
+
+    IWORK  (workspace) INTEGER array, dimension (4*N)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -1;
+    } else if (*ldq < max(1,*n)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MIN */
+	i__1 = 1, i__2 = *n / 2;
+	if ((min(i__1,i__2) > *cutpnt) || (*n / 2 < *cutpnt)) {
+	    *info = -7;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED1", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*
+       The following values are integer pointers which indicate
+       the portion of the workspace
+       used by a particular array in DLAED2 and DLAED3.
+*/
+
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq2 = iw + *n;
+
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+
+
+/*
+       Form the z-vector which consists of the last row of Q_1 and the
+       first row of Q_2.
+*/
+
+    dcopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
+    zpp1 = *cutpnt + 1;
+    i__1 = *n - *cutpnt;
+    dcopy_(&i__1, &q[zpp1 + zpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);
+
+/*     Deflate eigenvalues. */
+
+    dlaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[
+	    iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[
+	    indxc], &iwork[indxp], &iwork[coltyp], info);
+
+    if (*info != 0) {
+	goto L20;
+    }
+
+/*     Solve Secular Equation. */
+
+    if (k != 0) {
+	is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp +
+		1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
+	dlaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda],
+		 &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[
+		is], info);
+	if (*info != 0) {
+	    goto L20;
+	}
+
+/*     Prepare the INDXQ sorting permutation. */
+
+	n1 = k;
+	n2 = *n - k;
+	dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    indxq[i__] = i__;
+/* L10: */
+	}
+    }
+
+L20:
+    return 0;
+
+/*     End of DLAED1 */
+
+} /* dlaed1_ */
+
+/* Subroutine */ int dlaed2_(integer *k, integer *n, integer *n1, doublereal *
+	d__, doublereal *q, integer *ldq, integer *indxq, doublereal *rho,
+	doublereal *z__, doublereal *dlamda, doublereal *w, doublereal *q2,
+	integer *indx, integer *indxc, integer *indxp, integer *coltyp,
+	integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal c__;
+    static integer i__, j;
+    static doublereal s, t;
+    static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
+    static doublereal eps, tau, tol;
+    static integer psm[4], imax, jmax;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static integer ctot[4];
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *), dcopy_(integer *, doublereal *, integer *, doublereal
+	    *, integer *);
+
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), dlacpy_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLAED2 merges the two sets of eigenvalues together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    eigenvalues are close together or if there is a tiny entry in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    Arguments
+    =========
+
+    K      (output) INTEGER
+           The number of non-deflated eigenvalues, and the order of the
+           related secular equation. 0 <= K <=N.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    N1     (input) INTEGER
+           The location of the last eigenvalue in the leading sub-matrix.
+           min(1,N) <= N1 <= N/2.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, D contains the eigenvalues of the two submatrices to
+           be combined.
+           On exit, D contains the trailing (N-K) updated eigenvalues
+           (those which were deflated) sorted into increasing order.
+
+    Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+           On entry, Q contains the eigenvectors of two submatrices in
+           the two square blocks with corners at (1,1), (N1,N1)
+           and (N1+1, N1+1), (N,N).
+           On exit, Q contains the trailing (N-K) updated eigenvectors
+           (those which were deflated) in its last N-K columns.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (input/output) INTEGER array, dimension (N)
+           The permutation which separately sorts the two sub-problems
+           in D into ascending order.  Note that elements in the second
+           half of this permutation must first have N1 added to their
+           values. Destroyed on exit.
+
+    RHO    (input/output) DOUBLE PRECISION
+           On entry, the off-diagonal element associated with the rank-1
+           cut which originally split the two submatrices which are now
+           being recombined.
+           On exit, RHO has been modified to the value required by
+           DLAED3.
+
+    Z      (input) DOUBLE PRECISION array, dimension (N)
+           On entry, Z contains the updating vector (the last
+           row of the first sub-eigenvector matrix and the first row of
+           the second sub-eigenvector matrix).
+           On exit, the contents of Z have been destroyed by the updating
+           process.
+
+    DLAMDA (output) DOUBLE PRECISION array, dimension (N)
+           A copy of the first K eigenvalues which will be used by
+           DLAED3 to form the secular equation.
+
+    W      (output) DOUBLE PRECISION array, dimension (N)
+           The first k values of the final deflation-altered z-vector
+           which will be passed to DLAED3.
+
+    Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
+           A copy of the first K eigenvectors which will be used by
+           DLAED3 in a matrix multiply (DGEMM) to solve for the new
+           eigenvectors.
+
+    INDX   (workspace) INTEGER array, dimension (N)
+           The permutation used to sort the contents of DLAMDA into
+           ascending order.
+
+    INDXC  (output) INTEGER array, dimension (N)
+           The permutation used to arrange the columns of the deflated
+           Q matrix into three groups:  the first group contains non-zero
+           elements only at and above N1, the second contains
+           non-zero elements only below N1, and the third is dense.
+
+    INDXP  (workspace) INTEGER array, dimension (N)
+           The permutation used to place deflated values of D at the end
+           of the array.  INDXP(1:K) points to the nondeflated D-values
+           and INDXP(K+1:N) points to the deflated eigenvalues.
+
+    COLTYP (workspace/output) INTEGER array, dimension (N)
+           During execution, a label which will indicate which of the
+           following types a column in the Q2 matrix is:
+           1 : non-zero in the upper half only;
+           2 : dense;
+           3 : non-zero in the lower half only;
+           4 : deflated.
+           On exit, COLTYP(i) is the number of columns of type i,
+           for i=1 to 4 only.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --z__;
+    --dlamda;
+    --w;
+    --q2;
+    --indx;
+    --indxc;
+    --indxp;
+    --coltyp;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -2;
+    } else if (*ldq < max(1,*n)) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MIN */
+	i__1 = 1, i__2 = *n / 2;
+	if ((min(i__1,i__2) > *n1) || (*n / 2 < *n1)) {
+	    *info = -3;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    n2 = *n - *n1;
+    n1p1 = *n1 + 1;
+
+    if (*rho < 0.) {
+	dscal_(&n2, &c_b3001, &z__[n1p1], &c__1);
+    }
+
+/*
+       Normalize z so that norm(z) = 1.  Since z is the concatenation of
+       two normalized vectors, norm2(z) = sqrt(2).
+*/
+
+    t = 1. / sqrt(2.);
+    dscal_(n, &t, &z__[1], &c__1);
+
+/*     RHO = ABS( norm(z)**2 * RHO ) */
+
+    *rho = (d__1 = *rho * 2., abs(d__1));
+
+/*     Sort the eigenvalues into increasing order */
+
+    i__1 = *n;
+    for (i__ = n1p1; i__ <= i__1; ++i__) {
+	indxq[i__] += *n1;
+/* L10: */
+    }
+
+/*     re-integrate the deflated parts from the last pass */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = d__[indxq[i__]];
+/* L20: */
+    }
+    dlamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	indx[i__] = indxq[indxc[i__]];
+/* L30: */
+    }
+
+/*     Calculate the allowable deflation tolerance */
+
+    imax = idamax_(n, &z__[1], &c__1);
+    jmax = idamax_(n, &d__[1], &c__1);
+    eps = EPSILON;
+/* Computing MAX */
+    d__3 = (d__1 = d__[jmax], abs(d__1)), d__4 = (d__2 = z__[imax], abs(d__2))
+	    ;
+    tol = eps * 8. * max(d__3,d__4);
+
+/*
+       If the rank-1 modifier is small enough, no more needs to be done
+       except to reorganize Q so that its columns correspond with the
+       elements in D.
+*/
+
+    if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
+	*k = 0;
+	iq2 = 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__ = indx[j];
+	    dcopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
+	    dlamda[j] = d__[i__];
+	    iq2 += *n;
+/* L40: */
+	}
+	dlacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
+	dcopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
+	goto L190;
+    }
+
+/*
+       If there are multiple eigenvalues then the problem deflates.  Here
+       the number of equal eigenvalues are found.  As each equal
+       eigenvalue is found, an elementary reflector is computed to rotate
+       the corresponding eigensubspace so that the corresponding
+       components of Z are zero in this new basis.
+*/
+
+    i__1 = *n1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	coltyp[i__] = 1;
+/* L50: */
+    }
+    i__1 = *n;
+    for (i__ = n1p1; i__ <= i__1; ++i__) {
+	coltyp[i__] = 3;
+/* L60: */
+    }
+
+
+    *k = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	nj = indx[j];
+	if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    coltyp[nj] = 4;
+	    indxp[k2] = nj;
+	    if (j == *n) {
+		goto L100;
+	    }
+	} else {
+	    pj = nj;
+	    goto L80;
+	}
+/* L70: */
+    }
+L80:
+    ++j;
+    nj = indx[j];
+    if (j > *n) {
+	goto L100;
+    }
+    if (*rho * (d__1 = z__[nj], abs(d__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	coltyp[nj] = 4;
+	indxp[k2] = nj;
+    } else {
+
+/*        Check if eigenvalues are close enough to allow deflation. */
+
+	s = z__[pj];
+	c__ = z__[nj];
+
+/*
+          Find sqrt(a**2+b**2) without overflow or
+          destructive underflow.
+*/
+
+	tau = dlapy2_(&c__, &s);
+	t = d__[nj] - d__[pj];
+	c__ /= tau;
+	s = -s / tau;
+	if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    z__[nj] = tau;
+	    z__[pj] = 0.;
+	    if (coltyp[nj] != coltyp[pj]) {
+		coltyp[nj] = 2;
+	    }
+	    coltyp[pj] = 4;
+	    drot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
+		    c__, &s);
+/* Computing 2nd power */
+	    d__1 = c__;
+/* Computing 2nd power */
+	    d__2 = s;
+	    t = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
+/* Computing 2nd power */
+	    d__1 = s;
+/* Computing 2nd power */
+	    d__2 = c__;
+	    d__[nj] = d__[pj] * (d__1 * d__1) + d__[nj] * (d__2 * d__2);
+	    d__[pj] = t;
+	    --k2;
+	    i__ = 1;
+L90:
+	    if (k2 + i__ <= *n) {
+		if (d__[pj] < d__[indxp[k2 + i__]]) {
+		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+		    indxp[k2 + i__] = pj;
+		    ++i__;
+		    goto L90;
+		} else {
+		    indxp[k2 + i__ - 1] = pj;
+		}
+	    } else {
+		indxp[k2 + i__ - 1] = pj;
+	    }
+	    pj = nj;
+	} else {
+	    ++(*k);
+	    dlamda[*k] = d__[pj];
+	    w[*k] = z__[pj];
+	    indxp[*k] = pj;
+	    pj = nj;
+	}
+    }
+    goto L80;
+L100:
+
+/*     Record the last eigenvalue. */
+
+    ++(*k);
+    dlamda[*k] = d__[pj];
+    w[*k] = z__[pj];
+    indxp[*k] = pj;
+
+/*
+       Count up the total number of the various types of columns, then
+       form a permutation which positions the four column types into
+       four uniform groups (although one or more of these groups may be
+       empty).
+*/
+
+    for (j = 1; j <= 4; ++j) {
+	ctot[j - 1] = 0;
+/* L110: */
+    }
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	ct = coltyp[j];
+	++ctot[ct - 1];
+/* L120: */
+    }
+
+/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
+
+    psm[0] = 1;
+    psm[1] = ctot[0] + 1;
+    psm[2] = psm[1] + ctot[1];
+    psm[3] = psm[2] + ctot[2];
+    *k = *n - ctot[3];
+
+/*
+       Fill out the INDXC array so that the permutation which it induces
+       will place all type-1 columns first, all type-2 columns next,
+       then all type-3's, and finally all type-4's.
+*/
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	js = indxp[j];
+	ct = coltyp[js];
+	indx[psm[ct - 1]] = js;
+	indxc[psm[ct - 1]] = j;
+	++psm[ct - 1];
+/* L130: */
+    }
+
+/*
+       Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+       and Q2 respectively.  The eigenvalues/vectors which were not
+       deflated go into the first K slots of DLAMDA and Q2 respectively,
+       while those which were deflated go into the last N - K slots.
+*/
+
+    i__ = 1;
+    iq1 = 1;
+    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
+    i__1 = ctot[0];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
+	z__[i__] = d__[js];
+	++i__;
+	iq1 += *n1;
+/* L140: */
+    }
+
+    i__1 = ctot[1];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	dcopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
+	dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
+	z__[i__] = d__[js];
+	++i__;
+	iq1 += *n1;
+	iq2 += n2;
+/* L150: */
+    }
+
+    i__1 = ctot[2];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	dcopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
+	z__[i__] = d__[js];
+	++i__;
+	iq2 += n2;
+/* L160: */
+    }
+
+    iq1 = iq2;
+    i__1 = ctot[3];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	dcopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
+	iq2 += *n;
+	z__[i__] = d__[js];
+	++i__;
+/* L170: */
+    }
+
+/*
+       The deflated eigenvalues and their corresponding vectors go back
+       into the last N - K slots of D and Q respectively.
+*/
+
+    dlacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
+    i__1 = *n - *k;
+    dcopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
+
+/*     Copy CTOT into COLTYP for referencing in DLAED3. */
+
+    for (j = 1; j <= 4; ++j) {
+	coltyp[j] = ctot[j - 1];
+/* L180: */
+    }
+
+L190:
+    return 0;
+
+/*     End of DLAED2 */
+
+} /* dlaed2_ */
+
+/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal *
+	d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda,
+	 doublereal *q2, integer *indx, integer *ctot, doublereal *w,
+	doublereal *s, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer i__, j, n2, n12, ii, n23, iq2;
+    static doublereal temp;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *),
+	     dcopy_(integer *, doublereal *, integer *, doublereal *, integer
+	    *), dlaed4_(integer *, integer *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, integer *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *),
+	    dlaset_(char *, integer *, integer *, doublereal *, doublereal *,
+	    doublereal *, integer *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLAED3 finds the roots of the secular equation, as defined by the
+    values in D, W, and RHO, between 1 and K.  It makes the
+    appropriate calls to DLAED4 and then updates the eigenvectors by
+    multiplying the matrix of eigenvectors of the pair of eigensystems
+    being combined by the matrix of eigenvectors of the K-by-K system
+    which is solved here.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    K       (input) INTEGER
+            The number of terms in the rational function to be solved by
+            DLAED4.  K >= 0.
+
+    N       (input) INTEGER
+            The number of rows and columns in the Q matrix.
+            N >= K (deflation may result in N>K).
+
+    N1      (input) INTEGER
+            The location of the last eigenvalue in the leading submatrix.
+            min(1,N) <= N1 <= N/2.
+
+    D       (output) DOUBLE PRECISION array, dimension (N)
+            D(I) contains the updated eigenvalues for
+            1 <= I <= K.
+
+    Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)
+            Initially the first K columns are used as workspace.
+            On output the columns 1 to K contain
+            the updated eigenvectors.
+
+    LDQ     (input) INTEGER
+            The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    RHO     (input) DOUBLE PRECISION
+            The value of the parameter in the rank one update equation.
+            RHO >= 0 required.
+
+    DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K)
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation. May be changed on output by
+            having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
+            Cray-2, or Cray C-90, as described above.
+
+    Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N)
+            The first K columns of this matrix contain the non-deflated
+            eigenvectors for the split problem.
+
+    INDX    (input) INTEGER array, dimension (N)
+            The permutation used to arrange the columns of the deflated
+            Q matrix into three groups (see DLAED2).
+            The rows of the eigenvectors found by DLAED4 must be likewise
+            permuted before the matrix multiply can take place.
+
+    CTOT    (input) INTEGER array, dimension (4)
+            A count of the total number of the various types of columns
+            in Q, as described in INDX.  The fourth column type is any
+            column which has been deflated.
+
+    W       (input/output) DOUBLE PRECISION array, dimension (K)
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating vector. Destroyed on
+            output.
+
+    S       (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K
+            Will contain the eigenvectors of the repaired matrix which
+            will be multiplied by the previously accumulated eigenvectors
+            to update the system.
+
+    LDS     (input) INTEGER
+            The leading dimension of S.  LDS >= max(1,K).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dlamda;
+    --q2;
+    --indx;
+    --ctot;
+    --w;
+    --s;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*k < 0) {
+	*info = -1;
+    } else if (*n < *k) {
+	*info = -2;
+    } else if (*ldq < max(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED3", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 0) {
+	return 0;
+    }
+
+/*
+       Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DLAMDA(I) if it is 1; this makes the subsequent
+       subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DLAMDA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DLAMDA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
+/* L10: */
+    }
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j],
+		info);
+
+/*        If the zero finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    goto L120;
+	}
+/* L20: */
+    }
+
+    if (*k == 1) {
+	goto L110;
+    }
+    if (*k == 2) {
+	i__1 = *k;
+	for (j = 1; j <= i__1; ++j) {
+	    w[1] = q[j * q_dim1 + 1];
+	    w[2] = q[j * q_dim1 + 2];
+	    ii = indx[1];
+	    q[j * q_dim1 + 1] = w[ii];
+	    ii = indx[2];
+	    q[j * q_dim1 + 2] = w[ii];
+/* L30: */
+	}
+	goto L110;
+    }
+
+/*     Compute updated W. */
+
+    dcopy_(k, &w[1], &c__1, &s[1], &c__1);
+
+/*     Initialize W(I) = Q(I,I) */
+
+    i__1 = *ldq + 1;
+    dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L40: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L50: */
+	}
+/* L60: */
+    }
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__1 = sqrt(-w[i__]);
+	w[i__] = d_sign(&d__1, &s[i__]);
+/* L70: */
+    }
+
+/*     Compute eigenvectors of the modified rank-1 modification. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    s[i__] = w[i__] / q[i__ + j * q_dim1];
+/* L80: */
+	}
+	temp = dnrm2_(k, &s[1], &c__1);
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    ii = indx[i__];
+	    q[i__ + j * q_dim1] = s[ii] / temp;
+/* L90: */
+	}
+/* L100: */
+    }
+
+/*     Compute the updated eigenvectors. */
+
+L110:
+
+    n2 = *n - *n1;
+    n12 = ctot[1] + ctot[2];
+    n23 = ctot[2] + ctot[3];
+
+    dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23);
+    iq2 = *n1 * n12 + 1;
+    if (n23 != 0) {
+	dgemm_("N", "N", &n2, k, &n23, &c_b2865, &q2[iq2], &n2, &s[1], &n23, &
+		c_b2879, &q[*n1 + 1 + q_dim1], ldq);
+    } else {
+	dlaset_("A", &n2, k, &c_b2879, &c_b2879, &q[*n1 + 1 + q_dim1], ldq);
+    }
+
+    dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12);
+    if (n12 != 0) {
+	dgemm_("N", "N", n1, k, &n12, &c_b2865, &q2[1], n1, &s[1], &n12, &
+		c_b2879, &q[q_offset], ldq);
+    } else {
+	dlaset_("A", n1, k, &c_b2879, &c_b2879, &q[q_dim1 + 1], ldq);
+    }
+
+
+L120:
+    return 0;
+
+/*     End of DLAED3 */
+
+} /* dlaed3_ */
+
+/* Subroutine */ int dlaed4_(integer *n, integer *i__, doublereal *d__,
+	doublereal *z__, doublereal *delta, doublereal *rho, doublereal *dlam,
+	 integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal a, b, c__;
+    static integer j;
+    static doublereal w;
+    static integer ii;
+    static doublereal dw, zz[3];
+    static integer ip1;
+    static doublereal del, eta, phi, eps, tau, psi;
+    static integer iim1, iip1;
+    static doublereal dphi, dpsi;
+    static integer iter;
+    static doublereal temp, prew, temp1, dltlb, dltub, midpt;
+    static integer niter;
+    static logical swtch;
+    extern /* Subroutine */ int dlaed5_(integer *, doublereal *, doublereal *,
+	     doublereal *, doublereal *, doublereal *), dlaed6_(integer *,
+	    logical *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *);
+    static logical swtch3;
+
+    static logical orgati;
+    static doublereal erretm, rhoinv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       December 23, 1999
+
+
+    Purpose
+    =======
+
+    This subroutine computes the I-th updated eigenvalue of a symmetric
+    rank-one modification to a diagonal matrix whose elements are
+    given in the array d, and that
+
+               D(i) < D(j)  for  i < j
+
+    and that RHO > 0.  This is arranged by the calling routine, and is
+    no loss in generality.  The rank-one modified system is thus
+
+               diag( D )  +  RHO *  Z * Z_transpose.
+
+    where we assume the Euclidean norm of Z is 1.
+
+    The method consists of approximating the rational functions in the
+    secular equation by simpler interpolating rational functions.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The length of all arrays.
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  1 <= I <= N.
+
+    D      (input) DOUBLE PRECISION array, dimension (N)
+           The original eigenvalues.  It is assumed that they are in
+           order, D(I) < D(J)  for I < J.
+
+    Z      (input) DOUBLE PRECISION array, dimension (N)
+           The components of the updating vector.
+
+    DELTA  (output) DOUBLE PRECISION array, dimension (N)
+           If N .ne. 1, DELTA contains (D(j) - lambda_I) in its  j-th
+           component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+           contains the information necessary to construct the
+           eigenvectors.
+
+    RHO    (input) DOUBLE PRECISION
+           The scalar in the symmetric updating formula.
+
+    DLAM   (output) DOUBLE PRECISION
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit
+           > 0:  if INFO = 1, the updating process failed.
+
+    Internal Parameters
+    ===================
+
+    Logical variable ORGATI (origin-at-i?) is used for distinguishing
+    whether D(i) or D(i+1) is treated as the origin.
+
+              ORGATI = .true.    origin at i
+              ORGATI = .false.   origin at i+1
+
+     Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+     if we are working with THREE poles!
+
+     MAXIT is the maximum number of iterations allowed for each
+     eigenvalue.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Since this routine is called in an inner loop, we do no argument
+       checking.
+
+       Quick return for N=1 and 2.
+*/
+
+    /* Parameter adjustments */
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    if (*n == 1) {
+
+/*         Presumably, I=1 upon entry */
+
+	*dlam = d__[1] + *rho * z__[1] * z__[1];
+	delta[1] = 1.;
+	return 0;
+    }
+    if (*n == 2) {
+	dlaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
+	return 0;
+    }
+
+/*     Compute machine epsilon */
+
+    eps = EPSILON;
+    rhoinv = 1. / *rho;
+
+/*     The case I = N */
+
+    if (*i__ == *n) {
+
+/*        Initialize some basic variables */
+
+	ii = *n - 1;
+	niter = 1;
+
+/*        Calculate initial guess */
+
+	midpt = *rho / 2.;
+
+/*
+          If ||Z||_2 is not one, then TEMP should be set to
+          RHO * ||Z||_2^2 / TWO
+*/
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - midpt;
+/* L10: */
+	}
+
+	psi = 0.;
+	i__1 = *n - 2;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / delta[j];
+/* L20: */
+	}
+
+	c__ = rhoinv + psi;
+	w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*
+		n];
+
+	if (w <= 0.) {
+	    temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho)
+		    + z__[*n] * z__[*n] / *rho;
+	    if (c__ <= temp) {
+		tau = *rho;
+	    } else {
+		del = d__[*n] - d__[*n - 1];
+		a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]
+			;
+		b = z__[*n] * z__[*n] * del;
+		if (a < 0.) {
+		    tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
+		} else {
+		    tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
+		}
+	    }
+
+/*
+             It can be proved that
+                 D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
+*/
+
+	    dltlb = midpt;
+	    dltub = *rho;
+	} else {
+	    del = d__[*n] - d__[*n - 1];
+	    a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
+	    b = z__[*n] * z__[*n] * del;
+	    if (a < 0.) {
+		tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
+	    } else {
+		tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
+	    }
+
+/*
+             It can be proved that
+                 D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
+*/
+
+	    dltlb = 0.;
+	    dltub = midpt;
+	}
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - tau;
+/* L30: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L40: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / delta[*n];
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
+		+ dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Test for convergence */
+
+	if (abs(w) <= eps * erretm) {
+	    *dlam = d__[*i__] + tau;
+	    goto L250;
+	}
+
+	if (w <= 0.) {
+	    dltlb = max(dltlb,tau);
+	} else {
+	    dltub = min(dltub,tau);
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
+	a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (
+		dpsi + dphi);
+	b = delta[*n - 1] * delta[*n] * w;
+	if (c__ < 0.) {
+	    c__ = abs(c__);
+	}
+	if (c__ == 0.) {
+/*
+            ETA = B/A
+             ETA = RHO - TAU
+*/
+	    eta = dltub - tau;
+	} else if (a >= 0.) {
+	    eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
+		    * 2.);
+	} else {
+	    eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
+		    );
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta > 0.) {
+	    eta = -w / (dpsi + dphi);
+	}
+	temp = tau + eta;
+	if ((temp > dltub) || (temp < dltlb)) {
+	    if (w < 0.) {
+		eta = (dltub - tau) / 2.;
+	    } else {
+		eta = (dltlb - tau) / 2.;
+	    }
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] -= eta;
+/* L50: */
+	}
+
+	tau += eta;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L60: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / delta[*n];
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
+		+ dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Main loop to update the values of the array   DELTA */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 30; ++niter) {
+
+/*           Test for convergence */
+
+	    if (abs(w) <= eps * erretm) {
+		*dlam = d__[*i__] + tau;
+		goto L250;
+	    }
+
+	    if (w <= 0.) {
+		dltlb = max(dltlb,tau);
+	    } else {
+		dltub = min(dltub,tau);
+	    }
+
+/*           Calculate the new step */
+
+	    c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
+	    a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] *
+		    (dpsi + dphi);
+	    b = delta[*n - 1] * delta[*n] * w;
+	    if (a >= 0.) {
+		eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+			c__ * 2.);
+	    } else {
+		eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(
+			d__1))));
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta > 0.) {
+		eta = -w / (dpsi + dphi);
+	    }
+	    temp = tau + eta;
+	    if ((temp > dltub) || (temp < dltlb)) {
+		if (w < 0.) {
+		    eta = (dltub - tau) / 2.;
+		} else {
+		    eta = (dltlb - tau) / 2.;
+		}
+	    }
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] -= eta;
+/* L70: */
+	    }
+
+	    tau += eta;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.;
+	    psi = 0.;
+	    erretm = 0.;
+	    i__1 = ii;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / delta[j];
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L80: */
+	    }
+	    erretm = abs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    temp = z__[*n] / delta[*n];
+	    phi = z__[*n] * temp;
+	    dphi = temp * temp;
+	    erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (
+		    dpsi + dphi);
+
+	    w = rhoinv + phi + psi;
+/* L90: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+	*dlam = d__[*i__] + tau;
+	goto L250;
+
+/*        End for the case I = N */
+
+    } else {
+
+/*        The case for I < N */
+
+	niter = 1;
+	ip1 = *i__ + 1;
+
+/*        Calculate initial guess */
+
+	del = d__[ip1] - d__[*i__];
+	midpt = del / 2.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - midpt;
+/* L100: */
+	}
+
+	psi = 0.;
+	i__1 = *i__ - 1;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / delta[j];
+/* L110: */
+	}
+
+	phi = 0.;
+	i__1 = *i__ + 2;
+	for (j = *n; j >= i__1; --j) {
+	    phi += z__[j] * z__[j] / delta[j];
+/* L120: */
+	}
+	c__ = rhoinv + psi + phi;
+	w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] /
+		delta[ip1];
+
+	if (w > 0.) {
+
+/*
+             d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
+
+             We choose d(i) as origin.
+*/
+
+	    orgati = TRUE_;
+	    a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
+	    b = z__[*i__] * z__[*i__] * del;
+	    if (a > 0.) {
+		tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
+			d__1))));
+	    } else {
+		tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+			c__ * 2.);
+	    }
+	    dltlb = 0.;
+	    dltub = midpt;
+	} else {
+
+/*
+             (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
+
+             We choose d(i+1) as origin.
+*/
+
+	    orgati = FALSE_;
+	    a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
+	    b = z__[ip1] * z__[ip1] * del;
+	    if (a < 0.) {
+		tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(
+			d__1))));
+	    } else {
+		tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) /
+			(c__ * 2.);
+	    }
+	    dltlb = -midpt;
+	    dltub = 0.;
+	}
+
+	if (orgati) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] = d__[j] - d__[*i__] - tau;
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] = d__[j] - d__[ip1] - tau;
+/* L140: */
+	    }
+	}
+	if (orgati) {
+	    ii = *i__;
+	} else {
+	    ii = *i__ + 1;
+	}
+	iim1 = ii - 1;
+	iip1 = ii + 1;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L150: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.;
+	phi = 0.;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / delta[j];
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L160: */
+	}
+
+	w = rhoinv + phi + psi;
+
+/*
+          W is the value of the secular function with
+          its ii-th element removed.
+*/
+
+	swtch3 = FALSE_;
+	if (orgati) {
+	    if (w < 0.) {
+		swtch3 = TRUE_;
+	    }
+	} else {
+	    if (w > 0.) {
+		swtch3 = TRUE_;
+	    }
+	}
+	if ((ii == 1) || (ii == *n)) {
+	    swtch3 = FALSE_;
+	}
+
+	temp = z__[ii] / delta[ii];
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w += temp;
+	erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
+		abs(tau) * dw;
+
+/*        Test for convergence */
+
+	if (abs(w) <= eps * erretm) {
+	    if (orgati) {
+		*dlam = d__[*i__] + tau;
+	    } else {
+		*dlam = d__[ip1] + tau;
+	    }
+	    goto L250;
+	}
+
+	if (w <= 0.) {
+	    dltlb = max(dltlb,tau);
+	} else {
+	    dltub = min(dltub,tau);
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	if (! swtch3) {
+	    if (orgati) {
+/* Computing 2nd power */
+		d__1 = z__[*i__] / delta[*i__];
+		c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (d__1 *
+			d__1);
+	    } else {
+/* Computing 2nd power */
+		d__1 = z__[ip1] / delta[ip1];
+		c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (d__1 *
+			d__1);
+	    }
+	    a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] *
+		    dw;
+	    b = delta[*i__] * delta[ip1] * w;
+	    if (c__ == 0.) {
+		if (a == 0.) {
+		    if (orgati) {
+			a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] *
+				(dpsi + dphi);
+		    } else {
+			a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] *
+				(dpsi + dphi);
+		    }
+		}
+		eta = b / a;
+	    } else if (a <= 0.) {
+		eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+			c__ * 2.);
+	    } else {
+		eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
+			d__1))));
+	    }
+	} else {
+
+/*           Interpolation using THREE most relevant poles */
+
+	    temp = rhoinv + psi + phi;
+	    if (orgati) {
+		temp1 = z__[iim1] / delta[iim1];
+		temp1 *= temp1;
+		c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[
+			iip1]) * temp1;
+		zz[0] = z__[iim1] * z__[iim1];
+		zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
+	    } else {
+		temp1 = z__[iip1] / delta[iip1];
+		temp1 *= temp1;
+		c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[
+			iim1]) * temp1;
+		zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
+		zz[2] = z__[iip1] * z__[iip1];
+	    }
+	    zz[1] = z__[ii] * z__[ii];
+	    dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
+	    if (*info != 0) {
+		goto L250;
+	    }
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta >= 0.) {
+	    eta = -w / dw;
+	}
+	temp = tau + eta;
+	if ((temp > dltub) || (temp < dltlb)) {
+	    if (w < 0.) {
+		eta = (dltub - tau) / 2.;
+	    } else {
+		eta = (dltlb - tau) / 2.;
+	    }
+	}
+
+	prew = w;
+
+/* L170: */
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] -= eta;
+/* L180: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L190: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.;
+	phi = 0.;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / delta[j];
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L200: */
+	}
+
+	temp = z__[ii] / delta[ii];
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w = rhoinv + phi + psi + temp;
+	erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. + (
+		d__1 = tau + eta, abs(d__1)) * dw;
+
+	swtch = FALSE_;
+	if (orgati) {
+	    if (-w > abs(prew) / 10.) {
+		swtch = TRUE_;
+	    }
+	} else {
+	    if (w > abs(prew) / 10.) {
+		swtch = TRUE_;
+	    }
+	}
+
+	tau += eta;
+
+/*        Main loop to update the values of the array   DELTA */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 30; ++niter) {
+
+/*           Test for convergence */
+
+	    if (abs(w) <= eps * erretm) {
+		if (orgati) {
+		    *dlam = d__[*i__] + tau;
+		} else {
+		    *dlam = d__[ip1] + tau;
+		}
+		goto L250;
+	    }
+
+	    if (w <= 0.) {
+		dltlb = max(dltlb,tau);
+	    } else {
+		dltub = min(dltub,tau);
+	    }
+
+/*           Calculate the new step */
+
+	    if (! swtch3) {
+		if (! swtch) {
+		    if (orgati) {
+/* Computing 2nd power */
+			d__1 = z__[*i__] / delta[*i__];
+			c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (
+				d__1 * d__1);
+		    } else {
+/* Computing 2nd power */
+			d__1 = z__[ip1] / delta[ip1];
+			c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) *
+				(d__1 * d__1);
+		    }
+		} else {
+		    temp = z__[ii] / delta[ii];
+		    if (orgati) {
+			dpsi += temp * temp;
+		    } else {
+			dphi += temp * temp;
+		    }
+		    c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
+		}
+		a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1]
+			* dw;
+		b = delta[*i__] * delta[ip1] * w;
+		if (c__ == 0.) {
+		    if (a == 0.) {
+			if (! swtch) {
+			    if (orgati) {
+				a = z__[*i__] * z__[*i__] + delta[ip1] *
+					delta[ip1] * (dpsi + dphi);
+			    } else {
+				a = z__[ip1] * z__[ip1] + delta[*i__] * delta[
+					*i__] * (dpsi + dphi);
+			    }
+			} else {
+			    a = delta[*i__] * delta[*i__] * dpsi + delta[ip1]
+				    * delta[ip1] * dphi;
+			}
+		    }
+		    eta = b / a;
+		} else if (a <= 0.) {
+		    eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))))
+			     / (c__ * 2.);
+		} else {
+		    eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__,
+			    abs(d__1))));
+		}
+	    } else {
+
+/*              Interpolation using THREE most relevant poles */
+
+		temp = rhoinv + psi + phi;
+		if (swtch) {
+		    c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
+		    zz[0] = delta[iim1] * delta[iim1] * dpsi;
+		    zz[2] = delta[iip1] * delta[iip1] * dphi;
+		} else {
+		    if (orgati) {
+			temp1 = z__[iim1] / delta[iim1];
+			temp1 *= temp1;
+			c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1]
+				- d__[iip1]) * temp1;
+			zz[0] = z__[iim1] * z__[iim1];
+			zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 +
+				dphi);
+		    } else {
+			temp1 = z__[iip1] / delta[iip1];
+			temp1 *= temp1;
+			c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1]
+				- d__[iim1]) * temp1;
+			zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi -
+				temp1));
+			zz[2] = z__[iip1] * z__[iip1];
+		    }
+		}
+		dlaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta,
+			info);
+		if (*info != 0) {
+		    goto L250;
+		}
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta >= 0.) {
+		eta = -w / dw;
+	    }
+	    temp = tau + eta;
+	    if ((temp > dltub) || (temp < dltlb)) {
+		if (w < 0.) {
+		    eta = (dltub - tau) / 2.;
+		} else {
+		    eta = (dltlb - tau) / 2.;
+		}
+	    }
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] -= eta;
+/* L210: */
+	    }
+
+	    tau += eta;
+	    prew = w;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.;
+	    psi = 0.;
+	    erretm = 0.;
+	    i__1 = iim1;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / delta[j];
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L220: */
+	    }
+	    erretm = abs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    dphi = 0.;
+	    phi = 0.;
+	    i__1 = iip1;
+	    for (j = *n; j >= i__1; --j) {
+		temp = z__[j] / delta[j];
+		phi += z__[j] * temp;
+		dphi += temp * temp;
+		erretm += phi;
+/* L230: */
+	    }
+
+	    temp = z__[ii] / delta[ii];
+	    dw = dpsi + dphi + temp * temp;
+	    temp = z__[ii] * temp;
+	    w = rhoinv + phi + psi + temp;
+	    erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.
+		    + abs(tau) * dw;
+	    if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
+		swtch = ! swtch;
+	    }
+
+/* L240: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+	if (orgati) {
+	    *dlam = d__[*i__] + tau;
+	} else {
+	    *dlam = d__[ip1] + tau;
+	}
+
+    }
+
+L250:
+
+    return 0;
+
+/*     End of DLAED4 */
+
+} /* dlaed4_ */
+
+/* Subroutine */ int dlaed5_(integer *i__, doublereal *d__, doublereal *z__,
+	doublereal *delta, doublereal *rho, doublereal *dlam)
+{
+    /* System generated locals */
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal b, c__, w, del, tau, temp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    This subroutine computes the I-th eigenvalue of a symmetric rank-one
+    modification of a 2-by-2 diagonal matrix
+
+               diag( D )  +  RHO *  Z * transpose(Z) .
+
+    The diagonal elements in the array D are assumed to satisfy
+
+               D(i) < D(j)  for  i < j .
+
+    We also assume RHO > 0 and that the Euclidean norm of the vector
+    Z is one.
+
+    Arguments
+    =========
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  I = 1 or I = 2.
+
+    D      (input) DOUBLE PRECISION array, dimension (2)
+           The original eigenvalues.  We assume D(1) < D(2).
+
+    Z      (input) DOUBLE PRECISION array, dimension (2)
+           The components of the updating vector.
+
+    DELTA  (output) DOUBLE PRECISION array, dimension (2)
+           The vector DELTA contains the information necessary
+           to construct the eigenvectors.
+
+    RHO    (input) DOUBLE PRECISION
+           The scalar in the symmetric updating formula.
+
+    DLAM   (output) DOUBLE PRECISION
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    del = d__[2] - d__[1];
+    if (*i__ == 1) {
+	w = *rho * 2. * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.;
+	if (w > 0.) {
+	    b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[1] * z__[1] * del;
+
+/*           B > ZERO, always */
+
+	    tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
+	    *dlam = d__[1] + tau;
+	    delta[1] = -z__[1] / tau;
+	    delta[2] = z__[2] / (del - tau);
+	} else {
+	    b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[2] * z__[2] * del;
+	    if (b > 0.) {
+		tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
+	    } else {
+		tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
+	    }
+	    *dlam = d__[2] + tau;
+	    delta[1] = -z__[1] / (del + tau);
+	    delta[2] = -z__[2] / tau;
+	}
+	temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
+	delta[1] /= temp;
+	delta[2] /= temp;
+    } else {
+
+/*     Now I=2 */
+
+	b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	c__ = *rho * z__[2] * z__[2] * del;
+	if (b > 0.) {
+	    tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
+	} else {
+	    tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
+	}
+	*dlam = d__[2] + tau;
+	delta[1] = -z__[1] / (del + tau);
+	delta[2] = -z__[2] / tau;
+	temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
+	delta[1] /= temp;
+	delta[2] /= temp;
+    }
+    return 0;
+
+/*     End OF DLAED5 */
+
+} /* dlaed5_ */
+
+/* Subroutine */ int dlaed6_(integer *kniter, logical *orgati, doublereal *
+	rho, doublereal *d__, doublereal *z__, doublereal *finit, doublereal *
+	tau, integer *info)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Builtin functions */
+    double sqrt(doublereal), log(doublereal), pow_di(doublereal *, integer *);
+
+    /* Local variables */
+    static doublereal a, b, c__, f;
+    static integer i__;
+    static doublereal fc, df, ddf, eta, eps, base;
+    static integer iter;
+    static doublereal temp, temp1, temp2, temp3, temp4;
+    static logical scale;
+    static integer niter;
+    static doublereal small1, small2, sminv1, sminv2;
+
+    static doublereal dscale[3], sclfac, zscale[3], erretm, sclinv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLAED6 computes the positive or negative root (closest to the origin)
+    of
+                     z(1)        z(2)        z(3)
+    f(x) =   rho + --------- + ---------- + ---------
+                    d(1)-x      d(2)-x      d(3)-x
+
+    It is assumed that
+
+          if ORGATI = .true. the root is between d(2) and d(3);
+          otherwise it is between d(1) and d(2)
+
+    This routine will be called by DLAED4 when necessary. In most cases,
+    the root sought is the smallest in magnitude, though it might not be
+    in some extremely rare situations.
+
+    Arguments
+    =========
+
+    KNITER       (input) INTEGER
+                 Refer to DLAED4 for its significance.
+
+    ORGATI       (input) LOGICAL
+                 If ORGATI is true, the needed root is between d(2) and
+                 d(3); otherwise it is between d(1) and d(2).  See
+                 DLAED4 for further details.
+
+    RHO          (input) DOUBLE PRECISION
+                 Refer to the equation f(x) above.
+
+    D            (input) DOUBLE PRECISION array, dimension (3)
+                 D satisfies d(1) < d(2) < d(3).
+
+    Z            (input) DOUBLE PRECISION array, dimension (3)
+                 Each of the elements in z must be positive.
+
+    FINIT        (input) DOUBLE PRECISION
+                 The value of f at 0. It is more accurate than the one
+                 evaluated inside this routine (if someone wants to do
+                 so).
+
+    TAU          (output) DOUBLE PRECISION
+                 The root of the equation f(x).
+
+    INFO         (output) INTEGER
+                 = 0: successful exit
+                 > 0: if INFO = 1, failure to converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --z__;
+    --d__;
+
+    /* Function Body */
+
+    *info = 0;
+
+    niter = 1;
+    *tau = 0.;
+    if (*kniter == 2) {
+	if (*orgati) {
+	    temp = (d__[3] - d__[2]) / 2.;
+	    c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
+	    a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
+	    b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
+	} else {
+	    temp = (d__[1] - d__[2]) / 2.;
+	    c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
+	    a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
+	    b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
+	}
+/* Computing MAX */
+	d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__);
+	temp = max(d__1,d__2);
+	a /= temp;
+	b /= temp;
+	c__ /= temp;
+	if (c__ == 0.) {
+	    *tau = b / a;
+	} else if (a <= 0.) {
+	    *tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+		    c__ * 2.);
+	} else {
+	    *tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))
+		    ));
+	}
+	temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) +
+		z__[3] / (d__[3] - *tau);
+	if (abs(*finit) <= abs(temp)) {
+	    *tau = 0.;
+	}
+    }
+
+/*
+       On first call to routine, get machine parameters for
+       possible scaling to avoid overflow
+*/
+
+    if (first) {
+	eps = EPSILON;
+	base = BASE;
+	i__1 = (integer) (log(SAFEMINIMUM) / log(base) / 3.);
+	small1 = pow_di(&base, &i__1);
+	sminv1 = 1. / small1;
+	small2 = small1 * small1;
+	sminv2 = sminv1 * sminv1;
+	first = FALSE_;
+    }
+
+/*
+       Determine if scaling of inputs necessary to avoid overflow
+       when computing 1/TEMP**3
+*/
+
+    if (*orgati) {
+/* Computing MIN */
+	d__3 = (d__1 = d__[2] - *tau, abs(d__1)), d__4 = (d__2 = d__[3] - *
+		tau, abs(d__2));
+	temp = min(d__3,d__4);
+    } else {
+/* Computing MIN */
+	d__3 = (d__1 = d__[1] - *tau, abs(d__1)), d__4 = (d__2 = d__[2] - *
+		tau, abs(d__2));
+	temp = min(d__3,d__4);
+    }
+    scale = FALSE_;
+    if (temp <= small1) {
+	scale = TRUE_;
+	if (temp <= small2) {
+
+/*        Scale up by power of radix nearest 1/SAFMIN**(2/3) */
+
+	    sclfac = sminv2;
+	    sclinv = small2;
+	} else {
+
+/*        Scale up by power of radix nearest 1/SAFMIN**(1/3) */
+
+	    sclfac = sminv1;
+	    sclinv = small1;
+	}
+
+/*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
+
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    dscale[i__ - 1] = d__[i__] * sclfac;
+	    zscale[i__ - 1] = z__[i__] * sclfac;
+/* L10: */
+	}
+	*tau *= sclfac;
+    } else {
+
+/*        Copy D and Z to DSCALE and ZSCALE */
+
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    dscale[i__ - 1] = d__[i__];
+	    zscale[i__ - 1] = z__[i__];
+/* L20: */
+	}
+    }
+
+    fc = 0.;
+    df = 0.;
+    ddf = 0.;
+    for (i__ = 1; i__ <= 3; ++i__) {
+	temp = 1. / (dscale[i__ - 1] - *tau);
+	temp1 = zscale[i__ - 1] * temp;
+	temp2 = temp1 * temp;
+	temp3 = temp2 * temp;
+	fc += temp1 / dscale[i__ - 1];
+	df += temp2;
+	ddf += temp3;
+/* L30: */
+    }
+    f = *finit + *tau * fc;
+
+    if (abs(f) <= 0.) {
+	goto L60;
+    }
+
+/*
+          Iteration begins
+
+       It is not hard to see that
+
+             1) Iterations will go up monotonically
+                if FINIT < 0;
+
+             2) Iterations will go down monotonically
+                if FINIT > 0.
+*/
+
+    iter = niter + 1;
+
+    for (niter = iter; niter <= 20; ++niter) {
+
+	if (*orgati) {
+	    temp1 = dscale[1] - *tau;
+	    temp2 = dscale[2] - *tau;
+	} else {
+	    temp1 = dscale[0] - *tau;
+	    temp2 = dscale[1] - *tau;
+	}
+	a = (temp1 + temp2) * f - temp1 * temp2 * df;
+	b = temp1 * temp2 * f;
+	c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
+/* Computing MAX */
+	d__1 = abs(a), d__2 = abs(b), d__1 = max(d__1,d__2), d__2 = abs(c__);
+	temp = max(d__1,d__2);
+	a /= temp;
+	b /= temp;
+	c__ /= temp;
+	if (c__ == 0.) {
+	    eta = b / a;
+	} else if (a <= 0.) {
+	    eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
+		    * 2.);
+	} else {
+	    eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
+		    );
+	}
+	if (f * eta >= 0.) {
+	    eta = -f / df;
+	}
+
+	temp = eta + *tau;
+	if (*orgati) {
+	    if (eta > 0. && temp >= dscale[2]) {
+		eta = (dscale[2] - *tau) / 2.;
+	    }
+	    if (eta < 0. && temp <= dscale[1]) {
+		eta = (dscale[1] - *tau) / 2.;
+	    }
+	} else {
+	    if (eta > 0. && temp >= dscale[1]) {
+		eta = (dscale[1] - *tau) / 2.;
+	    }
+	    if (eta < 0. && temp <= dscale[0]) {
+		eta = (dscale[0] - *tau) / 2.;
+	    }
+	}
+	*tau += eta;
+
+	fc = 0.;
+	erretm = 0.;
+	df = 0.;
+	ddf = 0.;
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    temp = 1. / (dscale[i__ - 1] - *tau);
+	    temp1 = zscale[i__ - 1] * temp;
+	    temp2 = temp1 * temp;
+	    temp3 = temp2 * temp;
+	    temp4 = temp1 / dscale[i__ - 1];
+	    fc += temp4;
+	    erretm += abs(temp4);
+	    df += temp2;
+	    ddf += temp3;
+/* L40: */
+	}
+	f = *finit + *tau * fc;
+	erretm = (abs(*finit) + abs(*tau) * erretm) * 8. + abs(*tau) * df;
+	if (abs(f) <= eps * erretm) {
+	    goto L60;
+	}
+/* L50: */
+    }
+    *info = 1;
+L60:
+
+/*     Undo scaling */
+
+    if (scale) {
+	*tau *= sclinv;
+    }
+    return 0;
+
+/*     End of DLAED6 */
+
+} /* dlaed6_ */
+
+/* Subroutine */ int dlaed7_(integer *icompq, integer *n, integer *qsiz,
+	integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__,
+	doublereal *q, integer *ldq, integer *indxq, doublereal *rho, integer
+	*cutpnt, doublereal *qstore, integer *qptr, integer *prmptr, integer *
+	perm, integer *givptr, integer *givcol, doublereal *givnum,
+	doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer indxc, indxp;
+    extern /* Subroutine */ int dlaed8_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, integer *, integer *,
+	    doublereal *, integer *, integer *, integer *), dlaed9_(integer *,
+	     integer *, integer *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     integer *, integer *), dlaeda_(integer *, integer *, integer *,
+	    integer *, integer *, integer *, integer *, integer *, doublereal
+	    *, doublereal *, integer *, doublereal *, doublereal *, integer *)
+	    ;
+    static integer idlmda;
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *);
+    static integer coltyp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLAED7 computes the updated eigensystem of a diagonal
+    matrix after modification by a rank-one symmetric matrix. This
+    routine is used only for the eigenproblem which requires all
+    eigenvalues and optionally eigenvectors of a dense symmetric matrix
+    that has been reduced to tridiagonal form.  DLAED1 handles
+    the case in which all eigenvalues and eigenvectors of a symmetric
+    tridiagonal matrix are desired.
+
+      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+
+       where Z = Q'u, u is a vector of length N with ones in the
+       CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+
+       The eigenvectors of the original matrix are stored in Q, and the
+       eigenvalues are in D.  The algorithm consists of three stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple eigenvalues or if there is a zero in
+          the Z vector.  For each such occurence the dimension of the
+          secular equation problem is reduced by one.  This stage is
+          performed by the routine DLAED8.
+
+          The second stage consists of calculating the updated
+          eigenvalues. This is done by finding the roots of the secular
+          equation via the routine DLAED4 (as called by DLAED9).
+          This routine also calculates the eigenvectors of the current
+          problem.
+
+          The final stage consists of computing the updated eigenvectors
+          directly using the updated eigenvalues.  The eigenvectors for
+          the current problem are multiplied with the eigenvectors from
+          the overall problem.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            = 0:  Compute eigenvalues only.
+            = 1:  Compute eigenvectors of original dense symmetric matrix
+                  also.  On entry, Q contains the orthogonal matrix used
+                  to reduce the original matrix to tridiagonal form.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    QSIZ   (input) INTEGER
+           The dimension of the orthogonal matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    TLVLS  (input) INTEGER
+           The total number of merging levels in the overall divide and
+           conquer tree.
+
+    CURLVL (input) INTEGER
+           The current level in the overall merge routine,
+           0 <= CURLVL <= TLVLS.
+
+    CURPBM (input) INTEGER
+           The current problem in the current level in the overall
+           merge routine (counting from upper left to lower right).
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, the eigenvalues of the rank-1-perturbed matrix.
+           On exit, the eigenvalues of the repaired matrix.
+
+    Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
+           On entry, the eigenvectors of the rank-1-perturbed matrix.
+           On exit, the eigenvectors of the repaired tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (output) INTEGER array, dimension (N)
+           The permutation which will reintegrate the subproblem just
+           solved back into sorted order, i.e., D( INDXQ( I = 1, N ) )
+           will be in ascending order.
+
+    RHO    (input) DOUBLE PRECISION
+           The subdiagonal element used to create the rank-1
+           modification.
+
+    CUTPNT (input) INTEGER
+           Contains the location of the last eigenvalue in the leading
+           sub-matrix.  min(1,N) <= CUTPNT <= N.
+
+    QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1)
+           Stores eigenvectors of submatrices encountered during
+           divide and conquer, packed together. QPTR points to
+           beginning of the submatrices.
+
+    QPTR   (input/output) INTEGER array, dimension (N+2)
+           List of indices pointing to beginning of submatrices stored
+           in QSTORE. The submatrices are numbered starting at the
+           bottom left of the divide and conquer tree, from left to
+           right and bottom to top.
+
+    PRMPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in PERM a
+           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+           indicates the size of the permutation and also the size of
+           the full, non-deflated problem.
+
+    PERM   (input) INTEGER array, dimension (N lg N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in GIVCOL a
+           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+           indicates the number of Givens rotations.
+
+    GIVCOL (input) INTEGER array, dimension (2, N lg N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension (3*N+QSIZ*N)
+
+    IWORK  (workspace) INTEGER array, dimension (4*N)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --qstore;
+    --qptr;
+    --prmptr;
+    --perm;
+    --givptr;
+    givcol -= 3;
+    givnum -= 3;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*icompq == 1 && *qsiz < *n) {
+	*info = -4;
+    } else if (*ldq < max(1,*n)) {
+	*info = -9;
+    } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED7", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in DLAED8 and DLAED9.
+*/
+
+    if (*icompq == 1) {
+	ldq2 = *qsiz;
+    } else {
+	ldq2 = *n;
+    }
+
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq2 = iw + *n;
+    is = iq2 + *n * ldq2;
+
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+
+/*
+       Form the z-vector which consists of the last row of Q_1 and the
+       first row of Q_2.
+*/
+
+    ptr = pow_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = *tlvls - i__;
+	ptr += pow_ii(&c__2, &i__2);
+/* L10: */
+    }
+    curr = ptr + *curpbm;
+    dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
+	    givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz
+	    + *n], info);
+
+/*
+       When solving the final problem, we no longer need the stored data,
+       so we will overwrite the data from this level onto the previously
+       used storage space.
+*/
+
+    if (*curlvl == *tlvls) {
+	qptr[curr] = 1;
+	prmptr[curr] = 1;
+	givptr[curr] = 1;
+    }
+
+/*     Sort and Deflate eigenvalues. */
+
+    dlaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho,
+	    cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], &
+	    perm[prmptr[curr]], &givptr[curr + 1], &givcol[((givptr[curr]) <<
+	    (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1], &iwork[indxp], &
+	    iwork[indx], info);
+    prmptr[curr + 1] = prmptr[curr] + *n;
+    givptr[curr + 1] += givptr[curr];
+
+/*     Solve Secular Equation. */
+
+    if (k != 0) {
+	dlaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda],
+		&work[iw], &qstore[qptr[curr]], &k, info);
+	if (*info != 0) {
+	    goto L30;
+	}
+	if (*icompq == 1) {
+	    dgemm_("N", "N", qsiz, &k, &k, &c_b2865, &work[iq2], &ldq2, &
+		    qstore[qptr[curr]], &k, &c_b2879, &q[q_offset], ldq);
+	}
+/* Computing 2nd power */
+	i__1 = k;
+	qptr[curr + 1] = qptr[curr] + i__1 * i__1;
+
+/*     Prepare the INDXQ sorting permutation. */
+
+	n1 = k;
+	n2 = *n - k;
+	dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+	qptr[curr + 1] = qptr[curr];
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    indxq[i__] = i__;
+/* L20: */
+	}
+    }
+
+L30:
+    return 0;
+
+/*     End of DLAED7 */
+
+} /* dlaed7_ */
+
+/* Subroutine */ int dlaed8_(integer *icompq, integer *k, integer *n, integer
+	*qsiz, doublereal *d__, doublereal *q, integer *ldq, integer *indxq,
+	doublereal *rho, integer *cutpnt, doublereal *z__, doublereal *dlamda,
+	 doublereal *q2, integer *ldq2, doublereal *w, integer *perm, integer
+	*givptr, integer *givcol, doublereal *givnum, integer *indxp, integer
+	*indx, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal c__;
+    static integer i__, j;
+    static doublereal s, t;
+    static integer k2, n1, n2, jp, n1p1;
+    static doublereal eps, tau, tol;
+    static integer jlam, imax, jmax;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *), dscal_(
+	    integer *, doublereal *, doublereal *, integer *), dcopy_(integer
+	    *, doublereal *, integer *, doublereal *, integer *);
+
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), dlacpy_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLAED8 merges the two sets of eigenvalues together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    eigenvalues are close together or if there is a tiny element in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            = 0:  Compute eigenvalues only.
+            = 1:  Compute eigenvectors of original dense symmetric matrix
+                  also.  On entry, Q contains the orthogonal matrix used
+                  to reduce the original matrix to tridiagonal form.
+
+    K      (output) INTEGER
+           The number of non-deflated eigenvalues, and the order of the
+           related secular equation.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    QSIZ   (input) INTEGER
+           The dimension of the orthogonal matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, the eigenvalues of the two submatrices to be
+           combined.  On exit, the trailing (N-K) updated eigenvalues
+           (those which were deflated) sorted into increasing order.
+
+    Q      (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
+           If ICOMPQ = 0, Q is not referenced.  Otherwise,
+           on entry, Q contains the eigenvectors of the partially solved
+           system which has been previously updated in matrix
+           multiplies with other partially solved eigensystems.
+           On exit, Q contains the trailing (N-K) updated eigenvectors
+           (those which were deflated) in its last N-K columns.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (input) INTEGER array, dimension (N)
+           The permutation which separately sorts the two sub-problems
+           in D into ascending order.  Note that elements in the second
+           half of this permutation must first have CUTPNT added to
+           their values in order to be accurate.
+
+    RHO    (input/output) DOUBLE PRECISION
+           On entry, the off-diagonal element associated with the rank-1
+           cut which originally split the two submatrices which are now
+           being recombined.
+           On exit, RHO has been modified to the value required by
+           DLAED3.
+
+    CUTPNT (input) INTEGER
+           The location of the last eigenvalue in the leading
+           sub-matrix.  min(1,N) <= CUTPNT <= N.
+
+    Z      (input) DOUBLE PRECISION array, dimension (N)
+           On entry, Z contains the updating vector (the last row of
+           the first sub-eigenvector matrix and the first row of the
+           second sub-eigenvector matrix).
+           On exit, the contents of Z are destroyed by the updating
+           process.
+
+    DLAMDA (output) DOUBLE PRECISION array, dimension (N)
+           A copy of the first K eigenvalues which will be used by
+           DLAED3 to form the secular equation.
+
+    Q2     (output) DOUBLE PRECISION array, dimension (LDQ2,N)
+           If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+           a copy of the first K eigenvectors which will be used by
+           DLAED7 in a matrix multiply (DGEMM) to update the new
+           eigenvectors.
+
+    LDQ2   (input) INTEGER
+           The leading dimension of the array Q2.  LDQ2 >= max(1,N).
+
+    W      (output) DOUBLE PRECISION array, dimension (N)
+           The first k values of the final deflation-altered z-vector and
+           will be passed to DLAED3.
+
+    PERM   (output) INTEGER array, dimension (N)
+           The permutations (from deflation and sorting) to be applied
+           to each eigenblock.
+
+    GIVPTR (output) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem.
+
+    GIVCOL (output) INTEGER array, dimension (2, N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (output) DOUBLE PRECISION array, dimension (2, N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    INDXP  (workspace) INTEGER array, dimension (N)
+           The permutation used to place deflated values of D at the end
+           of the array.  INDXP(1:K) points to the nondeflated D-values
+           and INDXP(K+1:N) points to the deflated eigenvalues.
+
+    INDX   (workspace) INTEGER array, dimension (N)
+           The permutation used to sort the contents of D into ascending
+           order.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --z__;
+    --dlamda;
+    q2_dim1 = *ldq2;
+    q2_offset = 1 + q2_dim1;
+    q2 -= q2_offset;
+    --w;
+    --perm;
+    givcol -= 3;
+    givnum -= 3;
+    --indxp;
+    --indx;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*icompq == 1 && *qsiz < *n) {
+	*info = -4;
+    } else if (*ldq < max(1,*n)) {
+	*info = -7;
+    } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) {
+	*info = -10;
+    } else if (*ldq2 < max(1,*n)) {
+	*info = -14;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED8", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    n1 = *cutpnt;
+    n2 = *n - n1;
+    n1p1 = n1 + 1;
+
+    if (*rho < 0.) {
+	dscal_(&n2, &c_b3001, &z__[n1p1], &c__1);
+    }
+
+/*     Normalize z so that norm(z) = 1 */
+
+    t = 1. / sqrt(2.);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	indx[j] = j;
+/* L10: */
+    }
+    dscal_(n, &t, &z__[1], &c__1);
+    *rho = (d__1 = *rho * 2., abs(d__1));
+
+/*     Sort the eigenvalues into increasing order */
+
+    i__1 = *n;
+    for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
+	indxq[i__] += *cutpnt;
+/* L20: */
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = d__[indxq[i__]];
+	w[i__] = z__[indxq[i__]];
+/* L30: */
+    }
+    i__ = 1;
+    j = *cutpnt + 1;
+    dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__[i__] = dlamda[indx[i__]];
+	z__[i__] = w[indx[i__]];
+/* L40: */
+    }
+
+/*     Calculate the allowable deflation tolerence */
+
+    imax = idamax_(n, &z__[1], &c__1);
+    jmax = idamax_(n, &d__[1], &c__1);
+    eps = EPSILON;
+    tol = eps * 8. * (d__1 = d__[jmax], abs(d__1));
+
+/*
+       If the rank-1 modifier is small enough, no more needs to be done
+       except to reorganize Q so that its columns correspond with the
+       elements in D.
+*/
+
+    if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
+	*k = 0;
+	if (*icompq == 0) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		perm[j] = indxq[indx[j]];
+/* L50: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		perm[j] = indxq[indx[j]];
+		dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1
+			+ 1], &c__1);
+/* L60: */
+	    }
+	    dlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
+	}
+	return 0;
+    }
+
+/*
+       If there are multiple eigenvalues then the problem deflates.  Here
+       the number of equal eigenvalues are found.  As each equal
+       eigenvalue is found, an elementary reflector is computed to rotate
+       the corresponding eigensubspace so that the corresponding
+       components of Z are zero in this new basis.
+*/
+
+    *k = 0;
+    *givptr = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    indxp[k2] = j;
+	    if (j == *n) {
+		goto L110;
+	    }
+	} else {
+	    jlam = j;
+	    goto L80;
+	}
+/* L70: */
+    }
+L80:
+    ++j;
+    if (j > *n) {
+	goto L100;
+    }
+    if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	indxp[k2] = j;
+    } else {
+
+/*        Check if eigenvalues are close enough to allow deflation. */
+
+	s = z__[jlam];
+	c__ = z__[j];
+
+/*
+          Find sqrt(a**2+b**2) without overflow or
+          destructive underflow.
+*/
+
+	tau = dlapy2_(&c__, &s);
+	t = d__[j] - d__[jlam];
+	c__ /= tau;
+	s = -s / tau;
+	if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    z__[j] = tau;
+	    z__[jlam] = 0.;
+
+/*           Record the appropriate Givens rotation */
+
+	    ++(*givptr);
+	    givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]];
+	    givcol[((*givptr) << (1)) + 2] = indxq[indx[j]];
+	    givnum[((*givptr) << (1)) + 1] = c__;
+	    givnum[((*givptr) << (1)) + 2] = s;
+	    if (*icompq == 1) {
+		drot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[
+			indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
+	    }
+	    t = d__[jlam] * c__ * c__ + d__[j] * s * s;
+	    d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
+	    d__[jlam] = t;
+	    --k2;
+	    i__ = 1;
+L90:
+	    if (k2 + i__ <= *n) {
+		if (d__[jlam] < d__[indxp[k2 + i__]]) {
+		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+		    indxp[k2 + i__] = jlam;
+		    ++i__;
+		    goto L90;
+		} else {
+		    indxp[k2 + i__ - 1] = jlam;
+		}
+	    } else {
+		indxp[k2 + i__ - 1] = jlam;
+	    }
+	    jlam = j;
+	} else {
+	    ++(*k);
+	    w[*k] = z__[jlam];
+	    dlamda[*k] = d__[jlam];
+	    indxp[*k] = jlam;
+	    jlam = j;
+	}
+    }
+    goto L80;
+L100:
+
+/*     Record the last eigenvalue. */
+
+    ++(*k);
+    w[*k] = z__[jlam];
+    dlamda[*k] = d__[jlam];
+    indxp[*k] = jlam;
+
+L110:
+
+/*
+       Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+       and Q2 respectively.  The eigenvalues/vectors which were not
+       deflated go into the first K slots of DLAMDA and Q2 respectively,
+       while those which were deflated go into the last N - K slots.
+*/
+
+    if (*icompq == 0) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    jp = indxp[j];
+	    dlamda[j] = d__[jp];
+	    perm[j] = indxq[indx[jp]];
+/* L120: */
+	}
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    jp = indxp[j];
+	    dlamda[j] = d__[jp];
+	    perm[j] = indxq[indx[jp]];
+	    dcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
+		    , &c__1);
+/* L130: */
+	}
+    }
+
+/*
+       The deflated eigenvalues and their corresponding vectors go back
+       into the last N - K slots of D and Q respectively.
+*/
+
+    if (*k < *n) {
+	if (*icompq == 0) {
+	    i__1 = *n - *k;
+	    dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	} else {
+	    i__1 = *n - *k;
+	    dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	    i__1 = *n - *k;
+	    dlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*
+		    k + 1) * q_dim1 + 1], ldq);
+	}
+    }
+
+    return 0;
+
+/*     End of DLAED8 */
+
+} /* dlaed8_ */
+
+/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop,
+	integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal *
+	rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds,
+	integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal temp;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlaed4_(integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, integer *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLAED9 finds the roots of the secular equation, as defined by the
+    values in D, Z, and RHO, between KSTART and KSTOP.  It makes the
+    appropriate calls to DLAED4 and then stores the new matrix of
+    eigenvectors for use in calculating the next level of Z vectors.
+
+    Arguments
+    =========
+
+    K       (input) INTEGER
+            The number of terms in the rational function to be solved by
+            DLAED4.  K >= 0.
+
+    KSTART  (input) INTEGER
+    KSTOP   (input) INTEGER
+            The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP
+            are to be computed.  1 <= KSTART <= KSTOP <= K.
+
+    N       (input) INTEGER
+            The number of rows and columns in the Q matrix.
+            N >= K (delation may result in N > K).
+
+    D       (output) DOUBLE PRECISION array, dimension (N)
+            D(I) contains the updated eigenvalues
+            for KSTART <= I <= KSTOP.
+
+    Q       (workspace) DOUBLE PRECISION array, dimension (LDQ,N)
+
+    LDQ     (input) INTEGER
+            The leading dimension of the array Q.  LDQ >= max( 1, N ).
+
+    RHO     (input) DOUBLE PRECISION
+            The value of the parameter in the rank one update equation.
+            RHO >= 0 required.
+
+    DLAMDA  (input) DOUBLE PRECISION array, dimension (K)
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation.
+
+    W       (input) DOUBLE PRECISION array, dimension (K)
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating vector.
+
+    S       (output) DOUBLE PRECISION array, dimension (LDS, K)
+            Will contain the eigenvectors of the repaired matrix which
+            will be stored for subsequent Z vector calculation and
+            multiplied by the previously accumulated eigenvectors
+            to update the system.
+
+    LDS     (input) INTEGER
+            The leading dimension of S.  LDS >= max( 1, K ).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dlamda;
+    --w;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1;
+    s -= s_offset;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*k < 0) {
+	*info = -1;
+    } else if ((*kstart < 1) || (*kstart > max(1,*k))) {
+	*info = -2;
+    } else if ((max(1,*kstop) < *kstart) || (*kstop > max(1,*k))) {
+	*info = -3;
+    } else if (*n < *k) {
+	*info = -4;
+    } else if (*ldq < max(1,*k)) {
+	*info = -7;
+    } else if (*lds < max(1,*k)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAED9", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 0) {
+	return 0;
+    }
+
+/*
+       Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DLAMDA(I) if it is 1; this makes the subsequent
+       subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DLAMDA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DLAMDA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
+/* L10: */
+    }
+
+    i__1 = *kstop;
+    for (j = *kstart; j <= i__1; ++j) {
+	dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j],
+		info);
+
+/*        If the zero finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    goto L120;
+	}
+/* L20: */
+    }
+
+    if ((*k == 1) || (*k == 2)) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = *k;
+	    for (j = 1; j <= i__2; ++j) {
+		s[j + i__ * s_dim1] = q[j + i__ * q_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+	goto L120;
+    }
+
+/*     Compute updated W. */
+
+    dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1);
+
+/*     Initialize W(I) = Q(I,I) */
+
+    i__1 = *ldq + 1;
+    dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L50: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L60: */
+	}
+/* L70: */
+    }
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__1 = sqrt(-w[i__]);
+	w[i__] = d_sign(&d__1, &s[i__ + s_dim1]);
+/* L80: */
+    }
+
+/*     Compute eigenvectors of the modified rank-1 modification. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1];
+/* L90: */
+	}
+	temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1);
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp;
+/* L100: */
+	}
+/* L110: */
+    }
+
+L120:
+    return 0;
+
+/*     End of DLAED9 */
+
+} /* dlaed9_ */
+
+/* Subroutine */ int dlaeda_(integer *n, integer *tlvls, integer *curlvl,
+	integer *curpbm, integer *prmptr, integer *perm, integer *givptr,
+	integer *givcol, doublereal *givnum, doublereal *q, integer *qptr,
+	doublereal *z__, doublereal *ztemp, integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, k, mid, ptr;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static integer curr, bsiz1, bsiz2, psiz1, psiz2, zptr1;
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *), dcopy_(integer *,
+	    doublereal *, integer *, doublereal *, integer *), xerbla_(char *,
+	     integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLAEDA computes the Z vector corresponding to the merge step in the
+    CURLVLth step of the merge process with TLVLS steps for the CURPBMth
+    problem.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    TLVLS  (input) INTEGER
+           The total number of merging levels in the overall divide and
+           conquer tree.
+
+    CURLVL (input) INTEGER
+           The current level in the overall merge routine,
+           0 <= curlvl <= tlvls.
+
+    CURPBM (input) INTEGER
+           The current problem in the current level in the overall
+           merge routine (counting from upper left to lower right).
+
+    PRMPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in PERM a
+           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+           indicates the size of the permutation and incidentally the
+           size of the full, non-deflated problem.
+
+    PERM   (input) INTEGER array, dimension (N lg N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in GIVCOL a
+           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+           indicates the number of Givens rotations.
+
+    GIVCOL (input) INTEGER array, dimension (2, N lg N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    Q      (input) DOUBLE PRECISION array, dimension (N**2)
+           Contains the square eigenblocks from previous levels, the
+           starting positions for blocks are given by QPTR.
+
+    QPTR   (input) INTEGER array, dimension (N+2)
+           Contains a list of pointers which indicate where in Q an
+           eigenblock is stored.  SQRT( QPTR(i+1) - QPTR(i) ) indicates
+           the size of the block.
+
+    Z      (output) DOUBLE PRECISION array, dimension (N)
+           On output this vector contains the updating vector (the last
+           row of the first sub-eigenvector matrix and the first row of
+           the second sub-eigenvector matrix).
+
+    ZTEMP  (workspace) DOUBLE PRECISION array, dimension (N)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --ztemp;
+    --z__;
+    --qptr;
+    --q;
+    givnum -= 3;
+    givcol -= 3;
+    --givptr;
+    --perm;
+    --prmptr;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -1;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAEDA", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine location of first number in second half. */
+
+    mid = *n / 2 + 1;
+
+/*     Gather last/first rows of appropriate eigenblocks into center of Z */
+
+    ptr = 1;
+
+/*
+       Determine location of lowest level subproblem in the full storage
+       scheme
+*/
+
+    i__1 = *curlvl - 1;
+    curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
+
+/*
+       Determine size of these matrices.  We add HALF to the value of
+       the SQRT in case the machine underestimates one of these square
+       roots.
+*/
+
+    bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) + .5);
+    bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])) +
+	    .5);
+    i__1 = mid - bsiz1 - 1;
+    for (k = 1; k <= i__1; ++k) {
+	z__[k] = 0.;
+/* L10: */
+    }
+    dcopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], &
+	    c__1);
+    dcopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
+    i__1 = *n;
+    for (k = mid + bsiz2; k <= i__1; ++k) {
+	z__[k] = 0.;
+/* L20: */
+    }
+
+/*
+       Loop thru remaining levels 1 -> CURLVL applying the Givens
+       rotations and permutation and then multiplying the center matrices
+       against the current Z.
+*/
+
+    ptr = pow_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (k = 1; k <= i__1; ++k) {
+	i__2 = *curlvl - k;
+	i__3 = *curlvl - k - 1;
+	curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
+		1;
+	psiz1 = prmptr[curr + 1] - prmptr[curr];
+	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
+	zptr1 = mid - psiz1;
+
+/*       Apply Givens at CURR and CURR+1 */
+
+	i__2 = givptr[curr + 1] - 1;
+	for (i__ = givptr[curr]; i__ <= i__2; ++i__) {
+	    drot_(&c__1, &z__[zptr1 + givcol[((i__) << (1)) + 1] - 1], &c__1,
+		    &z__[zptr1 + givcol[((i__) << (1)) + 2] - 1], &c__1, &
+		    givnum[((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]);
+/* L30: */
+	}
+	i__2 = givptr[curr + 2] - 1;
+	for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) {
+	    drot_(&c__1, &z__[mid - 1 + givcol[((i__) << (1)) + 1]], &c__1, &
+		    z__[mid - 1 + givcol[((i__) << (1)) + 2]], &c__1, &givnum[
+		    ((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]);
+/* L40: */
+	}
+	psiz1 = prmptr[curr + 1] - prmptr[curr];
+	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
+	i__2 = psiz1 - 1;
+	for (i__ = 0; i__ <= i__2; ++i__) {
+	    ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
+/* L50: */
+	}
+	i__2 = psiz2 - 1;
+	for (i__ = 0; i__ <= i__2; ++i__) {
+	    ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] -
+		    1];
+/* L60: */
+	}
+
+/*
+          Multiply Blocks at CURR and CURR+1
+
+          Determine size of these matrices.  We add HALF to the value of
+          the SQRT in case the machine underestimates one of these
+          square roots.
+*/
+
+	bsiz1 = (integer) (sqrt((doublereal) (qptr[curr + 1] - qptr[curr])) +
+		.5);
+	bsiz2 = (integer) (sqrt((doublereal) (qptr[curr + 2] - qptr[curr + 1])
+		) + .5);
+	if (bsiz1 > 0) {
+	    dgemv_("T", &bsiz1, &bsiz1, &c_b2865, &q[qptr[curr]], &bsiz1, &
+		    ztemp[1], &c__1, &c_b2879, &z__[zptr1], &c__1);
+	}
+	i__2 = psiz1 - bsiz1;
+	dcopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
+	if (bsiz2 > 0) {
+	    dgemv_("T", &bsiz2, &bsiz2, &c_b2865, &q[qptr[curr + 1]], &bsiz2,
+		    &ztemp[psiz1 + 1], &c__1, &c_b2879, &z__[mid], &c__1);
+	}
+	i__2 = psiz2 - bsiz2;
+	dcopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], &
+		c__1);
+
+	i__2 = *tlvls - k;
+	ptr += pow_ii(&c__2, &i__2);
+/* L70: */
+    }
+
+    return 0;
+
+/*     End of DLAEDA */
+
+} /* dlaeda_ */
+
+/* Subroutine */ int dlaev2_(doublereal *a, doublereal *b, doublereal *c__,
+	doublereal *rt1, doublereal *rt2, doublereal *cs1, doublereal *sn1)
+{
+    /* System generated locals */
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal ab, df, cs, ct, tb, sm, tn, rt, adf, acs;
+    static integer sgn1, sgn2;
+    static doublereal acmn, acmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
+       [  A   B  ]
+       [  B   C  ].
+    On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+    eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+    eigenvector for RT1, giving the decomposition
+
+       [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
+       [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
+
+    Arguments
+    =========
+
+    A       (input) DOUBLE PRECISION
+            The (1,1) element of the 2-by-2 matrix.
+
+    B       (input) DOUBLE PRECISION
+            The (1,2) element and the conjugate of the (2,1) element of
+            the 2-by-2 matrix.
+
+    C       (input) DOUBLE PRECISION
+            The (2,2) element of the 2-by-2 matrix.
+
+    RT1     (output) DOUBLE PRECISION
+            The eigenvalue of larger absolute value.
+
+    RT2     (output) DOUBLE PRECISION
+            The eigenvalue of smaller absolute value.
+
+    CS1     (output) DOUBLE PRECISION
+    SN1     (output) DOUBLE PRECISION
+            The vector (CS1, SN1) is a unit right eigenvector for RT1.
+
+    Further Details
+    ===============
+
+    RT1 is accurate to a few ulps barring over/underflow.
+
+    RT2 may be inaccurate if there is massive cancellation in the
+    determinant A*C-B*B; higher precision or correctly rounded or
+    correctly truncated arithmetic would be needed to compute RT2
+    accurately in all cases.
+
+    CS1 and SN1 are accurate to a few ulps barring over/underflow.
+
+    Overflow is possible only if RT1 is within a factor of 5 of overflow.
+    Underflow is harmless if the input data is 0 or exceeds
+       underflow_threshold / macheps.
+
+   =====================================================================
+
+
+       Compute the eigenvalues
+*/
+
+    sm = *a + *c__;
+    df = *a - *c__;
+    adf = abs(df);
+    tb = *b + *b;
+    ab = abs(tb);
+    if (abs(*a) > abs(*c__)) {
+	acmx = *a;
+	acmn = *c__;
+    } else {
+	acmx = *c__;
+	acmn = *a;
+    }
+    if (adf > ab) {
+/* Computing 2nd power */
+	d__1 = ab / adf;
+	rt = adf * sqrt(d__1 * d__1 + 1.);
+    } else if (adf < ab) {
+/* Computing 2nd power */
+	d__1 = adf / ab;
+	rt = ab * sqrt(d__1 * d__1 + 1.);
+    } else {
+
+/*        Includes case AB=ADF=0 */
+
+	rt = ab * sqrt(2.);
+    }
+    if (sm < 0.) {
+	*rt1 = (sm - rt) * .5;
+	sgn1 = -1;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else if (sm > 0.) {
+	*rt1 = (sm + rt) * .5;
+	sgn1 = 1;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else {
+
+/*        Includes case RT1 = RT2 = 0 */
+
+	*rt1 = rt * .5;
+	*rt2 = rt * -.5;
+	sgn1 = 1;
+    }
+
+/*     Compute the eigenvector */
+
+    if (df >= 0.) {
+	cs = df + rt;
+	sgn2 = 1;
+    } else {
+	cs = df - rt;
+	sgn2 = -1;
+    }
+    acs = abs(cs);
+    if (acs > ab) {
+	ct = -tb / cs;
+	*sn1 = 1. / sqrt(ct * ct + 1.);
+	*cs1 = ct * *sn1;
+    } else {
+	if (ab == 0.) {
+	    *cs1 = 1.;
+	    *sn1 = 0.;
+	} else {
+	    tn = -cs / tb;
+	    *cs1 = 1. / sqrt(tn * tn + 1.);
+	    *sn1 = tn * *cs1;
+	}
+    }
+    if (sgn1 == sgn2) {
+	tn = *cs1;
+	*cs1 = -(*sn1);
+	*sn1 = tn;
+    }
+    return 0;
+
+/*     End of DLAEV2 */
+
+} /* dlaev2_ */
+
+/* Subroutine */ int dlahqr_(logical *wantt, logical *wantz, integer *n,
+	integer *ilo, integer *ihi, doublereal *h__, integer *ldh, doublereal
+	*wr, doublereal *wi, integer *iloz, integer *ihiz, doublereal *z__,
+	integer *ldz, integer *info)
+{
+    /* System generated locals */
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer i__, j, k, l, m;
+    static doublereal s, v[3];
+    static integer i1, i2;
+    static doublereal t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22,
+	    h33, h44;
+    static integer nh;
+    static doublereal cs;
+    static integer nr;
+    static doublereal sn;
+    static integer nz;
+    static doublereal ave, h33s, h44s;
+    static integer itn, its;
+    static doublereal ulp, sum, tst1, h43h34, disc, unfl, ovfl;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static doublereal work[1];
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlanv2_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *), dlabad_(
+	    doublereal *, doublereal *);
+
+    extern /* Subroutine */ int dlarfg_(integer *, doublereal *, doublereal *,
+	     integer *, doublereal *);
+    extern doublereal dlanhs_(char *, integer *, doublereal *, integer *,
+	    doublereal *);
+    static doublereal smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLAHQR is an auxiliary routine called by DHSEQR to update the
+    eigenvalues and Schur decomposition already computed by DHSEQR, by
+    dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+
+    Arguments
+    =========
+
+    WANTT   (input) LOGICAL
+            = .TRUE. : the full Schur form T is required;
+            = .FALSE.: only eigenvalues are required.
+
+    WANTZ   (input) LOGICAL
+            = .TRUE. : the matrix of Schur vectors Z is required;
+            = .FALSE.: Schur vectors are not required.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper quasi-triangular in
+            rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless
+            ILO = 1). DLAHQR works primarily with the Hessenberg
+            submatrix in rows and columns ILO to IHI, but applies
+            transformations to all of H if WANTT is .TRUE..
+            1 <= ILO <= max(1,IHI); IHI <= N.
+
+    H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if WANTT is .TRUE., H is upper quasi-triangular in
+            rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+            standard form. If WANTT is .FALSE., the contents of H are
+            unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    WR      (output) DOUBLE PRECISION array, dimension (N)
+    WI      (output) DOUBLE PRECISION array, dimension (N)
+            The real and imaginary parts, respectively, of the computed
+            eigenvalues ILO to IHI are stored in the corresponding
+            elements of WR and WI. If two eigenvalues are computed as a
+            complex conjugate pair, they are stored in consecutive
+            elements of WR and WI, say the i-th and (i+1)th, with
+            WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
+            eigenvalues are stored in the same order as on the diagonal
+            of the Schur form returned in H, with WR(i) = H(i,i), and, if
+            H(i:i+1,i:i+1) is a 2-by-2 diagonal block,
+            WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i).
+
+    ILOZ    (input) INTEGER
+    IHIZ    (input) INTEGER
+            Specify the rows of Z to which transformations must be
+            applied if WANTZ is .TRUE..
+            1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+
+    Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+            If WANTZ is .TRUE., on entry Z must contain the current
+            matrix Z of transformations accumulated by DHSEQR, and on
+            exit Z has been updated; transformations are applied only to
+            the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+            If WANTZ is .FALSE., Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z. LDZ >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            > 0: DLAHQR failed to compute all the eigenvalues ILO to IHI
+                 in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
+                 elements i+1:ihi of WR and WI contain those eigenvalues
+                 which have been successfully computed.
+
+    Further Details
+    ===============
+
+    2-96 Based on modifications by
+       David Day, Sandia National Laboratory, USA
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --wr;
+    --wi;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	wr[*ilo] = h__[*ilo + *ilo * h_dim1];
+	wi[*ilo] = 0.;
+	return 0;
+    }
+
+    nh = *ihi - *ilo + 1;
+    nz = *ihiz - *iloz + 1;
+
+/*
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    unfl = SAFEMINIMUM;
+    ovfl = 1. / unfl;
+    dlabad_(&unfl, &ovfl);
+    ulp = PRECISION;
+    smlnum = unfl * (nh / ulp);
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are set inside the main loop.
+*/
+
+    if (*wantt) {
+	i1 = 1;
+	i2 = *n;
+    }
+
+/*     ITN is the total number of QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of 1 or 2. Each iteration of the loop works
+       with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L10:
+    l = *ilo;
+    if (i__ < *ilo) {
+	goto L150;
+    }
+
+/*
+       Perform QR iterations on rows and columns ILO to I until a
+       submatrix of order 1 or 2 splits off at the bottom because a
+       subdiagonal element has become negligible.
+*/
+
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    tst1 = (d__1 = h__[k - 1 + (k - 1) * h_dim1], abs(d__1)) + (d__2 =
+		     h__[k + k * h_dim1], abs(d__2));
+	    if (tst1 == 0.) {
+		i__3 = i__ - l + 1;
+		tst1 = dlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work);
+	    }
+/* Computing MAX */
+	    d__2 = ulp * tst1;
+	    if ((d__1 = h__[k + (k - 1) * h_dim1], abs(d__1)) <= max(d__2,
+		    smlnum)) {
+		goto L30;
+	    }
+/* L20: */
+	}
+L30:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible */
+
+	    h__[l + (l - 1) * h_dim1] = 0.;
+	}
+
+/*        Exit from loop if a submatrix of order 1 or 2 has split off. */
+
+	if (l >= i__ - 1) {
+	    goto L140;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! (*wantt)) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 10) || (its == 20)) {
+
+/*           Exceptional shift. */
+
+	    s = (d__1 = h__[i__ + (i__ - 1) * h_dim1], abs(d__1)) + (d__2 =
+		    h__[i__ - 1 + (i__ - 2) * h_dim1], abs(d__2));
+	    h44 = s * .75 + h__[i__ + i__ * h_dim1];
+	    h33 = h44;
+	    h43h34 = s * -.4375 * s;
+	} else {
+
+/*
+             Prepare to use Francis' double shift
+             (i.e. 2nd degree generalized Rayleigh quotient)
+*/
+
+	    h44 = h__[i__ + i__ * h_dim1];
+	    h33 = h__[i__ - 1 + (i__ - 1) * h_dim1];
+	    h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ *
+		    h_dim1];
+	    s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) *
+		    h_dim1];
+	    disc = (h33 - h44) * .5;
+	    disc = disc * disc + h43h34;
+	    if (disc > 0.) {
+
+/*              Real roots: use Wilkinson's shift twice */
+
+		disc = sqrt(disc);
+		ave = (h33 + h44) * .5;
+		if (abs(h33) - abs(h44) > 0.) {
+		    h33 = h33 * h44 - h43h34;
+		    h44 = h33 / (d_sign(&disc, &ave) + ave);
+		} else {
+		    h44 = d_sign(&disc, &ave) + ave;
+		}
+		h33 = h44;
+		h43h34 = 0.;
+	    }
+	}
+
+/*        Look for two consecutive small subdiagonal elements. */
+
+	i__2 = l;
+	for (m = i__ - 2; m >= i__2; --m) {
+/*
+             Determine the effect of starting the double-shift QR
+             iteration at row M, and see if this would make H(M,M-1)
+             negligible.
+*/
+
+	    h11 = h__[m + m * h_dim1];
+	    h22 = h__[m + 1 + (m + 1) * h_dim1];
+	    h21 = h__[m + 1 + m * h_dim1];
+	    h12 = h__[m + (m + 1) * h_dim1];
+	    h44s = h44 - h11;
+	    h33s = h33 - h11;
+	    v1 = (h33s * h44s - h43h34) / h21 + h12;
+	    v2 = h22 - h11 - h33s - h44s;
+	    v3 = h__[m + 2 + (m + 1) * h_dim1];
+	    s = abs(v1) + abs(v2) + abs(v3);
+	    v1 /= s;
+	    v2 /= s;
+	    v3 /= s;
+	    v[0] = v1;
+	    v[1] = v2;
+	    v[2] = v3;
+	    if (m == l) {
+		goto L50;
+	    }
+	    h00 = h__[m - 1 + (m - 1) * h_dim1];
+	    h10 = h__[m + (m - 1) * h_dim1];
+	    tst1 = abs(v1) * (abs(h00) + abs(h11) + abs(h22));
+	    if (abs(h10) * (abs(v2) + abs(v3)) <= ulp * tst1) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+
+/*        Double-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = m; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix. NR is the order of G.
+
+   Computing MIN
+*/
+	    i__3 = 3, i__4 = i__ - k + 1;
+	    nr = min(i__3,i__4);
+	    if (k > m) {
+		dcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    dlarfg_(&nr, v, &v[1], &c__1, &t1);
+	    if (k > m) {
+		h__[k + (k - 1) * h_dim1] = v[0];
+		h__[k + 1 + (k - 1) * h_dim1] = 0.;
+		if (k < i__ - 1) {
+		    h__[k + 2 + (k - 1) * h_dim1] = 0.;
+		}
+	    } else if (m > l) {
+		h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1];
+	    }
+	    v2 = v[1];
+	    t2 = t1 * v2;
+	    if (nr == 3) {
+		v3 = v[2];
+		t3 = t1 * v3;
+
+/*
+                Apply G from the left to transform the rows of the matrix
+                in columns K to I2.
+*/
+
+		i__3 = i2;
+		for (j = k; j <= i__3; ++j) {
+		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]
+			    + v3 * h__[k + 2 + j * h_dim1];
+		    h__[k + j * h_dim1] -= sum * t1;
+		    h__[k + 1 + j * h_dim1] -= sum * t2;
+		    h__[k + 2 + j * h_dim1] -= sum * t3;
+/* L60: */
+		}
+
+/*
+                Apply G from the right to transform the columns of the
+                matrix in rows I1 to min(K+3,I).
+
+   Computing MIN
+*/
+		i__4 = k + 3;
+		i__3 = min(i__4,i__);
+		for (j = i1; j <= i__3; ++j) {
+		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
+			     + v3 * h__[j + (k + 2) * h_dim1];
+		    h__[j + k * h_dim1] -= sum * t1;
+		    h__[j + (k + 1) * h_dim1] -= sum * t2;
+		    h__[j + (k + 2) * h_dim1] -= sum * t3;
+/* L70: */
+		}
+
+		if (*wantz) {
+
+/*                 Accumulate transformations in the matrix Z */
+
+		    i__3 = *ihiz;
+		    for (j = *iloz; j <= i__3; ++j) {
+			sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) *
+				z_dim1] + v3 * z__[j + (k + 2) * z_dim1];
+			z__[j + k * z_dim1] -= sum * t1;
+			z__[j + (k + 1) * z_dim1] -= sum * t2;
+			z__[j + (k + 2) * z_dim1] -= sum * t3;
+/* L80: */
+		    }
+		}
+	    } else if (nr == 2) {
+
+/*
+                Apply G from the left to transform the rows of the matrix
+                in columns K to I2.
+*/
+
+		i__3 = i2;
+		for (j = k; j <= i__3; ++j) {
+		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1];
+		    h__[k + j * h_dim1] -= sum * t1;
+		    h__[k + 1 + j * h_dim1] -= sum * t2;
+/* L90: */
+		}
+
+/*
+                Apply G from the right to transform the columns of the
+                matrix in rows I1 to min(K+3,I).
+*/
+
+		i__3 = i__;
+		for (j = i1; j <= i__3; ++j) {
+		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
+			    ;
+		    h__[j + k * h_dim1] -= sum * t1;
+		    h__[j + (k + 1) * h_dim1] -= sum * t2;
+/* L100: */
+		}
+
+		if (*wantz) {
+
+/*                 Accumulate transformations in the matrix Z */
+
+		    i__3 = *ihiz;
+		    for (j = *iloz; j <= i__3; ++j) {
+			sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) *
+				z_dim1];
+			z__[j + k * z_dim1] -= sum * t1;
+			z__[j + (k + 1) * z_dim1] -= sum * t2;
+/* L110: */
+		    }
+		}
+	    }
+/* L120: */
+	}
+
+/* L130: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L140:
+
+    if (l == i__) {
+
+/*        H(I,I-1) is negligible: one eigenvalue has converged. */
+
+	wr[i__] = h__[i__ + i__ * h_dim1];
+	wi[i__] = 0.;
+    } else if (l == i__ - 1) {
+
+/*
+          H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
+
+          Transform the 2-by-2 submatrix to standard Schur form,
+          and compute and store the eigenvalues.
+*/
+
+	dlanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ *
+		h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ *
+		h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs,
+		&sn);
+
+	if (*wantt) {
+
+/*           Apply the transformation to the rest of H. */
+
+	    if (i2 > i__) {
+		i__1 = i2 - i__;
+		drot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[
+			i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn);
+	    }
+	    i__1 = i__ - i1 - 1;
+	    drot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ *
+		     h_dim1], &c__1, &cs, &sn);
+	}
+	if (*wantz) {
+
+/*           Apply the transformation to Z. */
+
+	    drot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz +
+		    i__ * z_dim1], &c__1, &cs, &sn);
+	}
+    }
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L10;
+
+L150:
+    return 0;
+
+/*     End of DLAHQR */
+
+} /* dlahqr_ */
+
+/* Subroutine */ int dlahrd_(integer *n, integer *k, integer *nb, doublereal *
+	a, integer *lda, doublereal *tau, doublereal *t, integer *ldt,
+	doublereal *y, integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer i__;
+    static doublereal ei;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *), dgemv_(char *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *), dcopy_(integer *, doublereal *,
+	    integer *, doublereal *, integer *), daxpy_(integer *, doublereal
+	    *, doublereal *, integer *, doublereal *, integer *), dtrmv_(char
+	    *, char *, char *, integer *, doublereal *, integer *, doublereal
+	    *, integer *), dlarfg_(integer *,
+	    doublereal *, doublereal *, integer *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLAHRD reduces the first NB columns of a real general n-by-(n-k+1)
+    matrix A so that elements below the k-th subdiagonal are zero. The
+    reduction is performed by an orthogonal similarity transformation
+    Q' * A * Q. The routine returns the matrices V and T which determine
+    Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+
+    This is an auxiliary routine called by DGEHRD.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    K       (input) INTEGER
+            The offset for the reduction. Elements below the k-th
+            subdiagonal in the first NB columns are reduced to zero.
+
+    NB      (input) INTEGER
+            The number of columns to be reduced.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N-K+1)
+            On entry, the n-by-(n-k+1) general matrix A.
+            On exit, the elements on and above the k-th subdiagonal in
+            the first NB columns are overwritten with the corresponding
+            elements of the reduced matrix; the elements below the k-th
+            subdiagonal, with the array TAU, represent the matrix Q as a
+            product of elementary reflectors. The other columns of A are
+            unchanged. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) DOUBLE PRECISION array, dimension (NB)
+            The scalar factors of the elementary reflectors. See Further
+            Details.
+
+    T       (output) DOUBLE PRECISION array, dimension (LDT,NB)
+            The upper triangular matrix T.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T.  LDT >= NB.
+
+    Y       (output) DOUBLE PRECISION array, dimension (LDY,NB)
+            The n-by-nb matrix Y.
+
+    LDY     (input) INTEGER
+            The leading dimension of the array Y. LDY >= N.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of nb elementary reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+    A(i+k+1:n,i), and tau in TAU(i).
+
+    The elements of the vectors v together form the (n-k+1)-by-nb matrix
+    V which is needed, with T and Y, to apply the transformation to the
+    unreduced part of the matrix, using an update of the form:
+    A := (I - V*T*V') * (A - Y*V').
+
+    The contents of A on exit are illustrated by the following example
+    with n = 7, k = 3 and nb = 2:
+
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( h   h   a   a   a )
+       ( v1  h   a   a   a )
+       ( v1  v2  a   a   a )
+       ( v1  v2  a   a   a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    --tau;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if (*n <= 1) {
+	return 0;
+    }
+
+    i__1 = *nb;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (i__ > 1) {
+
+/*
+             Update A(1:n,i)
+
+             Compute i-th column of A - Y * V'
+*/
+
+	    i__2 = i__ - 1;
+	    dgemv_("No transpose", n, &i__2, &c_b3001, &y[y_offset], ldy, &a[*
+		    k + i__ - 1 + a_dim1], lda, &c_b2865, &a[i__ * a_dim1 + 1]
+		    , &c__1);
+
+/*
+             Apply I - V * T' * V' to this column (call it b) from the
+             left, using the last column of T as workspace
+
+             Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+                      ( V2 )             ( b2 )
+
+             where V1 is unit lower triangular
+
+             w := V1' * b1
+*/
+
+	    i__2 = i__ - 1;
+	    dcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 +
+		    1], &c__1);
+	    i__2 = i__ - 1;
+	    dtrmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1],
+		    lda, &t[*nb * t_dim1 + 1], &c__1);
+
+/*           w := w + V2'*b2 */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[*k + i__ + a_dim1],
+		     lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2865, &t[*
+		    nb * t_dim1 + 1], &c__1);
+
+/*           w := T'*w */
+
+	    i__2 = i__ - 1;
+	    dtrmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt,
+		     &t[*nb * t_dim1 + 1], &c__1);
+
+/*           b2 := b2 - V2*w */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[*k + i__ +
+		    a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b2865, &a[*
+		    k + i__ + i__ * a_dim1], &c__1);
+
+/*           b1 := b1 - V1*w */
+
+	    i__2 = i__ - 1;
+	    dtrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1]
+		    , lda, &t[*nb * t_dim1 + 1], &c__1);
+	    i__2 = i__ - 1;
+	    daxpy_(&i__2, &c_b3001, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 +
+		    i__ * a_dim1], &c__1);
+
+	    a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei;
+	}
+
+/*
+          Generate the elementary reflector H(i) to annihilate
+          A(k+i+1:n,i)
+*/
+
+	i__2 = *n - *k - i__ + 1;
+/* Computing MIN */
+	i__3 = *k + i__ + 1;
+	dlarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ *
+		a_dim1], &c__1, &tau[i__]);
+	ei = a[*k + i__ + i__ * a_dim1];
+	a[*k + i__ + i__ * a_dim1] = 1.;
+
+/*        Compute  Y(1:n,i) */
+
+	i__2 = *n - *k - i__ + 1;
+	dgemv_("No transpose", n, &i__2, &c_b2865, &a[(i__ + 1) * a_dim1 + 1],
+		 lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2879, &y[i__ *
+		y_dim1 + 1], &c__1);
+	i__2 = *n - *k - i__ + 1;
+	i__3 = i__ - 1;
+	dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[*k + i__ + a_dim1],
+		lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b2879, &t[i__ *
+		t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	dgemv_("No transpose", n, &i__2, &c_b3001, &y[y_offset], ldy, &t[i__ *
+		 t_dim1 + 1], &c__1, &c_b2865, &y[i__ * y_dim1 + 1], &c__1);
+	dscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1);
+
+/*        Compute T(1:i,i) */
+
+	i__2 = i__ - 1;
+	d__1 = -tau[i__];
+	dscal_(&i__2, &d__1, &t[i__ * t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt,
+		&t[i__ * t_dim1 + 1], &c__1)
+		;
+	t[i__ + i__ * t_dim1] = tau[i__];
+
+/* L10: */
+    }
+    a[*k + *nb + *nb * a_dim1] = ei;
+
+    return 0;
+
+/*     End of DLAHRD */
+
+} /* dlahrd_ */
+
+/* Subroutine */ int dlaln2_(logical *ltrans, integer *na, integer *nw,
+	doublereal *smin, doublereal *ca, doublereal *a, integer *lda,
+	doublereal *d1, doublereal *d2, doublereal *b, integer *ldb,
+	doublereal *wr, doublereal *wi, doublereal *x, integer *ldx,
+	doublereal *scale, doublereal *xnorm, integer *info)
+{
+    /* Initialized data */
+
+    static logical zswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ };
+    static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ };
+    static integer ipivot[16]	/* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2,
+	    4,3,2,1 };
+
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset;
+    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
+    static doublereal equiv_0[4], equiv_1[4];
+
+    /* Local variables */
+    static integer j;
+#define ci (equiv_0)
+#define cr (equiv_1)
+    static doublereal bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22,
+	    cr21, cr22, li21, csi, ui11, lr21, ui12, ui22;
+#define civ (equiv_0)
+    static doublereal csr, ur11, ur12, ur22;
+#define crv (equiv_1)
+    static doublereal bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs;
+    static integer icmax;
+    static doublereal bnorm, cnorm, smini;
+
+    extern /* Subroutine */ int dladiv_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *);
+    static doublereal bignum, smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLALN2 solves a system of the form  (ca A - w D ) X = s B
+    or (ca A' - w D) X = s B   with possible scaling ("s") and
+    perturbation of A.  (A' means A-transpose.)
+
+    A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
+    real diagonal matrix, w is a real or complex value, and X and B are
+    NA x 1 matrices -- real if w is real, complex if w is complex.  NA
+    may be 1 or 2.
+
+    If w is complex, X and B are represented as NA x 2 matrices,
+    the first column of each being the real part and the second
+    being the imaginary part.
+
+    "s" is a scaling factor (.LE. 1), computed by DLALN2, which is
+    so chosen that X can be computed without overflow.  X is further
+    scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
+    than overflow.
+
+    If both singular values of (ca A - w D) are less than SMIN,
+    SMIN*identity will be used instead of (ca A - w D).  If only one
+    singular value is less than SMIN, one element of (ca A - w D) will be
+    perturbed enough to make the smallest singular value roughly SMIN.
+    If both singular values are at least SMIN, (ca A - w D) will not be
+    perturbed.  In any case, the perturbation will be at most some small
+    multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
+    are computed by infinity-norm approximations, and thus will only be
+    correct to a factor of 2 or so.
+
+    Note: all input quantities are assumed to be smaller than overflow
+    by a reasonable factor.  (See BIGNUM.)
+
+    Arguments
+    ==========
+
+    LTRANS  (input) LOGICAL
+            =.TRUE.:  A-transpose will be used.
+            =.FALSE.: A will be used (not transposed.)
+
+    NA      (input) INTEGER
+            The size of the matrix A.  It may (only) be 1 or 2.
+
+    NW      (input) INTEGER
+            1 if "w" is real, 2 if "w" is complex.  It may only be 1
+            or 2.
+
+    SMIN    (input) DOUBLE PRECISION
+            The desired lower bound on the singular values of A.  This
+            should be a safe distance away from underflow or overflow,
+            say, between (underflow/machine precision) and  (machine
+            precision * overflow ).  (See BIGNUM and ULP.)
+
+    CA      (input) DOUBLE PRECISION
+            The coefficient c, which A is multiplied by.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,NA)
+            The NA x NA matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of A.  It must be at least NA.
+
+    D1      (input) DOUBLE PRECISION
+            The 1,1 element in the diagonal matrix D.
+
+    D2      (input) DOUBLE PRECISION
+            The 2,2 element in the diagonal matrix D.  Not used if NW=1.
+
+    B       (input) DOUBLE PRECISION array, dimension (LDB,NW)
+            The NA x NW matrix B (right-hand side).  If NW=2 ("w" is
+            complex), column 1 contains the real part of B and column 2
+            contains the imaginary part.
+
+    LDB     (input) INTEGER
+            The leading dimension of B.  It must be at least NA.
+
+    WR      (input) DOUBLE PRECISION
+            The real part of the scalar "w".
+
+    WI      (input) DOUBLE PRECISION
+            The imaginary part of the scalar "w".  Not used if NW=1.
+
+    X       (output) DOUBLE PRECISION array, dimension (LDX,NW)
+            The NA x NW matrix X (unknowns), as computed by DLALN2.
+            If NW=2 ("w" is complex), on exit, column 1 will contain
+            the real part of X and column 2 will contain the imaginary
+            part.
+
+    LDX     (input) INTEGER
+            The leading dimension of X.  It must be at least NA.
+
+    SCALE   (output) DOUBLE PRECISION
+            The scale factor that B must be multiplied by to insure
+            that overflow does not occur when computing X.  Thus,
+            (ca A - w D) X  will be SCALE*B, not B (ignoring
+            perturbations of A.)  It will be at most 1.
+
+    XNORM   (output) DOUBLE PRECISION
+            The infinity-norm of X, when X is regarded as an NA x NW
+            real matrix.
+
+    INFO    (output) INTEGER
+            An error flag.  It will be set to zero if no error occurs,
+            a negative number if an argument is in error, or a positive
+            number if  ca A - w D  had to be perturbed.
+            The possible values are:
+            = 0: No error occurred, and (ca A - w D) did not have to be
+                   perturbed.
+            = 1: (ca A - w D) had to be perturbed to make its smallest
+                 (or only) singular value greater than SMIN.
+            NOTE: In the interests of speed, this routine does not
+                  check the inputs for errors.
+
+   =====================================================================
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+
+    /* Function Body */
+
+/*     Compute BIGNUM */
+
+    smlnum = 2. * SAFEMINIMUM;
+    bignum = 1. / smlnum;
+    smini = max(*smin,smlnum);
+
+/*     Don't check for input errors */
+
+    *info = 0;
+
+/*     Standard Initializations */
+
+    *scale = 1.;
+
+    if (*na == 1) {
+
+/*        1 x 1  (i.e., scalar) system   C X = B */
+
+	if (*nw == 1) {
+
+/*
+             Real 1x1 system.
+
+             C = ca A - w D
+*/
+
+	    csr = *ca * a[a_dim1 + 1] - *wr * *d1;
+	    cnorm = abs(csr);
+
+/*           If | C | < SMINI, use C = SMINI */
+
+	    if (cnorm < smini) {
+		csr = smini;
+		cnorm = smini;
+		*info = 1;
+	    }
+
+/*           Check scaling for  X = B / C */
+
+	    bnorm = (d__1 = b[b_dim1 + 1], abs(d__1));
+	    if (cnorm < 1. && bnorm > 1.) {
+		if (bnorm > bignum * cnorm) {
+		    *scale = 1. / bnorm;
+		}
+	    }
+
+/*           Compute X */
+
+	    x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr;
+	    *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1));
+	} else {
+
+/*
+             Complex 1x1 system (w is complex)
+
+             C = ca A - w D
+*/
+
+	    csr = *ca * a[a_dim1 + 1] - *wr * *d1;
+	    csi = -(*wi) * *d1;
+	    cnorm = abs(csr) + abs(csi);
+
+/*           If | C | < SMINI, use C = SMINI */
+
+	    if (cnorm < smini) {
+		csr = smini;
+		csi = 0.;
+		cnorm = smini;
+		*info = 1;
+	    }
+
+/*           Check scaling for  X = B / C */
+
+	    bnorm = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[((b_dim1) <<
+		     (1)) + 1], abs(d__2));
+	    if (cnorm < 1. && bnorm > 1.) {
+		if (bnorm > bignum * cnorm) {
+		    *scale = 1. / bnorm;
+		}
+	    }
+
+/*           Compute X */
+
+	    d__1 = *scale * b[b_dim1 + 1];
+	    d__2 = *scale * b[((b_dim1) << (1)) + 1];
+	    dladiv_(&d__1, &d__2, &csr, &csi, &x[x_dim1 + 1], &x[((x_dim1) <<
+		    (1)) + 1]);
+	    *xnorm = (d__1 = x[x_dim1 + 1], abs(d__1)) + (d__2 = x[((x_dim1)
+		    << (1)) + 1], abs(d__2));
+	}
+
+    } else {
+
+/*
+          2x2 System
+
+          Compute the real part of  C = ca A - w D  (or  ca A' - w D )
+*/
+
+	cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1;
+	cr[3] = *ca * a[((a_dim1) << (1)) + 2] - *wr * *d2;
+	if (*ltrans) {
+	    cr[2] = *ca * a[a_dim1 + 2];
+	    cr[1] = *ca * a[((a_dim1) << (1)) + 1];
+	} else {
+	    cr[1] = *ca * a[a_dim1 + 2];
+	    cr[2] = *ca * a[((a_dim1) << (1)) + 1];
+	}
+
+	if (*nw == 1) {
+
+/*
+             Real 2x2 system  (w is real)
+
+             Find the largest element in C
+*/
+
+	    cmax = 0.;
+	    icmax = 0;
+
+	    for (j = 1; j <= 4; ++j) {
+		if ((d__1 = crv[j - 1], abs(d__1)) > cmax) {
+		    cmax = (d__1 = crv[j - 1], abs(d__1));
+		    icmax = j;
+		}
+/* L10: */
+	    }
+
+/*           If norm(C) < SMINI, use SMINI*identity. */
+
+	    if (cmax < smini) {
+/* Computing MAX */
+		d__3 = (d__1 = b[b_dim1 + 1], abs(d__1)), d__4 = (d__2 = b[
+			b_dim1 + 2], abs(d__2));
+		bnorm = max(d__3,d__4);
+		if (smini < 1. && bnorm > 1.) {
+		    if (bnorm > bignum * smini) {
+			*scale = 1. / bnorm;
+		    }
+		}
+		temp = *scale / smini;
+		x[x_dim1 + 1] = temp * b[b_dim1 + 1];
+		x[x_dim1 + 2] = temp * b[b_dim1 + 2];
+		*xnorm = temp * bnorm;
+		*info = 1;
+		return 0;
+	    }
+
+/*           Gaussian elimination with complete pivoting. */
+
+	    ur11 = crv[icmax - 1];
+	    cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1];
+	    ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1];
+	    cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1];
+	    ur11r = 1. / ur11;
+	    lr21 = ur11r * cr21;
+	    ur22 = cr22 - ur12 * lr21;
+
+/*           If smaller pivot < SMINI, use SMINI */
+
+	    if (abs(ur22) < smini) {
+		ur22 = smini;
+		*info = 1;
+	    }
+	    if (rswap[icmax - 1]) {
+		br1 = b[b_dim1 + 2];
+		br2 = b[b_dim1 + 1];
+	    } else {
+		br1 = b[b_dim1 + 1];
+		br2 = b[b_dim1 + 2];
+	    }
+	    br2 -= lr21 * br1;
+/* Computing MAX */
+	    d__2 = (d__1 = br1 * (ur22 * ur11r), abs(d__1)), d__3 = abs(br2);
+	    bbnd = max(d__2,d__3);
+	    if (bbnd > 1. && abs(ur22) < 1.) {
+		if (bbnd >= bignum * abs(ur22)) {
+		    *scale = 1. / bbnd;
+		}
+	    }
+
+	    xr2 = br2 * *scale / ur22;
+	    xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12);
+	    if (zswap[icmax - 1]) {
+		x[x_dim1 + 1] = xr2;
+		x[x_dim1 + 2] = xr1;
+	    } else {
+		x[x_dim1 + 1] = xr1;
+		x[x_dim1 + 2] = xr2;
+	    }
+/* Computing MAX */
+	    d__1 = abs(xr1), d__2 = abs(xr2);
+	    *xnorm = max(d__1,d__2);
+
+/*           Further scaling if  norm(A) norm(X) > overflow */
+
+	    if (*xnorm > 1. && cmax > 1.) {
+		if (*xnorm > bignum / cmax) {
+		    temp = cmax / bignum;
+		    x[x_dim1 + 1] = temp * x[x_dim1 + 1];
+		    x[x_dim1 + 2] = temp * x[x_dim1 + 2];
+		    *xnorm = temp * *xnorm;
+		    *scale = temp * *scale;
+		}
+	    }
+	} else {
+
+/*
+             Complex 2x2 system  (w is complex)
+
+             Find the largest element in C
+*/
+
+	    ci[0] = -(*wi) * *d1;
+	    ci[1] = 0.;
+	    ci[2] = 0.;
+	    ci[3] = -(*wi) * *d2;
+	    cmax = 0.;
+	    icmax = 0;
+
+	    for (j = 1; j <= 4; ++j) {
+		if ((d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1], abs(
+			d__2)) > cmax) {
+		    cmax = (d__1 = crv[j - 1], abs(d__1)) + (d__2 = civ[j - 1]
+			    , abs(d__2));
+		    icmax = j;
+		}
+/* L20: */
+	    }
+
+/*           If norm(C) < SMINI, use SMINI*identity. */
+
+	    if (cmax < smini) {
+/* Computing MAX */
+		d__5 = (d__1 = b[b_dim1 + 1], abs(d__1)) + (d__2 = b[((b_dim1)
+			 << (1)) + 1], abs(d__2)), d__6 = (d__3 = b[b_dim1 +
+			2], abs(d__3)) + (d__4 = b[((b_dim1) << (1)) + 2],
+			abs(d__4));
+		bnorm = max(d__5,d__6);
+		if (smini < 1. && bnorm > 1.) {
+		    if (bnorm > bignum * smini) {
+			*scale = 1. / bnorm;
+		    }
+		}
+		temp = *scale / smini;
+		x[x_dim1 + 1] = temp * b[b_dim1 + 1];
+		x[x_dim1 + 2] = temp * b[b_dim1 + 2];
+		x[((x_dim1) << (1)) + 1] = temp * b[((b_dim1) << (1)) + 1];
+		x[((x_dim1) << (1)) + 2] = temp * b[((b_dim1) << (1)) + 2];
+		*xnorm = temp * bnorm;
+		*info = 1;
+		return 0;
+	    }
+
+/*           Gaussian elimination with complete pivoting. */
+
+	    ur11 = crv[icmax - 1];
+	    ui11 = civ[icmax - 1];
+	    cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1];
+	    ci21 = civ[ipivot[((icmax) << (2)) - 3] - 1];
+	    ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1];
+	    ui12 = civ[ipivot[((icmax) << (2)) - 2] - 1];
+	    cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1];
+	    ci22 = civ[ipivot[((icmax) << (2)) - 1] - 1];
+	    if ((icmax == 1) || (icmax == 4)) {
+
+/*              Code when off-diagonals of pivoted C are real */
+
+		if (abs(ur11) > abs(ui11)) {
+		    temp = ui11 / ur11;
+/* Computing 2nd power */
+		    d__1 = temp;
+		    ur11r = 1. / (ur11 * (d__1 * d__1 + 1.));
+		    ui11r = -temp * ur11r;
+		} else {
+		    temp = ur11 / ui11;
+/* Computing 2nd power */
+		    d__1 = temp;
+		    ui11r = -1. / (ui11 * (d__1 * d__1 + 1.));
+		    ur11r = -temp * ui11r;
+		}
+		lr21 = cr21 * ur11r;
+		li21 = cr21 * ui11r;
+		ur12s = ur12 * ur11r;
+		ui12s = ur12 * ui11r;
+		ur22 = cr22 - ur12 * lr21;
+		ui22 = ci22 - ur12 * li21;
+	    } else {
+
+/*              Code when diagonals of pivoted C are real */
+
+		ur11r = 1. / ur11;
+		ui11r = 0.;
+		lr21 = cr21 * ur11r;
+		li21 = ci21 * ur11r;
+		ur12s = ur12 * ur11r;
+		ui12s = ui12 * ur11r;
+		ur22 = cr22 - ur12 * lr21 + ui12 * li21;
+		ui22 = -ur12 * li21 - ui12 * lr21;
+	    }
+	    u22abs = abs(ur22) + abs(ui22);
+
+/*           If smaller pivot < SMINI, use SMINI */
+
+	    if (u22abs < smini) {
+		ur22 = smini;
+		ui22 = 0.;
+		*info = 1;
+	    }
+	    if (rswap[icmax - 1]) {
+		br2 = b[b_dim1 + 1];
+		br1 = b[b_dim1 + 2];
+		bi2 = b[((b_dim1) << (1)) + 1];
+		bi1 = b[((b_dim1) << (1)) + 2];
+	    } else {
+		br1 = b[b_dim1 + 1];
+		br2 = b[b_dim1 + 2];
+		bi1 = b[((b_dim1) << (1)) + 1];
+		bi2 = b[((b_dim1) << (1)) + 2];
+	    }
+	    br2 = br2 - lr21 * br1 + li21 * bi1;
+	    bi2 = bi2 - li21 * br1 - lr21 * bi1;
+/* Computing MAX */
+	    d__1 = (abs(br1) + abs(bi1)) * (u22abs * (abs(ur11r) + abs(ui11r))
+		    ), d__2 = abs(br2) + abs(bi2);
+	    bbnd = max(d__1,d__2);
+	    if (bbnd > 1. && u22abs < 1.) {
+		if (bbnd >= bignum * u22abs) {
+		    *scale = 1. / bbnd;
+		    br1 = *scale * br1;
+		    bi1 = *scale * bi1;
+		    br2 = *scale * br2;
+		    bi2 = *scale * bi2;
+		}
+	    }
+
+	    dladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2);
+	    xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2;
+	    xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2;
+	    if (zswap[icmax - 1]) {
+		x[x_dim1 + 1] = xr2;
+		x[x_dim1 + 2] = xr1;
+		x[((x_dim1) << (1)) + 1] = xi2;
+		x[((x_dim1) << (1)) + 2] = xi1;
+	    } else {
+		x[x_dim1 + 1] = xr1;
+		x[x_dim1 + 2] = xr2;
+		x[((x_dim1) << (1)) + 1] = xi1;
+		x[((x_dim1) << (1)) + 2] = xi2;
+	    }
+/* Computing MAX */
+	    d__1 = abs(xr1) + abs(xi1), d__2 = abs(xr2) + abs(xi2);
+	    *xnorm = max(d__1,d__2);
+
+/*           Further scaling if  norm(A) norm(X) > overflow */
+
+	    if (*xnorm > 1. && cmax > 1.) {
+		if (*xnorm > bignum / cmax) {
+		    temp = cmax / bignum;
+		    x[x_dim1 + 1] = temp * x[x_dim1 + 1];
+		    x[x_dim1 + 2] = temp * x[x_dim1 + 2];
+		    x[((x_dim1) << (1)) + 1] = temp * x[((x_dim1) << (1)) + 1]
+			    ;
+		    x[((x_dim1) << (1)) + 2] = temp * x[((x_dim1) << (1)) + 2]
+			    ;
+		    *xnorm = temp * *xnorm;
+		    *scale = temp * *scale;
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DLALN2 */
+
+} /* dlaln2_ */
+
+#undef crv
+#undef civ
+#undef cr
+#undef ci
+
+
+/* Subroutine */ int dlals0_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, integer *nrhs, doublereal *b, integer *ldb, doublereal
+	*bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol,
+	integer *ldgcol, doublereal *givnum, integer *ldgnum, doublereal *
+	poles, doublereal *difl, doublereal *difr, doublereal *z__, integer *
+	k, doublereal *c__, doublereal *s, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset,
+	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1,
+	    poles_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer i__, j, m, n;
+    static doublereal dj;
+    static integer nlp1;
+    static doublereal temp;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    static doublereal diflj, difrj, dsigj;
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *), dcopy_(integer *,
+	    doublereal *, integer *, doublereal *, integer *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlacpy_(char *, integer *, integer
+	    *, doublereal *, integer *, doublereal *, integer *),
+	    xerbla_(char *, integer *);
+    static doublereal dsigjp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 1, 1999
+
+
+    Purpose
+    =======
+
+    DLALS0 applies back the multiplying factors of either the left or the
+    right singular vector matrix of a diagonal matrix appended by a row
+    to the right hand side matrix B in solving the least squares problem
+    using the divide-and-conquer SVD approach.
+
+    For the left singular vector matrix, three types of orthogonal
+    matrices are involved:
+
+    (1L) Givens rotations: the number of such rotations is GIVPTR; the
+         pairs of columns/rows they were applied to are stored in GIVCOL;
+         and the C- and S-values of these rotations are stored in GIVNUM.
+
+    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+         row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+         J-th row.
+
+    (3L) The left singular vector matrix of the remaining matrix.
+
+    For the right singular vector matrix, four types of orthogonal
+    matrices are involved:
+
+    (1R) The right singular vector matrix of the remaining matrix.
+
+    (2R) If SQRE = 1, one extra Givens rotation to generate the right
+         null space.
+
+    (3R) The inverse transformation of (2L).
+
+    (4R) The inverse transformation of (1L).
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed in
+           factored form:
+           = 0: Left singular vector matrix.
+           = 1: Right singular vector matrix.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block. NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block. NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input/output) DOUBLE PRECISION array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B. LDB must be at least
+           max(1,MAX( M, N ) ).
+
+    BX     (workspace) DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    PERM   (input) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) applied
+           to the two blocks.
+
+    GIVPTR (input) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of rows/columns
+           involved in a Givens rotation.
+
+    LDGCOL (input) INTEGER
+           The leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value used in the
+           corresponding Givens rotation.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of arrays DIFR, POLES and
+           GIVNUM, must be at least K.
+
+    POLES  (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           On entry, POLES(1:K, 1) contains the new singular
+           values obtained from solving the secular equation, and
+           POLES(1:K, 2) is an array containing the poles in the secular
+           equation.
+
+    DIFL   (input) DOUBLE PRECISION array, dimension ( K ).
+           On entry, DIFL(I) is the distance between I-th updated
+           (undeflated) singular value and the I-th (undeflated) old
+           singular value.
+
+    DIFR   (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ).
+           On entry, DIFR(I, 1) contains the distances between I-th
+           updated (undeflated) singular value and the I+1-th
+           (undeflated) old singular value. And DIFR(I, 2) is the
+           normalizing factor for the I-th right singular vector.
+
+    Z      (input) DOUBLE PRECISION array, dimension ( K )
+           Contain the components of the deflation-adjusted updating row
+           vector.
+
+    K      (input) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    C      (input) DOUBLE PRECISION
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (input) DOUBLE PRECISION
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension ( K )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    difr_dim1 = *ldgnum;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    poles_dim1 = *ldgnum;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    --difl;
+    --z__;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    }
+
+    n = *nl + *nr + 1;
+
+    if (*nrhs < 1) {
+	*info = -5;
+    } else if (*ldb < n) {
+	*info = -7;
+    } else if (*ldbx < n) {
+	*info = -9;
+    } else if (*givptr < 0) {
+	*info = -11;
+    } else if (*ldgcol < n) {
+	*info = -13;
+    } else if (*ldgnum < n) {
+	*info = -15;
+    } else if (*k < 1) {
+	*info = -20;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLALS0", &i__1);
+	return 0;
+    }
+
+    m = n + *sqre;
+    nlp1 = *nl + 1;
+
+    if (*icompq == 0) {
+
+/*
+          Apply back orthogonal transformations from the left.
+
+          Step (1L): apply back the Givens rotations performed.
+*/
+
+	i__1 = *givptr;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    drot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1])
+		    ;
+/* L10: */
+	}
+
+/*        Step (2L): permute rows of B. */
+
+	dcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    dcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1],
+		    ldbx);
+/* L20: */
+	}
+
+/*
+          Step (3L): apply the inverse of the left singular vector
+          matrix to BX.
+*/
+
+	if (*k == 1) {
+	    dcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
+	    if (z__[1] < 0.) {
+		dscal_(nrhs, &c_b3001, &b[b_offset], ldb);
+	    }
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		diflj = difl[j];
+		dj = poles[j + poles_dim1];
+		dsigj = -poles[j + ((poles_dim1) << (1))];
+		if (j < *k) {
+		    difrj = -difr[j + difr_dim1];
+		    dsigjp = -poles[j + 1 + ((poles_dim1) << (1))];
+		}
+		if ((z__[j] == 0.) || (poles[j + ((poles_dim1) << (1))] == 0.)
+			) {
+		    work[j] = 0.;
+		} else {
+		    work[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] /
+			    diflj / (poles[j + ((poles_dim1) << (1))] + dj);
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1))
+			    ] == 0.)) {
+			work[i__] = 0.;
+		    } else {
+			work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (dlamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigj) - diflj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L30: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1))
+			    ] == 0.)) {
+			work[i__] = 0.;
+		    } else {
+			work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (dlamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigjp) + difrj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L40: */
+		}
+		work[1] = -1.;
+		temp = dnrm2_(k, &work[1], &c__1);
+		dgemv_("T", k, nrhs, &c_b2865, &bx[bx_offset], ldbx, &work[1],
+			 &c__1, &c_b2879, &b[j + b_dim1], ldb);
+		dlascl_("G", &c__0, &c__0, &temp, &c_b2865, &c__1, nrhs, &b[j
+			+ b_dim1], ldb, info);
+/* L50: */
+	    }
+	}
+
+/*        Move the deflated rows of BX to B also. */
+
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    dlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1
+		    + b_dim1], ldb);
+	}
+    } else {
+
+/*
+          Apply back the right orthogonal transformations.
+
+          Step (1R): apply back the new right singular vector matrix
+          to B.
+*/
+
+	if (*k == 1) {
+	    dcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		dsigj = poles[j + ((poles_dim1) << (1))];
+		if (z__[j] == 0.) {
+		    work[j] = 0.;
+		} else {
+		    work[j] = -z__[j] / difl[j] / (dsigj + poles[j +
+			    poles_dim1]) / difr[j + ((difr_dim1) << (1))];
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.) {
+			work[i__] = 0.;
+		    } else {
+			d__1 = -poles[i__ + 1 + ((poles_dim1) << (1))];
+			work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[
+				i__ + difr_dim1]) / (dsigj + poles[i__ +
+				poles_dim1]) / difr[i__ + ((difr_dim1) << (1))
+				];
+		    }
+/* L60: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.) {
+			work[i__] = 0.;
+		    } else {
+			d__1 = -poles[i__ + ((poles_dim1) << (1))];
+			work[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[
+				i__]) / (dsigj + poles[i__ + poles_dim1]) /
+				difr[i__ + ((difr_dim1) << (1))];
+		    }
+/* L70: */
+		}
+		dgemv_("T", k, nrhs, &c_b2865, &b[b_offset], ldb, &work[1], &
+			c__1, &c_b2879, &bx[j + bx_dim1], ldbx);
+/* L80: */
+	    }
+	}
+
+/*
+          Step (2R): if SQRE = 1, apply back the rotation that is
+          related to the right null space of the subproblem.
+*/
+
+	if (*sqre == 1) {
+	    dcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
+	    drot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__,
+		    s);
+	}
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    dlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 +
+		    bx_dim1], ldbx);
+	}
+
+/*        Step (3R): permute rows of B. */
+
+	dcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
+	if (*sqre == 1) {
+	    dcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
+	}
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    dcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1],
+		    ldb);
+/* L90: */
+	}
+
+/*        Step (4R): apply back the Givens rotations performed. */
+
+	for (i__ = *givptr; i__ >= 1; --i__) {
+	    d__1 = -givnum[i__ + givnum_dim1];
+	    drot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &d__1);
+/* L100: */
+	}
+    }
+
+    return 0;
+
+/*     End of DLALS0 */
+
+} /* dlals0_ */
+
+/* Subroutine */ int dlalsa_(integer *icompq, integer *smlsiz, integer *n,
+	integer *nrhs, doublereal *b, integer *ldb, doublereal *bx, integer *
+	ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *k,
+	doublereal *difl, doublereal *difr, doublereal *z__, doublereal *
+	poles, integer *givptr, integer *givcol, integer *ldgcol, integer *
+	perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *
+	work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1,
+	    b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1,
+	    difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset,
+	     u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1,
+	    i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl,
+	    ndb1, nlp1, lvl2, nrp1, nlvl, sqre;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer inode, ndiml, ndimr;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlals0_(integer *, integer *, integer *,
+	     integer *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, integer *, integer *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *), dlasdt_(integer *, integer *, integer *, integer *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLALSA is an itermediate step in solving the least squares problem
+    by computing the SVD of the coefficient matrix in compact form (The
+    singular vectors are computed as products of simple orthorgonal
+    matrices.).
+
+    If ICOMPQ = 0, DLALSA applies the inverse of the left singular vector
+    matrix of an upper bidiagonal matrix to the right hand side; and if
+    ICOMPQ = 1, DLALSA applies the right singular vector matrix to the
+    right hand side. The singular vector matrices were generated in
+    compact form by DLALSA.
+
+    Arguments
+    =========
+
+
+    ICOMPQ (input) INTEGER
+           Specifies whether the left or the right singular vector
+           matrix is involved.
+           = 0: Left singular vector matrix
+           = 1: Right singular vector matrix
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The row and column dimensions of the upper bidiagonal matrix.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input) DOUBLE PRECISION array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,MAX( M, N ) ).
+
+    BX     (output) DOUBLE PRECISION array, dimension ( LDBX, NRHS )
+           On exit, the result of applying the left or right singular
+           vector matrix to B.
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+           On entry, U contains the left singular vector matrices of all
+           subproblems at the bottom level.
+
+    LDU    (input) INTEGER, LDU = > N.
+           The leading dimension of arrays U, VT, DIFL, DIFR,
+           POLES, GIVNUM, and Z.
+
+    VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
+           On entry, VT' contains the right singular vector matrices of
+           all subproblems at the bottom level.
+
+    K      (input) INTEGER array, dimension ( N ).
+
+    DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+           where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+
+    DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+           On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+           distances between singular values on the I-th level and
+           singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+           record the normalizing factors of the right singular vectors
+           matrices of subproblems on I-th level.
+
+    Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+           On entry, Z(1, I) contains the components of the deflation-
+           adjusted updating row vector for subproblems on the I-th
+           level.
+
+    POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+           On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+           singular values involved in the secular equations on the I-th
+           level.
+
+    GIVPTR (input) INTEGER array, dimension ( N ).
+           On entry, GIVPTR( I ) records the number of Givens
+           rotations performed on the I-th problem on the computation
+           tree.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+           On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+           locations of Givens rotations performed on the I-th level on
+           the computation tree.
+
+    LDGCOL (input) INTEGER, LDGCOL = > N.
+           The leading dimension of arrays GIVCOL and PERM.
+
+    PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+           On entry, PERM(*, I) records permutations done on the I-th
+           level of the computation tree.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+           On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+           values of Givens rotations performed on the I-th level on the
+           computation tree.
+
+    C      (input) DOUBLE PRECISION array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           C( I ) contains the C-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    S      (input) DOUBLE PRECISION array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           S( I ) contains the S-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    WORK   (workspace) DOUBLE PRECISION array.
+           The dimension must be at least N.
+
+    IWORK  (workspace) INTEGER array.
+           The dimension must be at least 3 * N
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    givnum_dim1 = *ldu;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    poles_dim1 = *ldu;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    z_dim1 = *ldu;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    difr_dim1 = *ldu;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    difl_dim1 = *ldu;
+    difl_offset = 1 + difl_dim1;
+    difl -= difl_offset;
+    vt_dim1 = *ldu;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    --k;
+    --givptr;
+    perm_dim1 = *ldgcol;
+    perm_offset = 1 + perm_dim1;
+    perm -= perm_offset;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    --c__;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*smlsiz < 3) {
+	*info = -2;
+    } else if (*n < *smlsiz) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if (*ldb < *n) {
+	*info = -6;
+    } else if (*ldbx < *n) {
+	*info = -8;
+    } else if (*ldu < *n) {
+	*info = -10;
+    } else if (*ldgcol < *n) {
+	*info = -19;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLALSA", &i__1);
+	return 0;
+    }
+
+/*     Book-keeping and  setting up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+
+    dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       The following code applies back the left singular vector factors.
+       For applying back the right singular vector factors, go to 50.
+*/
+
+    if (*icompq == 1) {
+	goto L50;
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by DLASDQ. The corresponding left and right singular vector
+       matrices are in explicit form. First apply back the left
+       singular vector matrices.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+          IC : center row of each node
+          NL : number of rows of left  subproblem
+          NR : number of rows of right subproblem
+          NLF: starting row of the left   subproblem
+          NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlf = ic - nl;
+	nrf = ic + 1;
+	dgemm_("T", "N", &nl, nrhs, &nl, &c_b2865, &u[nlf + u_dim1], ldu, &b[
+		nlf + b_dim1], ldb, &c_b2879, &bx[nlf + bx_dim1], ldbx);
+	dgemm_("T", "N", &nr, nrhs, &nr, &c_b2865, &u[nrf + u_dim1], ldu, &b[
+		nrf + b_dim1], ldb, &c_b2879, &bx[nrf + bx_dim1], ldbx);
+/* L10: */
+    }
+
+/*
+       Next copy the rows of B that correspond to unchanged rows
+       in the bidiagonal matrix to BX.
+*/
+
+    i__1 = nd;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	ic = iwork[inode + i__ - 1];
+	dcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx);
+/* L20: */
+    }
+
+/*
+       Finally go through the left singular vector matrices of all
+       the other subproblems bottom-up on the tree.
+*/
+
+    j = pow_ii(&c__2, &nlvl);
+    sqre = 0;
+
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          find the first node LF and last node LL on
+          the current level LVL
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    --j;
+	    dlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, &
+		    b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &work[1], info);
+/* L30: */
+	}
+/* L40: */
+    }
+    goto L90;
+
+/*     ICOMPQ = 1: applying back the right singular vector factors. */
+
+L50:
+
+/*
+       First now go through the right singular vector matrices of all
+       the tree nodes top-down.
+*/
+
+    j = 0;
+    i__1 = nlvl;
+    for (lvl = 1; lvl <= i__1; ++lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          Find the first node LF and last node LL on
+          the current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__2 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__2);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__2 = lf;
+	for (i__ = ll; i__ >= i__2; --i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    if (i__ == ll) {
+		sqre = 0;
+	    } else {
+		sqre = 1;
+	    }
+	    ++j;
+	    dlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[
+		    nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &work[1], info);
+/* L60: */
+	}
+/* L70: */
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by DLASDQ. The corresponding right singular vector
+       matrices are in explicit form. Apply them back.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlp1 = nl + 1;
+	if (i__ == nd) {
+	    nrp1 = nr;
+	} else {
+	    nrp1 = nr + 1;
+	}
+	nlf = ic - nl;
+	nrf = ic + 1;
+	dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b2865, &vt[nlf + vt_dim1],
+		ldu, &b[nlf + b_dim1], ldb, &c_b2879, &bx[nlf + bx_dim1],
+		ldbx);
+	dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b2865, &vt[nrf + vt_dim1],
+		ldu, &b[nrf + b_dim1], ldb, &c_b2879, &bx[nrf + bx_dim1],
+		ldbx);
+/* L80: */
+    }
+
+L90:
+
+    return 0;
+
+/*     End of DLALSA */
+
+} /* dlalsa_ */
+
+/* Subroutine */ int dlalsd_(char *uplo, integer *smlsiz, integer *n, integer
+	*nrhs, doublereal *d__, doublereal *e, doublereal *b, integer *ldb,
+	doublereal *rcond, integer *rank, doublereal *work, integer *iwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double log(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer c__, i__, j, k;
+    static doublereal r__;
+    static integer s, u, z__;
+    static doublereal cs;
+    static integer bx;
+    static doublereal sn;
+    static integer st, vt, nm1, st1;
+    static doublereal eps;
+    static integer iwk;
+    static doublereal tol;
+    static integer difl, difr, perm, nsub;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static integer nlvl, sqre, bxst;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *),
+	     dcopy_(integer *, doublereal *, integer *, doublereal *, integer
+	    *);
+    static integer poles, sizei, nsize, nwork, icmpq1, icmpq2;
+
+    extern /* Subroutine */ int dlasda_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *), dlalsa_(integer *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     integer *, integer *), dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlacpy_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *), dlaset_(char *, integer *, integer *,
+	     doublereal *, doublereal *, doublereal *, integer *),
+	    xerbla_(char *, integer *);
+    static integer givcol;
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *,
+	    integer *);
+    static doublereal orgnrm;
+    static integer givnum, givptr, smlszp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLALSD uses the singular value decomposition of A to solve the least
+    squares problem of finding X to minimize the Euclidean norm of each
+    column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+    are N-by-NRHS. The solution X overwrites B.
+
+    The singular values of A smaller than RCOND times the largest
+    singular value are treated as zero in solving the least squares
+    problem; in this case a minimum norm solution is returned.
+    The actual singular values are returned in D in ascending order.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    UPLO   (input) CHARACTER*1
+           = 'U': D and E define an upper bidiagonal matrix.
+           = 'L': D and E define a  lower bidiagonal matrix.
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The dimension of the  bidiagonal matrix.  N >= 0.
+
+    NRHS   (input) INTEGER
+           The number of columns of B. NRHS must be at least 1.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry D contains the main diagonal of the bidiagonal
+           matrix. On exit, if INFO = 0, D contains its singular values.
+
+    E      (input) DOUBLE PRECISION array, dimension (N-1)
+           Contains the super-diagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    B      (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+           On input, B contains the right hand sides of the least
+           squares problem. On output, B contains the solution X.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,N).
+
+    RCOND  (input) DOUBLE PRECISION
+           The singular values of A less than or equal to RCOND times
+           the largest singular value are treated as zero in solving
+           the least squares problem. If RCOND is negative,
+           machine precision is used instead.
+           For example, if diag(S)*X=B were the least squares problem,
+           where diag(S) is a diagonal matrix of singular values, the
+           solution would be X(i) = B(i) / S(i) if S(i) is greater than
+           RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+           RCOND*max(S).
+
+    RANK   (output) INTEGER
+           The number of singular values of A greater than RCOND times
+           the largest singular value.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension at least
+           (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
+           where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
+
+    IWORK  (workspace) INTEGER array, dimension at least
+           (3*N*NLVL + 11*N)
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+           > 0:  The algorithm failed to compute an singular value while
+                 working on the submatrix lying in rows and columns
+                 INFO/(N+1) through MOD(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if ((*ldb < 1) || (*ldb < *n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLALSD", &i__1);
+	return 0;
+    }
+
+    eps = EPSILON;
+
+/*     Set up the tolerance. */
+
+    if ((*rcond <= 0.) || (*rcond >= 1.)) {
+	*rcond = eps;
+    }
+
+    *rank = 0;
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+	if (d__[1] == 0.) {
+	    dlaset_("A", &c__1, nrhs, &c_b2879, &c_b2879, &b[b_offset], ldb);
+	} else {
+	    *rank = 1;
+	    dlascl_("G", &c__0, &c__0, &d__[1], &c_b2865, &c__1, nrhs, &b[
+		    b_offset], ldb, info);
+	    d__[1] = abs(d__[1]);
+	}
+	return 0;
+    }
+
+/*     Rotate the matrix if it is lower bidiagonal. */
+
+    if (*(unsigned char *)uplo == 'L') {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (*nrhs == 1) {
+		drot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], &
+			c__1, &cs, &sn);
+	    } else {
+		work[((i__) << (1)) - 1] = cs;
+		work[i__ * 2] = sn;
+	    }
+/* L10: */
+	}
+	if (*nrhs > 1) {
+	    i__1 = *nrhs;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		i__2 = *n - 1;
+		for (j = 1; j <= i__2; ++j) {
+		    cs = work[((j) << (1)) - 1];
+		    sn = work[j * 2];
+		    drot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ *
+			     b_dim1], &c__1, &cs, &sn);
+/* L20: */
+		}
+/* L30: */
+	    }
+	}
+    }
+
+/*     Scale. */
+
+    nm1 = *n - 1;
+    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.) {
+	dlaset_("A", n, nrhs, &c_b2879, &c_b2879, &b[b_offset], ldb);
+	return 0;
+    }
+
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, &c__1, &d__[1], n, info);
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &nm1, &c__1, &e[1], &nm1,
+	    info);
+
+/*
+       If N is smaller than the minimum divide size SMLSIZ, then solve
+       the problem with another solver.
+*/
+
+    if (*n <= *smlsiz) {
+	nwork = *n * *n + 1;
+	dlaset_("A", n, n, &c_b2879, &c_b2865, &work[1], n);
+	dlasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, &
+		work[1], n, &b[b_offset], ldb, &work[nwork], info);
+	if (*info != 0) {
+	    return 0;
+	}
+	tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1));
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (d__[i__] <= tol) {
+		dlaset_("A", &c__1, nrhs, &c_b2879, &c_b2879, &b[i__ + b_dim1]
+			, ldb);
+	    } else {
+		dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2865, &c__1, nrhs, &
+			b[i__ + b_dim1], ldb, info);
+		++(*rank);
+	    }
+/* L40: */
+	}
+	dgemm_("T", "N", n, nrhs, n, &c_b2865, &work[1], n, &b[b_offset], ldb,
+		 &c_b2879, &work[nwork], n);
+	dlacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb);
+
+/*        Unscale. */
+
+	dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, n, &c__1, &d__[1], n,
+		info);
+	dlasrt_("D", n, &d__[1], info);
+	dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, nrhs, &b[b_offset],
+		ldb, info);
+
+	return 0;
+    }
+
+/*     Book-keeping and setting up some constants. */
+
+    nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) /
+	    log(2.)) + 1;
+
+    smlszp = *smlsiz + 1;
+
+    u = 1;
+    vt = *smlsiz * *n + 1;
+    difl = vt + smlszp * *n;
+    difr = difl + nlvl * *n;
+    z__ = difr + ((nlvl * *n) << (1));
+    c__ = z__ + nlvl * *n;
+    s = c__ + *n;
+    poles = s + *n;
+    givnum = poles + ((nlvl) << (1)) * *n;
+    bx = givnum + ((nlvl) << (1)) * *n;
+    nwork = bx + *n * *nrhs;
+
+    sizei = *n + 1;
+    k = sizei + *n;
+    givptr = k + *n;
+    perm = givptr + *n;
+    givcol = perm + nlvl * *n;
+    iwk = givcol + ((nlvl * *n) << (1));
+
+    st = 1;
+    sqre = 0;
+    icmpq1 = 1;
+    icmpq2 = 0;
+    nsub = 0;
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) < eps) {
+	    d__[i__] = d_sign(&eps, &d__[i__]);
+	}
+/* L50: */
+    }
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (((d__1 = e[i__], abs(d__1)) < eps) || (i__ == nm1)) {
+	    ++nsub;
+	    iwork[nsub] = st;
+
+/*
+             Subproblem found. First determine its size and then
+             apply divide and conquer on it.
+*/
+
+	    if (i__ < nm1) {
+
+/*              A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else if ((d__1 = e[i__], abs(d__1)) >= eps) {
+
+/*              A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else {
+
+/*
+                A subproblem with E(NM1) small. This implies an
+                1-by-1 subproblem at D(N), which is not solved
+                explicitly.
+*/
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+		++nsub;
+		iwork[nsub] = *n;
+		iwork[sizei + nsub - 1] = 1;
+		dcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n);
+	    }
+	    st1 = st - 1;
+	    if (nsize == 1) {
+
+/*
+                This is a 1-by-1 subproblem and is not solved
+                explicitly.
+*/
+
+		dcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n);
+	    } else if (nsize <= *smlsiz) {
+
+/*              This is a small subproblem and is solved by DLASDQ. */
+
+		dlaset_("A", &nsize, &nsize, &c_b2879, &c_b2865, &work[vt +
+			st1], n);
+		dlasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[
+			st], &work[vt + st1], n, &work[nwork], n, &b[st +
+			b_dim1], ldb, &work[nwork], info);
+		if (*info != 0) {
+		    return 0;
+		}
+		dlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx +
+			st1], n);
+	    } else {
+
+/*              A large problem. Solve it using divide and conquer. */
+
+		dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], &
+			work[u + st1], n, &work[vt + st1], &iwork[k + st1], &
+			work[difl + st1], &work[difr + st1], &work[z__ + st1],
+			 &work[poles + st1], &iwork[givptr + st1], &iwork[
+			givcol + st1], n, &iwork[perm + st1], &work[givnum +
+			st1], &work[c__ + st1], &work[s + st1], &work[nwork],
+			&iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+		bxst = bx + st1;
+		dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, &
+			work[bxst], n, &work[u + st1], n, &work[vt + st1], &
+			iwork[k + st1], &work[difl + st1], &work[difr + st1],
+			&work[z__ + st1], &work[poles + st1], &iwork[givptr +
+			st1], &iwork[givcol + st1], n, &iwork[perm + st1], &
+			work[givnum + st1], &work[c__ + st1], &work[s + st1],
+			&work[nwork], &iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+	    }
+	    st = i__ + 1;
+	}
+/* L60: */
+    }
+
+/*     Apply the singular values and treat the tiny ones as zero. */
+
+    tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1));
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+          Some of the elements in D can be negative because 1-by-1
+          subproblems were not solved explicitly.
+*/
+
+	if ((d__1 = d__[i__], abs(d__1)) <= tol) {
+	    dlaset_("A", &c__1, nrhs, &c_b2879, &c_b2879, &work[bx + i__ - 1],
+		     n);
+	} else {
+	    ++(*rank);
+	    dlascl_("G", &c__0, &c__0, &d__[i__], &c_b2865, &c__1, nrhs, &
+		    work[bx + i__ - 1], n, info);
+	}
+	d__[i__] = (d__1 = d__[i__], abs(d__1));
+/* L70: */
+    }
+
+/*     Now apply back the right singular vectors. */
+
+    icmpq2 = 1;
+    i__1 = nsub;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	st = iwork[i__];
+	st1 = st - 1;
+	nsize = iwork[sizei + i__ - 1];
+	bxst = bx + st1;
+	if (nsize == 1) {
+	    dcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb);
+	} else if (nsize <= *smlsiz) {
+	    dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b2865, &work[vt + st1],
+		    n, &work[bxst], n, &c_b2879, &b[st + b_dim1], ldb);
+	} else {
+	    dlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st +
+		    b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[
+		    k + st1], &work[difl + st1], &work[difr + st1], &work[z__
+		    + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[
+		    givcol + st1], n, &iwork[perm + st1], &work[givnum + st1],
+		     &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[
+		    iwk], info);
+	    if (*info != 0) {
+		return 0;
+	    }
+	}
+/* L80: */
+    }
+
+/*     Unscale and sort the singular values. */
+
+    dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, n, &c__1, &d__[1], n, info);
+    dlasrt_("D", n, &d__[1], info);
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, n, nrhs, &b[b_offset], ldb,
+	    info);
+
+    return 0;
+
+/*     End of DLALSD */
+
+} /* dlalsd_ */
+
+/* Subroutine */ int dlamrg_(integer *n1, integer *n2, doublereal *a, integer
+	*dtrd1, integer *dtrd2, integer *index)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, ind1, ind2, n1sv, n2sv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLAMRG will create a permutation list which will merge the elements
+    of A (which is composed of two independently sorted sets) into a
+    single set which is sorted in ascending order.
+
+    Arguments
+    =========
+
+    N1     (input) INTEGER
+    N2     (input) INTEGER
+           These arguements contain the respective lengths of the two
+           sorted lists to be merged.
+
+    A      (input) DOUBLE PRECISION array, dimension (N1+N2)
+           The first N1 elements of A contain a list of numbers which
+           are sorted in either ascending or descending order.  Likewise
+           for the final N2 elements.
+
+    DTRD1  (input) INTEGER
+    DTRD2  (input) INTEGER
+           These are the strides to be taken through the array A.
+           Allowable strides are 1 and -1.  They indicate whether a
+           subset of A is sorted in ascending (DTRDx = 1) or descending
+           (DTRDx = -1) order.
+
+    INDEX  (output) INTEGER array, dimension (N1+N2)
+           On exit this array will contain a permutation such that
+           if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be
+           sorted in ascending order.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --index;
+    --a;
+
+    /* Function Body */
+    n1sv = *n1;
+    n2sv = *n2;
+    if (*dtrd1 > 0) {
+	ind1 = 1;
+    } else {
+	ind1 = *n1;
+    }
+    if (*dtrd2 > 0) {
+	ind2 = *n1 + 1;
+    } else {
+	ind2 = *n1 + *n2;
+    }
+    i__ = 1;
+/*     while ( (N1SV > 0) & (N2SV > 0) ) */
+L10:
+    if (n1sv > 0 && n2sv > 0) {
+	if (a[ind1] <= a[ind2]) {
+	    index[i__] = ind1;
+	    ++i__;
+	    ind1 += *dtrd1;
+	    --n1sv;
+	} else {
+	    index[i__] = ind2;
+	    ++i__;
+	    ind2 += *dtrd2;
+	    --n2sv;
+	}
+	goto L10;
+    }
+/*     end while */
+    if (n1sv == 0) {
+	i__1 = n2sv;
+	for (n1sv = 1; n1sv <= i__1; ++n1sv) {
+	    index[i__] = ind2;
+	    ++i__;
+	    ind2 += *dtrd2;
+/* L20: */
+	}
+    } else {
+/*     N2SV .EQ. 0 */
+	i__1 = n1sv;
+	for (n2sv = 1; n2sv <= i__1; ++n2sv) {
+	    index[i__] = ind1;
+	    ++i__;
+	    ind1 += *dtrd1;
+/* L30: */
+	}
+    }
+
+    return 0;
+
+/*     End of DLAMRG */
+
+} /* dlamrg_ */
+
+doublereal dlange_(char *norm, integer *m, integer *n, doublereal *a, integer
+	*lda, doublereal *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal ret_val, d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal sum, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal value;
+    extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *,
+	    doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLANGE  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    real matrix A.
+
+    Description
+    ===========
+
+    DLANGE returns the value
+
+       DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in DLANGE as described
+            above.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.  When M = 0,
+            DLANGE is set to zero.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.  When N = 0,
+            DLANGE is set to zero.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The m by n matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(M,1).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+            where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (min(*m,*n) == 0) {
+	value = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
+		value = max(d__2,d__3);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.;
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += (d__1 = a[i__ + j * a_dim1], abs(d__1));
+/* L30: */
+	    }
+	    value = max(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1));
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.;
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    d__1 = value, d__2 = work[i__];
+	    value = max(d__1,d__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    dlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of DLANGE */
+
+} /* dlange_ */
+
+doublereal dlanhs_(char *norm, integer *n, doublereal *a, integer *lda,
+	doublereal *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublereal ret_val, d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal sum, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal value;
+    extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *,
+	    doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLANHS  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    Hessenberg matrix A.
+
+    Description
+    ===========
+
+    DLANHS returns the value
+
+       DLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in DLANHS as described
+            above.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, DLANHS is
+            set to zero.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The n by n upper Hessenberg matrix A; the part of A below the
+            first sub-diagonal is not referenced.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(d__1));
+		value = max(d__2,d__3);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.;
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += (d__1 = a[i__ + j * a_dim1], abs(d__1));
+/* L30: */
+	    }
+	    value = max(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += (d__1 = a[i__ + j * a_dim1], abs(d__1));
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    d__1 = value, d__2 = work[i__];
+	    value = max(d__1,d__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of DLANHS */
+
+} /* dlanhs_ */
+
+doublereal dlanst_(char *norm, integer *n, doublereal *d__, doublereal *e)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal ret_val, d__1, d__2, d__3, d__4, d__5;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__;
+    static doublereal sum, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal anorm;
+    extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *,
+	    doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLANST  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    real symmetric tridiagonal matrix A.
+
+    Description
+    ===========
+
+    DLANST returns the value
+
+       DLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in DLANST as described
+            above.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, DLANST is
+            set to zero.
+
+    D       (input) DOUBLE PRECISION array, dimension (N)
+            The diagonal elements of A.
+
+    E       (input) DOUBLE PRECISION array, dimension (N-1)
+            The (n-1) sub-diagonal or super-diagonal elements of A.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --e;
+    --d__;
+
+    /* Function Body */
+    if (*n <= 0) {
+	anorm = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	anorm = (d__1 = d__[*n], abs(d__1));
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    d__2 = anorm, d__3 = (d__1 = d__[i__], abs(d__1));
+	    anorm = max(d__2,d__3);
+/* Computing MAX */
+	    d__2 = anorm, d__3 = (d__1 = e[i__], abs(d__1));
+	    anorm = max(d__2,d__3);
+/* L10: */
+	}
+    } else if (((lsame_(norm, "O")) || (*(unsigned char
+	    *)norm == '1')) || (lsame_(norm, "I"))) {
+
+/*        Find norm1(A). */
+
+	if (*n == 1) {
+	    anorm = abs(d__[1]);
+	} else {
+/* Computing MAX */
+	    d__3 = abs(d__[1]) + abs(e[1]), d__4 = (d__1 = e[*n - 1], abs(
+		    d__1)) + (d__2 = d__[*n], abs(d__2));
+	    anorm = max(d__3,d__4);
+	    i__1 = *n - 1;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+/* Computing MAX */
+		d__4 = anorm, d__5 = (d__1 = d__[i__], abs(d__1)) + (d__2 = e[
+			i__], abs(d__2)) + (d__3 = e[i__ - 1], abs(d__3));
+		anorm = max(d__4,d__5);
+/* L20: */
+	    }
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	if (*n > 1) {
+	    i__1 = *n - 1;
+	    dlassq_(&i__1, &e[1], &c__1, &scale, &sum);
+	    sum *= 2;
+	}
+	dlassq_(n, &d__[1], &c__1, &scale, &sum);
+	anorm = scale * sqrt(sum);
+    }
+
+    ret_val = anorm;
+    return ret_val;
+
+/*     End of DLANST */
+
+} /* dlanst_ */
+
+doublereal dlansy_(char *norm, char *uplo, integer *n, doublereal *a, integer
+	*lda, doublereal *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal ret_val, d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal sum, absa, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal value;
+    extern /* Subroutine */ int dlassq_(integer *, doublereal *, integer *,
+	    doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLANSY  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    real symmetric matrix A.
+
+    Description
+    ===========
+
+    DLANSY returns the value
+
+       DLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in DLANSY as described
+            above.
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is to be referenced.
+            = 'U':  Upper triangular part of A is referenced
+            = 'L':  Lower triangular part of A is referenced
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, DLANSY is
+            set to zero.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The symmetric matrix A.  If UPLO = 'U', the leading n by n
+            upper triangular part of A contains the upper triangular part
+            of the matrix A, and the strictly lower triangular part of A
+            is not referenced.  If UPLO = 'L', the leading n by n lower
+            triangular part of A contains the lower triangular part of
+            the matrix A, and the strictly upper triangular part of A is
+            not referenced.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+            WORK is not referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(
+			    d__1));
+		    value = max(d__2,d__3);
+/* L10: */
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    d__2 = value, d__3 = (d__1 = a[i__ + j * a_dim1], abs(
+			    d__1));
+		    value = max(d__2,d__3);
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (((lsame_(norm, "I")) || (lsame_(norm,
+	    "O"))) || (*(unsigned char *)norm == '1')) {
+
+/*        Find normI(A) ( = norm1(A), since A is symmetric). */
+
+	value = 0.;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		sum = 0.;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    absa = (d__1 = a[i__ + j * a_dim1], abs(d__1));
+		    sum += absa;
+		    work[i__] += absa;
+/* L50: */
+		}
+		work[j] = sum + (d__1 = a[j + j * a_dim1], abs(d__1));
+/* L60: */
+	    }
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+		d__1 = value, d__2 = work[i__];
+		value = max(d__1,d__2);
+/* L70: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] = 0.;
+/* L80: */
+	    }
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		sum = work[j] + (d__1 = a[j + j * a_dim1], abs(d__1));
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    absa = (d__1 = a[i__ + j * a_dim1], abs(d__1));
+		    sum += absa;
+		    work[i__] += absa;
+/* L90: */
+		}
+		value = max(value,sum);
+/* L100: */
+	    }
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		i__2 = j - 1;
+		dlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L110: */
+	    }
+	} else {
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n - j;
+		dlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum);
+/* L120: */
+	    }
+	}
+	sum *= 2;
+	i__1 = *lda + 1;
+	dlassq_(n, &a[a_offset], &i__1, &scale, &sum);
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of DLANSY */
+
+} /* dlansy_ */
+
+/* Subroutine */ int dlanv2_(doublereal *a, doublereal *b, doublereal *c__,
+	doublereal *d__, doublereal *rt1r, doublereal *rt1i, doublereal *rt2r,
+	 doublereal *rt2i, doublereal *cs, doublereal *sn)
+{
+    /* System generated locals */
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double d_sign(doublereal *, doublereal *), sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau,
+	    temp, scale, bcmax, bcmis, sigma;
+
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric
+    matrix in standard form:
+
+         [ A  B ] = [ CS -SN ] [ AA  BB ] [ CS  SN ]
+         [ C  D ]   [ SN  CS ] [ CC  DD ] [-SN  CS ]
+
+    where either
+    1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or
+    2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex
+    conjugate eigenvalues.
+
+    Arguments
+    =========
+
+    A       (input/output) DOUBLE PRECISION
+    B       (input/output) DOUBLE PRECISION
+    C       (input/output) DOUBLE PRECISION
+    D       (input/output) DOUBLE PRECISION
+            On entry, the elements of the input matrix.
+            On exit, they are overwritten by the elements of the
+            standardised Schur form.
+
+    RT1R    (output) DOUBLE PRECISION
+    RT1I    (output) DOUBLE PRECISION
+    RT2R    (output) DOUBLE PRECISION
+    RT2I    (output) DOUBLE PRECISION
+            The real and imaginary parts of the eigenvalues. If the
+            eigenvalues are a complex conjugate pair, RT1I > 0.
+
+    CS      (output) DOUBLE PRECISION
+    SN      (output) DOUBLE PRECISION
+            Parameters of the rotation matrix.
+
+    Further Details
+    ===============
+
+    Modified by V. Sima, Research Institute for Informatics, Bucharest,
+    Romania, to reduce the risk of cancellation errors,
+    when computing real eigenvalues, and to ensure, if possible, that
+    abs(RT1R) >= abs(RT2R).
+
+    =====================================================================
+*/
+
+
+    eps = PRECISION;
+    if (*c__ == 0.) {
+	*cs = 1.;
+	*sn = 0.;
+	goto L10;
+
+    } else if (*b == 0.) {
+
+/*        Swap rows and columns */
+
+	*cs = 0.;
+	*sn = 1.;
+	temp = *d__;
+	*d__ = *a;
+	*a = temp;
+	*b = -(*c__);
+	*c__ = 0.;
+	goto L10;
+    } else if (*a - *d__ == 0. && d_sign(&c_b2865, b) != d_sign(&c_b2865, c__)
+	    ) {
+	*cs = 1.;
+	*sn = 0.;
+	goto L10;
+    } else {
+
+	temp = *a - *d__;
+	p = temp * .5;
+/* Computing MAX */
+	d__1 = abs(*b), d__2 = abs(*c__);
+	bcmax = max(d__1,d__2);
+/* Computing MIN */
+	d__1 = abs(*b), d__2 = abs(*c__);
+	bcmis = min(d__1,d__2) * d_sign(&c_b2865, b) * d_sign(&c_b2865, c__);
+/* Computing MAX */
+	d__1 = abs(p);
+	scale = max(d__1,bcmax);
+	z__ = p / scale * p + bcmax / scale * bcmis;
+
+/*
+          If Z is of the order of the machine accuracy, postpone the
+          decision on the nature of eigenvalues
+*/
+
+	if (z__ >= eps * 4.) {
+
+/*           Real eigenvalues. Compute A and D. */
+
+	    d__1 = sqrt(scale) * sqrt(z__);
+	    z__ = p + d_sign(&d__1, &p);
+	    *a = *d__ + z__;
+	    *d__ -= bcmax / z__ * bcmis;
+
+/*           Compute B and the rotation matrix */
+
+	    tau = dlapy2_(c__, &z__);
+	    *cs = z__ / tau;
+	    *sn = *c__ / tau;
+	    *b -= *c__;
+	    *c__ = 0.;
+	} else {
+
+/*
+             Complex eigenvalues, or real (almost) equal eigenvalues.
+             Make diagonal elements equal.
+*/
+
+	    sigma = *b + *c__;
+	    tau = dlapy2_(&sigma, &temp);
+	    *cs = sqrt((abs(sigma) / tau + 1.) * .5);
+	    *sn = -(p / (tau * *cs)) * d_sign(&c_b2865, &sigma);
+
+/*
+             Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
+                     [ CC  DD ]   [ C  D ] [ SN  CS ]
+*/
+
+	    aa = *a * *cs + *b * *sn;
+	    bb = -(*a) * *sn + *b * *cs;
+	    cc = *c__ * *cs + *d__ * *sn;
+	    dd = -(*c__) * *sn + *d__ * *cs;
+
+/*
+             Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
+                     [ C  D ]   [-SN  CS ] [ CC  DD ]
+*/
+
+	    *a = aa * *cs + cc * *sn;
+	    *b = bb * *cs + dd * *sn;
+	    *c__ = -aa * *sn + cc * *cs;
+	    *d__ = -bb * *sn + dd * *cs;
+
+	    temp = (*a + *d__) * .5;
+	    *a = temp;
+	    *d__ = temp;
+
+	    if (*c__ != 0.) {
+		if (*b != 0.) {
+		    if (d_sign(&c_b2865, b) == d_sign(&c_b2865, c__)) {
+
+/*                    Real eigenvalues: reduce to upper triangular form */
+
+			sab = sqrt((abs(*b)));
+			sac = sqrt((abs(*c__)));
+			d__1 = sab * sac;
+			p = d_sign(&d__1, c__);
+			tau = 1. / sqrt((d__1 = *b + *c__, abs(d__1)));
+			*a = temp + p;
+			*d__ = temp - p;
+			*b -= *c__;
+			*c__ = 0.;
+			cs1 = sab * tau;
+			sn1 = sac * tau;
+			temp = *cs * cs1 - *sn * sn1;
+			*sn = *cs * sn1 + *sn * cs1;
+			*cs = temp;
+		    }
+		} else {
+		    *b = -(*c__);
+		    *c__ = 0.;
+		    temp = *cs;
+		    *cs = -(*sn);
+		    *sn = temp;
+		}
+	    }
+	}
+
+    }
+
+L10:
+
+/*     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */
+
+    *rt1r = *a;
+    *rt2r = *d__;
+    if (*c__ == 0.) {
+	*rt1i = 0.;
+	*rt2i = 0.;
+    } else {
+	*rt1i = sqrt((abs(*b))) * sqrt((abs(*c__)));
+	*rt2i = -(*rt1i);
+    }
+    return 0;
+
+/*     End of DLANV2 */
+
+} /* dlanv2_ */
+
+doublereal dlapy2_(doublereal *x, doublereal *y)
+{
+    /* System generated locals */
+    doublereal ret_val, d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal w, z__, xabs, yabs;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
+    overflow.
+
+    Arguments
+    =========
+
+    X       (input) DOUBLE PRECISION
+    Y       (input) DOUBLE PRECISION
+            X and Y specify the values x and y.
+
+    =====================================================================
+*/
+
+
+    xabs = abs(*x);
+    yabs = abs(*y);
+    w = max(xabs,yabs);
+    z__ = min(xabs,yabs);
+    if (z__ == 0.) {
+	ret_val = w;
+    } else {
+/* Computing 2nd power */
+	d__1 = z__ / w;
+	ret_val = w * sqrt(d__1 * d__1 + 1.);
+    }
+    return ret_val;
+
+/*     End of DLAPY2 */
+
+} /* dlapy2_ */
+
+doublereal dlapy3_(doublereal *x, doublereal *y, doublereal *z__)
+{
+    /* System generated locals */
+    doublereal ret_val, d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal w, xabs, yabs, zabs;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
+    unnecessary overflow.
+
+    Arguments
+    =========
+
+    X       (input) DOUBLE PRECISION
+    Y       (input) DOUBLE PRECISION
+    Z       (input) DOUBLE PRECISION
+            X, Y and Z specify the values x, y and z.
+
+    =====================================================================
+*/
+
+
+    xabs = abs(*x);
+    yabs = abs(*y);
+    zabs = abs(*z__);
+/* Computing MAX */
+    d__1 = max(xabs,yabs);
+    w = max(d__1,zabs);
+    if (w == 0.) {
+	ret_val = 0.;
+    } else {
+/* Computing 2nd power */
+	d__1 = xabs / w;
+/* Computing 2nd power */
+	d__2 = yabs / w;
+/* Computing 2nd power */
+	d__3 = zabs / w;
+	ret_val = w * sqrt(d__1 * d__1 + d__2 * d__2 + d__3 * d__3);
+    }
+    return ret_val;
+
+/*     End of DLAPY3 */
+
+} /* dlapy3_ */
+
+/* Subroutine */ int dlarf_(char *side, integer *m, integer *n, doublereal *v,
+	 integer *incv, doublereal *tau, doublereal *c__, integer *ldc,
+	doublereal *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset;
+    doublereal d__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLARF applies a real elementary reflector H to a real m by n matrix
+    C, from either the left or the right. H is represented in the form
+
+          H = I - tau * v * v'
+
+    where tau is a real scalar and v is a real vector.
+
+    If tau = 0, then H is taken to be the unit matrix.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) DOUBLE PRECISION array, dimension
+                       (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+                    or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+            The vector v in the representation of H. V is not used if
+            TAU = 0.
+
+    INCV    (input) INTEGER
+            The increment between elements of v. INCV <> 0.
+
+    TAU     (input) DOUBLE PRECISION
+            The value tau in the representation of H.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension
+                           (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C */
+
+	if (*tau != 0.) {
+
+/*           w := C' * v */
+
+	    dgemv_("Transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1],
+		    incv, &c_b2879, &work[1], &c__1);
+
+/*           C := C - v * w' */
+
+	    d__1 = -(*tau);
+	    dger_(m, n, &d__1, &v[1], incv, &work[1], &c__1, &c__[c_offset],
+		    ldc);
+	}
+    } else {
+
+/*        Form  C * H */
+
+	if (*tau != 0.) {
+
+/*           w := C * v */
+
+	    dgemv_("No transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1],
+		     incv, &c_b2879, &work[1], &c__1);
+
+/*           C := C - w * v' */
+
+	    d__1 = -(*tau);
+	    dger_(m, n, &d__1, &work[1], &c__1, &v[1], incv, &c__[c_offset],
+		    ldc);
+	}
+    }
+    return 0;
+
+/*     End of DLARF */
+
+} /* dlarf_ */
+
+/* Subroutine */ int dlarfb_(char *side, char *trans, char *direct, char *
+	storev, integer *m, integer *n, integer *k, doublereal *v, integer *
+	ldv, doublereal *t, integer *ldt, doublereal *c__, integer *ldc,
+	doublereal *work, integer *ldwork)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
+	    work_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *), dtrmm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static char transt[1];
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLARFB applies a real block reflector H or its transpose H' to a
+    real m by n matrix C, from either the left or the right.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply H or H' from the Left
+            = 'R': apply H or H' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply H (No transpose)
+            = 'T': apply H' (Transpose)
+
+    DIRECT  (input) CHARACTER*1
+            Indicates how H is formed from a product of elementary
+            reflectors
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Indicates how the vectors which define the elementary
+            reflectors are stored:
+            = 'C': Columnwise
+            = 'R': Rowwise
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    K       (input) INTEGER
+            The order of the matrix T (= the number of elementary
+            reflectors whose product defines the block reflector).
+
+    V       (input) DOUBLE PRECISION array, dimension
+                                  (LDV,K) if STOREV = 'C'
+                                  (LDV,M) if STOREV = 'R' and SIDE = 'L'
+                                  (LDV,N) if STOREV = 'R' and SIDE = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+            if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+            if STOREV = 'R', LDV >= K.
+
+    T       (input) DOUBLE PRECISION array, dimension (LDT,K)
+            The triangular k by k matrix T in the representation of the
+            block reflector.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDA >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LDWORK,K)
+
+    LDWORK  (input) INTEGER
+            The leading dimension of the array WORK.
+            If SIDE = 'L', LDWORK >= max(1,N);
+            if SIDE = 'R', LDWORK >= max(1,M).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    work_dim1 = *ldwork;
+    work_offset = 1 + work_dim1;
+    work -= work_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (lsame_(trans, "N")) {
+	*(unsigned char *)transt = 'T';
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+    if (lsame_(storev, "C")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1 )    (first K rows)
+                       ( V2 )
+             where  V1  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+/* L10: */
+		}
+
+/*              W := W * V1 */
+
+		dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &
+			c_b2865, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2 */
+
+		    i__1 = *m - *k;
+		    dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2865,
+			     &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1],
+			    ldv, &c_b2865, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2 * W' */
+
+		    i__1 = *m - *k;
+		    dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b3001,
+			     &v[*k + 1 + v_dim1], ldv, &work[work_offset],
+			    ldwork, &c_b2865, &c__[*k + 1 + c_dim1], ldc);
+		}
+
+/*              W := W * V1' */
+
+		dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2865,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
+/* L20: */
+		    }
+/* L30: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L40: */
+		}
+
+/*              W := W * V1 */
+
+		dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &
+			c_b2865, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2 */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b2865, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k
+			    + 1 + v_dim1], ldv, &c_b2865, &work[work_offset],
+			    ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2' */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b3001,
+			     &work[work_offset], ldwork, &v[*k + 1 + v_dim1],
+			    ldv, &c_b2865, &c__[(*k + 1) * c_dim1 + 1], ldc);
+		}
+
+/*              W := W * V1' */
+
+		dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2865,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
+/* L50: */
+		    }
+/* L60: */
+		}
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1 )
+                       ( V2 )    (last K rows)
+             where  V2  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L70: */
+		}
+
+/*              W := W * V2 */
+
+		dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &
+			c_b2865, &v[*m - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1 */
+
+		    i__1 = *m - *k;
+		    dgemm_("Transpose", "No transpose", n, k, &i__1, &c_b2865,
+			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2865,
+			     &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1 * W' */
+
+		    i__1 = *m - *k;
+		    dgemm_("No transpose", "Transpose", &i__1, n, k, &c_b3001,
+			     &v[v_offset], ldv, &work[work_offset], ldwork, &
+			    c_b2865, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2865,
+			&v[*m - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L80: */
+		    }
+/* L90: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L100: */
+		}
+
+/*              W := W * V2 */
+
+		dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &
+			c_b2865, &v[*n - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1 */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b2865, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b2865, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1' */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "Transpose", m, &i__1, k, &c_b3001,
+			     &work[work_offset], ldwork, &v[v_offset], ldv, &
+			    c_b2865, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2865,
+			&v[*n - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+
+/*              C2 := C2 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L110: */
+		    }
+/* L120: */
+		}
+	    }
+	}
+
+    } else if (lsame_(storev, "R")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1  V2 )    (V1: first K columns)
+             where  V1  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+/* L130: */
+		}
+
+/*              W := W * V1' */
+
+		dtrmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b2865,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2' */
+
+		    i__1 = *m - *k;
+		    dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2865, &
+			    c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 +
+			    1], ldv, &c_b2865, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		dtrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2' * W' */
+
+		    i__1 = *m - *k;
+		    dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b3001, &
+			    v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset],
+			     ldwork, &c_b2865, &c__[*k + 1 + c_dim1], ldc);
+		}
+
+/*              W := W * V1 */
+
+		dtrmm_("Right", "Upper", "No transpose", "Unit", n, k, &
+			c_b2865, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
+/* L140: */
+		    }
+/* L150: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L160: */
+		}
+
+/*              W := W * V1' */
+
+		dtrmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b2865,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2' */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2865,
+			     &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) *
+			    v_dim1 + 1], ldv, &c_b2865, &work[work_offset],
+			    ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		dtrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2 */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "No transpose", m, &i__1, k, &
+			    c_b3001, &work[work_offset], ldwork, &v[(*k + 1) *
+			     v_dim1 + 1], ldv, &c_b2865, &c__[(*k + 1) *
+			    c_dim1 + 1], ldc);
+		}
+
+/*              W := W * V1 */
+
+		dtrmm_("Right", "Upper", "No transpose", "Unit", m, k, &
+			c_b2865, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
+/* L170: */
+		    }
+/* L180: */
+		}
+
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1  V2 )    (V2: last K columns)
+             where  V2  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L190: */
+		}
+
+/*              W := W * V2' */
+
+		dtrmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b2865,
+			&v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1' */
+
+		    i__1 = *m - *k;
+		    dgemm_("Transpose", "Transpose", n, k, &i__1, &c_b2865, &
+			    c__[c_offset], ldc, &v[v_offset], ldv, &c_b2865, &
+			    work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		dtrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1' * W' */
+
+		    i__1 = *m - *k;
+		    dgemm_("Transpose", "Transpose", &i__1, n, k, &c_b3001, &
+			    v[v_offset], ldv, &work[work_offset], ldwork, &
+			    c_b2865, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		dtrmm_("Right", "Lower", "No transpose", "Unit", n, k, &
+			c_b2865, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L200: */
+		    }
+/* L210: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    dcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L220: */
+		}
+
+/*              W := W * V2' */
+
+		dtrmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b2865,
+			&v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1' */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "Transpose", m, k, &i__1, &c_b2865,
+			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b2865,
+			     &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		dtrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b2865, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1 */
+
+		    i__1 = *n - *k;
+		    dgemm_("No transpose", "No transpose", m, &i__1, k, &
+			    c_b3001, &work[work_offset], ldwork, &v[v_offset],
+			     ldv, &c_b2865, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		dtrmm_("Right", "Lower", "No transpose", "Unit", m, k, &
+			c_b2865, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L230: */
+		    }
+/* L240: */
+		}
+
+	    }
+
+	}
+    }
+
+    return 0;
+
+/*     End of DLARFB */
+
+} /* dlarfb_ */
+
+/* Subroutine */ int dlarfg_(integer *n, doublereal *alpha, doublereal *x,
+	integer *incx, doublereal *tau)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer j, knt;
+    static doublereal beta;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    static doublereal xnorm;
+
+    static doublereal safmin, rsafmn;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLARFG generates a real elementary reflector H of order n, such
+    that
+
+          H * ( alpha ) = ( beta ),   H' * H = I.
+              (   x   )   (   0  )
+
+    where alpha and beta are scalars, and x is an (n-1)-element real
+    vector. H is represented in the form
+
+          H = I - tau * ( 1 ) * ( 1 v' ) ,
+                        ( v )
+
+    where tau is a real scalar and v is a real (n-1)-element
+    vector.
+
+    If the elements of x are all zero, then tau = 0 and H is taken to be
+    the unit matrix.
+
+    Otherwise  1 <= tau <= 2.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the elementary reflector.
+
+    ALPHA   (input/output) DOUBLE PRECISION
+            On entry, the value alpha.
+            On exit, it is overwritten with the value beta.
+
+    X       (input/output) DOUBLE PRECISION array, dimension
+                           (1+(N-2)*abs(INCX))
+            On entry, the vector x.
+            On exit, it is overwritten with the vector v.
+
+    INCX    (input) INTEGER
+            The increment between elements of X. INCX > 0.
+
+    TAU     (output) DOUBLE PRECISION
+            The value tau.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n <= 1) {
+	*tau = 0.;
+	return 0;
+    }
+
+    i__1 = *n - 1;
+    xnorm = dnrm2_(&i__1, &x[1], incx);
+
+    if (xnorm == 0.) {
+
+/*        H  =  I */
+
+	*tau = 0.;
+    } else {
+
+/*        general case */
+
+	d__1 = dlapy2_(alpha, &xnorm);
+	beta = -d_sign(&d__1, alpha);
+	safmin = SAFEMINIMUM / EPSILON;
+	if (abs(beta) < safmin) {
+
+/*           XNORM, BETA may be inaccurate; scale X and recompute them */
+
+	    rsafmn = 1. / safmin;
+	    knt = 0;
+L10:
+	    ++knt;
+	    i__1 = *n - 1;
+	    dscal_(&i__1, &rsafmn, &x[1], incx);
+	    beta *= rsafmn;
+	    *alpha *= rsafmn;
+	    if (abs(beta) < safmin) {
+		goto L10;
+	    }
+
+/*           New BETA is at most 1, at least SAFMIN */
+
+	    i__1 = *n - 1;
+	    xnorm = dnrm2_(&i__1, &x[1], incx);
+	    d__1 = dlapy2_(alpha, &xnorm);
+	    beta = -d_sign(&d__1, alpha);
+	    *tau = (beta - *alpha) / beta;
+	    i__1 = *n - 1;
+	    d__1 = 1. / (*alpha - beta);
+	    dscal_(&i__1, &d__1, &x[1], incx);
+
+/*           If ALPHA is subnormal, it may lose relative accuracy */
+
+	    *alpha = beta;
+	    i__1 = knt;
+	    for (j = 1; j <= i__1; ++j) {
+		*alpha *= safmin;
+/* L20: */
+	    }
+	} else {
+	    *tau = (beta - *alpha) / beta;
+	    i__1 = *n - 1;
+	    d__1 = 1. / (*alpha - beta);
+	    dscal_(&i__1, &d__1, &x[1], incx);
+	    *alpha = beta;
+	}
+    }
+
+    return 0;
+
+/*     End of DLARFG */
+
+} /* dlarfg_ */
+
+/* Subroutine */ int dlarft_(char *direct, char *storev, integer *n, integer *
+	k, doublereal *v, integer *ldv, doublereal *tau, doublereal *t,
+	integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal vii;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *), dtrmv_(char *,
+	    char *, char *, integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLARFT forms the triangular factor T of a real block reflector H
+    of order n, which is defined as a product of k elementary reflectors.
+
+    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+
+    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+
+    If STOREV = 'C', the vector which defines the elementary reflector
+    H(i) is stored in the i-th column of the array V, and
+
+       H  =  I - V * T * V'
+
+    If STOREV = 'R', the vector which defines the elementary reflector
+    H(i) is stored in the i-th row of the array V, and
+
+       H  =  I - V' * T * V
+
+    Arguments
+    =========
+
+    DIRECT  (input) CHARACTER*1
+            Specifies the order in which the elementary reflectors are
+            multiplied to form the block reflector:
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Specifies how the vectors which define the elementary
+            reflectors are stored (see also Further Details):
+            = 'C': columnwise
+            = 'R': rowwise
+
+    N       (input) INTEGER
+            The order of the block reflector H. N >= 0.
+
+    K       (input) INTEGER
+            The order of the triangular factor T (= the number of
+            elementary reflectors). K >= 1.
+
+    V       (input/output) DOUBLE PRECISION array, dimension
+                                 (LDV,K) if STOREV = 'C'
+                                 (LDV,N) if STOREV = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i).
+
+    T       (output) DOUBLE PRECISION array, dimension (LDT,K)
+            The k by k triangular factor T of the block reflector.
+            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+            lower triangular. The rest of the array is not used.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    Further Details
+    ===============
+
+    The shape of the matrix V and the storage of the vectors which define
+    the H(i) is best illustrated by the following example with n = 5 and
+    k = 3. The elements equal to 1 are not stored; the corresponding
+    array elements are modified but restored on exit. The rest of the
+    array is not used.
+
+    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+
+                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+                     ( v1  1    )                     (     1 v2 v2 v2 )
+                     ( v1 v2  1 )                     (        1 v3 v3 )
+                     ( v1 v2 v3 )
+                     ( v1 v2 v3 )
+
+    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+
+                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+                     (     1 v3 )
+                     (        1 )
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (tau[i__] == 0.) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    t[j + i__ * t_dim1] = 0.;
+/* L10: */
+		}
+	    } else {
+
+/*              general case */
+
+		vii = v[i__ + i__ * v_dim1];
+		v[i__ + i__ * v_dim1] = 1.;
+		if (lsame_(storev, "C")) {
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
+
+		    i__2 = *n - i__ + 1;
+		    i__3 = i__ - 1;
+		    d__1 = -tau[i__];
+		    dgemv_("Transpose", &i__2, &i__3, &d__1, &v[i__ + v_dim1],
+			     ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b2879, &t[
+			    i__ * t_dim1 + 1], &c__1);
+		} else {
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */
+
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__ + 1;
+		    d__1 = -tau[i__];
+		    dgemv_("No transpose", &i__2, &i__3, &d__1, &v[i__ *
+			    v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, &
+			    c_b2879, &t[i__ * t_dim1 + 1], &c__1);
+		}
+		v[i__ + i__ * v_dim1] = vii;
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		dtrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		t[i__ + i__ * t_dim1] = tau[i__];
+	    }
+/* L20: */
+	}
+    } else {
+	for (i__ = *k; i__ >= 1; --i__) {
+	    if (tau[i__] == 0.) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    t[j + i__ * t_dim1] = 0.;
+/* L30: */
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+			vii = v[*n - *k + i__ + i__ * v_dim1];
+			v[*n - *k + i__ + i__ * v_dim1] = 1.;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*/
+
+			i__1 = *n - *k + i__;
+			i__2 = *k - i__;
+			d__1 = -tau[i__];
+			dgemv_("Transpose", &i__1, &i__2, &d__1, &v[(i__ + 1)
+				* v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], &
+				c__1, &c_b2879, &t[i__ + 1 + i__ * t_dim1], &
+				c__1);
+			v[*n - *k + i__ + i__ * v_dim1] = vii;
+		    } else {
+			vii = v[i__ + (*n - *k + i__) * v_dim1];
+			v[i__ + (*n - *k + i__) * v_dim1] = 1.;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*/
+
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__;
+			d__1 = -tau[i__];
+			dgemv_("No transpose", &i__1, &i__2, &d__1, &v[i__ +
+				1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, &
+				c_b2879, &t[i__ + 1 + i__ * t_dim1], &c__1);
+			v[i__ + (*n - *k + i__) * v_dim1] = vii;
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    dtrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		}
+		t[i__ + i__ * t_dim1] = tau[i__];
+	    }
+/* L40: */
+	}
+    }
+    return 0;
+
+/*     End of DLARFT */
+
+} /* dlarft_ */
+
+/* Subroutine */ int dlarfx_(char *side, integer *m, integer *n, doublereal *
+	v, doublereal *tau, doublereal *c__, integer *ldc, doublereal *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, i__1;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer j;
+    static doublereal t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5,
+	    v6, v7, v8, v9, t10, v10, sum;
+    extern /* Subroutine */ int dger_(integer *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLARFX applies a real elementary reflector H to a real m by n
+    matrix C, from either the left or the right. H is represented in the
+    form
+
+          H = I - tau * v * v'
+
+    where tau is a real scalar and v is a real vector.
+
+    If tau = 0, then H is taken to be the unit matrix
+
+    This version uses inline code if H has order < 11.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) DOUBLE PRECISION array, dimension (M) if SIDE = 'L'
+                                       or (N) if SIDE = 'R'
+            The vector v in the representation of H.
+
+    TAU     (input) DOUBLE PRECISION
+            The value tau in the representation of H.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDA >= (1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension
+                        (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+            WORK is not referenced if H has order < 11.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (*tau == 0.) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C, where H has order m. */
+
+	switch (*m) {
+	    case 1:  goto L10;
+	    case 2:  goto L30;
+	    case 3:  goto L50;
+	    case 4:  goto L70;
+	    case 5:  goto L90;
+	    case 6:  goto L110;
+	    case 7:  goto L130;
+	    case 8:  goto L150;
+	    case 9:  goto L170;
+	    case 10:  goto L190;
+	}
+
+/*
+          Code for general M
+
+          w := C'*v
+*/
+
+	dgemv_("Transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1], &c__1,
+		 &c_b2879, &work[1], &c__1);
+
+/*        C := C - tau * v * w' */
+
+	d__1 = -(*tau);
+	dger_(m, n, &d__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc)
+		;
+	goto L410;
+L10:
+
+/*        Special code for 1 x 1 Householder */
+
+	t1 = 1. - *tau * v[1] * v[1];
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1];
+/* L20: */
+	}
+	goto L410;
+L30:
+
+/*        Special code for 2 x 2 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+/* L40: */
+	}
+	goto L410;
+L50:
+
+/*        Special code for 3 x 3 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+/* L60: */
+	}
+	goto L410;
+L70:
+
+/*        Special code for 4 x 4 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+/* L80: */
+	}
+	goto L410;
+L90:
+
+/*        Special code for 5 x 5 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+/* L100: */
+	}
+	goto L410;
+L110:
+
+/*        Special code for 6 x 6 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+/* L120: */
+	}
+	goto L410;
+L130:
+
+/*        Special code for 7 x 7 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+/* L140: */
+	}
+	goto L410;
+L150:
+
+/*        Special code for 8 x 8 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+	    c__[j * c_dim1 + 8] -= sum * t8;
+/* L160: */
+	}
+	goto L410;
+L170:
+
+/*        Special code for 9 x 9 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j *
+		    c_dim1 + 9];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+	    c__[j * c_dim1 + 8] -= sum * t8;
+	    c__[j * c_dim1 + 9] -= sum * t9;
+/* L180: */
+	}
+	goto L410;
+L190:
+
+/*        Special code for 10 x 10 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	v10 = v[10];
+	t10 = *tau * v10;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j *
+		    c_dim1 + 9] + v10 * c__[j * c_dim1 + 10];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+	    c__[j * c_dim1 + 8] -= sum * t8;
+	    c__[j * c_dim1 + 9] -= sum * t9;
+	    c__[j * c_dim1 + 10] -= sum * t10;
+/* L200: */
+	}
+	goto L410;
+    } else {
+
+/*        Form  C * H, where H has order n. */
+
+	switch (*n) {
+	    case 1:  goto L210;
+	    case 2:  goto L230;
+	    case 3:  goto L250;
+	    case 4:  goto L270;
+	    case 5:  goto L290;
+	    case 6:  goto L310;
+	    case 7:  goto L330;
+	    case 8:  goto L350;
+	    case 9:  goto L370;
+	    case 10:  goto L390;
+	}
+
+/*
+          Code for general N
+
+          w := C * v
+*/
+
+	dgemv_("No transpose", m, n, &c_b2865, &c__[c_offset], ldc, &v[1], &
+		c__1, &c_b2879, &work[1], &c__1);
+
+/*        C := C - tau * w * v' */
+
+	d__1 = -(*tau);
+	dger_(m, n, &d__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc)
+		;
+	goto L410;
+L210:
+
+/*        Special code for 1 x 1 Householder */
+
+	t1 = 1. - *tau * v[1] * v[1];
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    c__[j + c_dim1] = t1 * c__[j + c_dim1];
+/* L220: */
+	}
+	goto L410;
+L230:
+
+/*        Special code for 2 x 2 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+/* L240: */
+	}
+	goto L410;
+L250:
+
+/*        Special code for 3 x 3 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+/* L260: */
+	}
+	goto L410;
+L270:
+
+/*        Special code for 4 x 4 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+/* L280: */
+	}
+	goto L410;
+L290:
+
+/*        Special code for 5 x 5 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+/* L300: */
+	}
+	goto L410;
+L310:
+
+/*        Special code for 6 x 6 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+/* L320: */
+	}
+	goto L410;
+L330:
+
+/*        Special code for 7 x 7 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+/* L340: */
+	}
+	goto L410;
+L350:
+
+/*        Special code for 8 x 8 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+	    c__[j + ((c_dim1) << (3))] -= sum * t8;
+/* L360: */
+	}
+	goto L410;
+L370:
+
+/*        Special code for 9 x 9 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] +
+		    v9 * c__[j + c_dim1 * 9];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+	    c__[j + ((c_dim1) << (3))] -= sum * t8;
+	    c__[j + c_dim1 * 9] -= sum * t9;
+/* L380: */
+	}
+	goto L410;
+L390:
+
+/*        Special code for 10 x 10 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	v10 = v[10];
+	t10 = *tau * v10;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] +
+		    v9 * c__[j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+	    c__[j + ((c_dim1) << (3))] -= sum * t8;
+	    c__[j + c_dim1 * 9] -= sum * t9;
+	    c__[j + c_dim1 * 10] -= sum * t10;
+/* L400: */
+	}
+	goto L410;
+    }
+L410:
+    return 0;
+
+/*     End of DLARFX */
+
+} /* dlarfx_ */
+
+/* Subroutine */ int dlartg_(doublereal *f, doublereal *g, doublereal *cs,
+	doublereal *sn, doublereal *r__)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double log(doublereal), pow_di(doublereal *, integer *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__;
+    static doublereal f1, g1, eps, scale;
+    static integer count;
+    static doublereal safmn2, safmx2;
+
+    static doublereal safmin;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLARTG generate a plane rotation so that
+
+       [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.
+       [ -SN  CS  ]     [ G ]     [ 0 ]
+
+    This is a slower, more accurate version of the BLAS1 routine DROTG,
+    with the following other differences:
+       F and G are unchanged on return.
+       If G=0, then CS=1 and SN=0.
+       If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
+          floating point operations (saves work in DBDSQR when
+          there are zeros on the diagonal).
+
+    If F exceeds G in magnitude, CS will be positive.
+
+    Arguments
+    =========
+
+    F       (input) DOUBLE PRECISION
+            The first component of vector to be rotated.
+
+    G       (input) DOUBLE PRECISION
+            The second component of vector to be rotated.
+
+    CS      (output) DOUBLE PRECISION
+            The cosine of the rotation.
+
+    SN      (output) DOUBLE PRECISION
+            The sine of the rotation.
+
+    R       (output) DOUBLE PRECISION
+            The nonzero component of the rotated vector.
+
+    =====================================================================
+*/
+
+
+    if (first) {
+	first = FALSE_;
+	safmin = SAFEMINIMUM;
+	eps = EPSILON;
+	d__1 = BASE;
+	i__1 = (integer) (log(safmin / eps) / log(BASE) /
+		2.);
+	safmn2 = pow_di(&d__1, &i__1);
+	safmx2 = 1. / safmn2;
+    }
+    if (*g == 0.) {
+	*cs = 1.;
+	*sn = 0.;
+	*r__ = *f;
+    } else if (*f == 0.) {
+	*cs = 0.;
+	*sn = 1.;
+	*r__ = *g;
+    } else {
+	f1 = *f;
+	g1 = *g;
+/* Computing MAX */
+	d__1 = abs(f1), d__2 = abs(g1);
+	scale = max(d__1,d__2);
+	if (scale >= safmx2) {
+	    count = 0;
+L10:
+	    ++count;
+	    f1 *= safmn2;
+	    g1 *= safmn2;
+/* Computing MAX */
+	    d__1 = abs(f1), d__2 = abs(g1);
+	    scale = max(d__1,d__2);
+	    if (scale >= safmx2) {
+		goto L10;
+	    }
+/* Computing 2nd power */
+	    d__1 = f1;
+/* Computing 2nd power */
+	    d__2 = g1;
+	    *r__ = sqrt(d__1 * d__1 + d__2 * d__2);
+	    *cs = f1 / *r__;
+	    *sn = g1 / *r__;
+	    i__1 = count;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		*r__ *= safmx2;
+/* L20: */
+	    }
+	} else if (scale <= safmn2) {
+	    count = 0;
+L30:
+	    ++count;
+	    f1 *= safmx2;
+	    g1 *= safmx2;
+/* Computing MAX */
+	    d__1 = abs(f1), d__2 = abs(g1);
+	    scale = max(d__1,d__2);
+	    if (scale <= safmn2) {
+		goto L30;
+	    }
+/* Computing 2nd power */
+	    d__1 = f1;
+/* Computing 2nd power */
+	    d__2 = g1;
+	    *r__ = sqrt(d__1 * d__1 + d__2 * d__2);
+	    *cs = f1 / *r__;
+	    *sn = g1 / *r__;
+	    i__1 = count;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		*r__ *= safmn2;
+/* L40: */
+	    }
+	} else {
+/* Computing 2nd power */
+	    d__1 = f1;
+/* Computing 2nd power */
+	    d__2 = g1;
+	    *r__ = sqrt(d__1 * d__1 + d__2 * d__2);
+	    *cs = f1 / *r__;
+	    *sn = g1 / *r__;
+	}
+	if (abs(*f) > abs(*g) && *cs < 0.) {
+	    *cs = -(*cs);
+	    *sn = -(*sn);
+	    *r__ = -(*r__);
+	}
+    }
+    return 0;
+
+/*     End of DLARTG */
+
+} /* dlartg_ */
+
+/* Subroutine */ int dlas2_(doublereal *f, doublereal *g, doublereal *h__,
+	doublereal *ssmin, doublereal *ssmax)
+{
+    /* System generated locals */
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal c__, fa, ga, ha, as, at, au, fhmn, fhmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DLAS2  computes the singular values of the 2-by-2 matrix
+       [  F   G  ]
+       [  0   H  ].
+    On return, SSMIN is the smaller singular value and SSMAX is the
+    larger singular value.
+
+    Arguments
+    =========
+
+    F       (input) DOUBLE PRECISION
+            The (1,1) element of the 2-by-2 matrix.
+
+    G       (input) DOUBLE PRECISION
+            The (1,2) element of the 2-by-2 matrix.
+
+    H       (input) DOUBLE PRECISION
+            The (2,2) element of the 2-by-2 matrix.
+
+    SSMIN   (output) DOUBLE PRECISION
+            The smaller singular value.
+
+    SSMAX   (output) DOUBLE PRECISION
+            The larger singular value.
+
+    Further Details
+    ===============
+
+    Barring over/underflow, all output quantities are correct to within
+    a few units in the last place (ulps), even in the absence of a guard
+    digit in addition/subtraction.
+
+    In IEEE arithmetic, the code works correctly if one matrix element is
+    infinite.
+
+    Overflow will not occur unless the largest singular value itself
+    overflows, or is within a few ulps of overflow. (On machines with
+    partial overflow, like the Cray, overflow may occur if the largest
+    singular value is within a factor of 2 of overflow.)
+
+    Underflow is harmless if underflow is gradual. Otherwise, results
+    may correspond to a matrix modified by perturbations of size near
+    the underflow threshold.
+
+    ====================================================================
+*/
+
+
+    fa = abs(*f);
+    ga = abs(*g);
+    ha = abs(*h__);
+    fhmn = min(fa,ha);
+    fhmx = max(fa,ha);
+    if (fhmn == 0.) {
+	*ssmin = 0.;
+	if (fhmx == 0.) {
+	    *ssmax = ga;
+	} else {
+/* Computing 2nd power */
+	    d__1 = min(fhmx,ga) / max(fhmx,ga);
+	    *ssmax = max(fhmx,ga) * sqrt(d__1 * d__1 + 1.);
+	}
+    } else {
+	if (ga < fhmx) {
+	    as = fhmn / fhmx + 1.;
+	    at = (fhmx - fhmn) / fhmx;
+/* Computing 2nd power */
+	    d__1 = ga / fhmx;
+	    au = d__1 * d__1;
+	    c__ = 2. / (sqrt(as * as + au) + sqrt(at * at + au));
+	    *ssmin = fhmn * c__;
+	    *ssmax = fhmx / c__;
+	} else {
+	    au = fhmx / ga;
+	    if (au == 0.) {
+
+/*
+                Avoid possible harmful underflow if exponent range
+                asymmetric (true SSMIN may not underflow even if
+                AU underflows)
+*/
+
+		*ssmin = fhmn * fhmx / ga;
+		*ssmax = ga;
+	    } else {
+		as = fhmn / fhmx + 1.;
+		at = (fhmx - fhmn) / fhmx;
+/* Computing 2nd power */
+		d__1 = as * au;
+/* Computing 2nd power */
+		d__2 = at * au;
+		c__ = 1. / (sqrt(d__1 * d__1 + 1.) + sqrt(d__2 * d__2 + 1.));
+		*ssmin = fhmn * c__ * au;
+		*ssmin += *ssmin;
+		*ssmax = ga / (c__ + c__);
+	    }
+	}
+    }
+    return 0;
+
+/*     End of DLAS2 */
+
+} /* dlas2_ */
+
+/* Subroutine */ int dlascl_(char *type__, integer *kl, integer *ku,
+	doublereal *cfrom, doublereal *cto, integer *m, integer *n,
+	doublereal *a, integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer i__, j, k1, k2, k3, k4;
+    static doublereal mul, cto1;
+    static logical done;
+    static doublereal ctoc;
+    extern logical lsame_(char *, char *);
+    static integer itype;
+    static doublereal cfrom1;
+
+    static doublereal cfromc;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal bignum, smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLASCL multiplies the M by N real matrix A by the real scalar
+    CTO/CFROM.  This is done without over/underflow as long as the final
+    result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+    A may be full, upper triangular, lower triangular, upper Hessenberg,
+    or banded.
+
+    Arguments
+    =========
+
+    TYPE    (input) CHARACTER*1
+            TYPE indices the storage type of the input matrix.
+            = 'G':  A is a full matrix.
+            = 'L':  A is a lower triangular matrix.
+            = 'U':  A is an upper triangular matrix.
+            = 'H':  A is an upper Hessenberg matrix.
+            = 'B':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the lower
+                    half stored.
+            = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the upper
+                    half stored.
+            = 'Z':  A is a band matrix with lower bandwidth KL and upper
+                    bandwidth KU.
+
+    KL      (input) INTEGER
+            The lower bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    KU      (input) INTEGER
+            The upper bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    CFROM   (input) DOUBLE PRECISION
+    CTO     (input) DOUBLE PRECISION
+            The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+            without over/underflow if the final result CTO*A(I,J)/CFROM
+            can be represented without over/underflow.  CFROM must be
+            nonzero.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,M)
+            The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+            storage type.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    INFO    (output) INTEGER
+            0  - successful exit
+            <0 - if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(type__, "G")) {
+	itype = 0;
+    } else if (lsame_(type__, "L")) {
+	itype = 1;
+    } else if (lsame_(type__, "U")) {
+	itype = 2;
+    } else if (lsame_(type__, "H")) {
+	itype = 3;
+    } else if (lsame_(type__, "B")) {
+	itype = 4;
+    } else if (lsame_(type__, "Q")) {
+	itype = 5;
+    } else if (lsame_(type__, "Z")) {
+	itype = 6;
+    } else {
+	itype = -1;
+    }
+
+    if (itype == -1) {
+	*info = -1;
+    } else if (*cfrom == 0.) {
+	*info = -4;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n !=
+	    *m)) {
+	*info = -7;
+    } else if (itype <= 3 && *lda < max(1,*m)) {
+	*info = -9;
+    } else if (itype >= 4) {
+/* Computing MAX */
+	i__1 = *m - 1;
+	if ((*kl < 0) || (*kl > max(i__1,0))) {
+	    *info = -2;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = *n - 1;
+	    if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || (
+		    itype == 5)) && *kl != *ku)) {
+		*info = -3;
+	    } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda
+		    < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku
+		    + 1)) {
+		*info = -9;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASCL", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*m == 0)) {
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = SAFEMINIMUM;
+    bignum = 1. / smlnum;
+
+    cfromc = *cfrom;
+    ctoc = *cto;
+
+L10:
+    cfrom1 = cfromc * smlnum;
+    cto1 = ctoc / bignum;
+    if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) {
+	mul = smlnum;
+	done = FALSE_;
+	cfromc = cfrom1;
+    } else if (abs(cto1) > abs(cfromc)) {
+	mul = bignum;
+	done = FALSE_;
+	ctoc = cto1;
+    } else {
+	mul = ctoc / cfromc;
+	done = TRUE_;
+    }
+
+    if (itype == 0) {
+
+/*        Full matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L20: */
+	    }
+/* L30: */
+	}
+
+    } else if (itype == 1) {
+
+/*        Lower triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L40: */
+	    }
+/* L50: */
+	}
+
+    } else if (itype == 2) {
+
+/*        Upper triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L60: */
+	    }
+/* L70: */
+	}
+
+    } else if (itype == 3) {
+
+/*        Upper Hessenberg matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j + 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L80: */
+	    }
+/* L90: */
+	}
+
+    } else if (itype == 4) {
+
+/*        Lower half of a symmetric band matrix */
+
+	k3 = *kl + 1;
+	k4 = *n + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = k3, i__4 = k4 - j;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L100: */
+	    }
+/* L110: */
+	}
+
+    } else if (itype == 5) {
+
+/*        Upper half of a symmetric band matrix */
+
+	k1 = *ku + 2;
+	k3 = *ku + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__2 = k1 - j;
+	    i__3 = k3;
+	    for (i__ = max(i__2,1); i__ <= i__3; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L120: */
+	    }
+/* L130: */
+	}
+
+    } else if (itype == 6) {
+
+/*        Band matrix */
+
+	k1 = *kl + *ku + 2;
+	k2 = *kl + 1;
+	k3 = ((*kl) << (1)) + *ku + 1;
+	k4 = *kl + *ku + 1 + *m;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__3 = k1 - j;
+/* Computing MIN */
+	    i__4 = k3, i__5 = k4 - j;
+	    i__2 = min(i__4,i__5);
+	    for (i__ = max(i__3,k2); i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L140: */
+	    }
+/* L150: */
+	}
+
+    }
+
+    if (! done) {
+	goto L10;
+    }
+
+    return 0;
+
+/*     End of DLASCL */
+
+} /* dlascl_ */
+
+/* Subroutine */ int dlasd0_(integer *n, integer *sqre, doublereal *d__,
+	doublereal *e, doublereal *u, integer *ldu, doublereal *vt, integer *
+	ldvt, integer *smlsiz, integer *iwork, doublereal *work, integer *
+	info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf,
+	    iwk, lvl, ndb1, nlp1, nrp1;
+    static doublereal beta;
+    static integer idxq, nlvl;
+    static doublereal alpha;
+    static integer inode, ndiml, idxqc, ndimr, itemp, sqrei;
+    extern /* Subroutine */ int dlasd1_(integer *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     doublereal *, integer *, integer *, integer *, doublereal *,
+	    integer *), dlasdq_(char *, integer *, integer *, integer *,
+	    integer *, integer *, doublereal *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlasdt_(integer *, integer *,
+	    integer *, integer *, integer *, integer *, integer *), xerbla_(
+	    char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    Using a divide and conquer approach, DLASD0 computes the singular
+    value decomposition (SVD) of a real upper bidiagonal N-by-M
+    matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
+    The algorithm computes orthogonal matrices U and VT such that
+    B = U * S * VT. The singular values S are overwritten on D.
+
+    A related subroutine, DLASDA, computes only the singular values,
+    and optionally, the singular vectors in compact form.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           On entry, the row dimension of the upper bidiagonal matrix.
+           This is also the dimension of the main diagonal array D.
+
+    SQRE   (input) INTEGER
+           Specifies the column dimension of the bidiagonal matrix.
+           = 0: The bidiagonal matrix has column dimension M = N;
+           = 1: The bidiagonal matrix has column dimension M = N+1;
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry D contains the main diagonal of the bidiagonal
+           matrix.
+           On exit D, if INFO = 0, contains its singular values.
+
+    E      (input) DOUBLE PRECISION array, dimension (M-1)
+           Contains the subdiagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    U      (output) DOUBLE PRECISION array, dimension at least (LDQ, N)
+           On exit, U contains the left singular vectors.
+
+    LDU    (input) INTEGER
+           On entry, leading dimension of U.
+
+    VT     (output) DOUBLE PRECISION array, dimension at least (LDVT, M)
+           On exit, VT' contains the right singular vectors.
+
+    LDVT   (input) INTEGER
+           On entry, leading dimension of VT.
+
+    SMLSIZ (input) INTEGER
+           On entry, maximum size of the subproblems at the
+           bottom of the computation tree.
+
+    IWORK  INTEGER work array.
+           Dimension must be at least (8 * N)
+
+    WORK   DOUBLE PRECISION work array.
+           Dimension must be at least (3 * M**2 + 2 * M)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --iwork;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -2;
+    }
+
+    m = *n + *sqre;
+
+    if (*ldu < *n) {
+	*info = -6;
+    } else if (*ldvt < m) {
+	*info = -8;
+    } else if (*smlsiz < 3) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD0", &i__1);
+	return 0;
+    }
+
+/*     If the input matrix is too small, call DLASDQ to find the SVD. */
+
+    if (*n <= *smlsiz) {
+	dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset],
+		ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info);
+	return 0;
+    }
+
+/*     Set up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+    idxq = ndimr + *n;
+    iwk = idxq + *n;
+    dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       For the nodes on bottom level of the tree, solve
+       their subproblems by DLASDQ.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    ncc = 0;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+       IC : center row of each node
+       NL : number of rows of left  subproblem
+       NR : number of rows of right subproblem
+       NLF: starting row of the left   subproblem
+       NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nlp1 = nl + 1;
+	nr = iwork[ndimr + i1];
+	nrp1 = nr + 1;
+	nlf = ic - nl;
+	nrf = ic + 1;
+	sqrei = 1;
+	dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[
+		nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[
+		nlf + nlf * u_dim1], ldu, &work[1], info);
+	if (*info != 0) {
+	    return 0;
+	}
+	itemp = idxq + nlf - 2;
+	i__2 = nl;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[itemp + j] = j;
+/* L10: */
+	}
+	if (i__ == nd) {
+	    sqrei = *sqre;
+	} else {
+	    sqrei = 1;
+	}
+	nrp1 = nr + sqrei;
+	dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[
+		nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[
+		nrf + nrf * u_dim1], ldu, &work[1], info);
+	if (*info != 0) {
+	    return 0;
+	}
+	itemp = idxq + ic;
+	i__2 = nr;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[itemp + j - 1] = j;
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*     Now conquer each subproblem bottom-up. */
+
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+
+/*
+          Find the first node LF and last node LL on the
+          current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    if (*sqre == 0 && i__ == ll) {
+		sqrei = *sqre;
+	    } else {
+		sqrei = 1;
+	    }
+	    idxqc = idxq + nlf - 1;
+	    alpha = d__[ic];
+	    beta = e[ic];
+	    dlasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf *
+		     u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[
+		    idxqc], &iwork[iwk], &work[1], info);
+	    if (*info != 0) {
+		return 0;
+	    }
+/* L40: */
+	}
+/* L50: */
+    }
+
+    return 0;
+
+/*     End of DLASD0 */
+
+} /* dlasd0_ */
+
+/* Subroutine */ int dlasd1_(integer *nl, integer *nr, integer *sqre,
+	doublereal *d__, doublereal *alpha, doublereal *beta, doublereal *u,
+	integer *ldu, doublereal *vt, integer *ldvt, integer *idxq, integer *
+	iwork, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2,
+	    idxc, idxp, ldvt2;
+    extern /* Subroutine */ int dlasd2_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, integer *,
+	    integer *, integer *, integer *, integer *, integer *), dlasd3_(
+	    integer *, integer *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, integer *, integer *, doublereal *, integer *),
+	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *,
+	    integer *, integer *, doublereal *, integer *, integer *),
+	     dlamrg_(integer *, integer *, doublereal *, integer *, integer *,
+	     integer *);
+    static integer isigma;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal orgnrm;
+    static integer coltyp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
+    where N = NL + NR + 1 and M = N + SQRE. DLASD1 is called from DLASD0.
+
+    A related subroutine DLASD7 handles the case in which the singular
+    values (and the singular vectors in factored form) are desired.
+
+    DLASD1 computes the SVD as follows:
+
+                  ( D1(in)  0    0     0 )
+      B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+                  (   0     0   D2(in) 0 )
+
+        = U(out) * ( D(out) 0) * VT(out)
+
+    where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+    with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+    elsewhere; and the entry b is empty if SQRE = 0.
+
+    The left singular vectors of the original matrix are stored in U, and
+    the transpose of the right singular vectors are stored in VT, and the
+    singular values are in D.  The algorithm consists of three stages:
+
+       The first stage consists of deflating the size of the problem
+       when there are multiple singular values or when there are zeros in
+       the Z vector.  For each such occurence the dimension of the
+       secular equation problem is reduced by one.  This stage is
+       performed by the routine DLASD2.
+
+       The second stage consists of calculating the updated
+       singular values. This is done by finding the square roots of the
+       roots of the secular equation via the routine DLASD4 (as called
+       by DLASD3). This routine also calculates the singular vectors of
+       the current problem.
+
+       The final stage consists of computing the updated singular vectors
+       directly using the updated singular values.  The singular vectors
+       for the current problem are multiplied with the singular vectors
+       from the overall problem.
+
+    Arguments
+    =========
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    D      (input/output) DOUBLE PRECISION array,
+                          dimension (N = NL+NR+1).
+           On entry D(1:NL,1:NL) contains the singular values of the
+           upper block; and D(NL+2:N) contains the singular values of
+           the lower block. On exit D(1:N) contains the singular values
+           of the modified matrix.
+
+    ALPHA  (input) DOUBLE PRECISION
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) DOUBLE PRECISION
+           Contains the off-diagonal element associated with the added
+           row.
+
+    U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
+           On entry U(1:NL, 1:NL) contains the left singular vectors of
+           the upper block; U(NL+2:N, NL+2:N) contains the left singular
+           vectors of the lower block. On exit U contains the left
+           singular vectors of the bidiagonal matrix.
+
+    LDU    (input) INTEGER
+           The leading dimension of the array U.  LDU >= max( 1, N ).
+
+    VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
+           where M = N + SQRE.
+           On entry VT(1:NL+1, 1:NL+1)' contains the right singular
+           vectors of the upper block; VT(NL+2:M, NL+2:M)' contains
+           the right singular vectors of the lower block. On exit
+           VT' contains the right singular vectors of the
+           bidiagonal matrix.
+
+    LDVT   (input) INTEGER
+           The leading dimension of the array VT.  LDVT >= max( 1, M ).
+
+    IDXQ  (output) INTEGER array, dimension(N)
+           This contains the permutation which will reintegrate the
+           subproblem just solved back into sorted order, i.e.
+           D( IDXQ( I = 1, N ) ) will be in ascending order.
+
+    IWORK  (workspace) INTEGER array, dimension( 4 * N )
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension( 3*M**2 + 2*M )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --idxq;
+    --iwork;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*nl < 1) {
+	*info = -1;
+    } else if (*nr < 1) {
+	*info = -2;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -3;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD1", &i__1);
+	return 0;
+    }
+
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in DLASD2 and DLASD3.
+*/
+
+    ldu2 = n;
+    ldvt2 = m;
+
+    iz = 1;
+    isigma = iz + m;
+    iu2 = isigma + n;
+    ivt2 = iu2 + ldu2 * n;
+    iq = ivt2 + ldvt2 * m;
+
+    idx = 1;
+    idxc = idx + n;
+    coltyp = idxc + n;
+    idxp = coltyp + n;
+
+/*
+       Scale.
+
+   Computing MAX
+*/
+    d__1 = abs(*alpha), d__2 = abs(*beta);
+    orgnrm = max(d__1,d__2);
+    d__[*nl + 1] = 0.;
+    i__1 = n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) > orgnrm) {
+	    orgnrm = (d__1 = d__[i__], abs(d__1));
+	}
+/* L10: */
+    }
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &n, &c__1, &d__[1], &n,
+	    info);
+    *alpha /= orgnrm;
+    *beta /= orgnrm;
+
+/*     Deflate singular values. */
+
+    dlasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset],
+	    ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, &
+	    work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], &
+	    idxq[1], &iwork[coltyp], info);
+
+/*     Solve Secular Equation and update singular vectors. */
+
+    ldq = k;
+    dlasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[
+	    u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[
+	    ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info);
+    if (*info != 0) {
+	return 0;
+    }
+
+/*     Unscale. */
+
+    dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, &n, &c__1, &d__[1], &n,
+	    info);
+
+/*     Prepare the IDXQ sorting permutation. */
+
+    n1 = k;
+    n2 = n - k;
+    dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);
+
+    return 0;
+
+/*     End of DLASD1 */
+
+} /* dlasd1_ */
+
+/* Subroutine */ int dlasd2_(integer *nl, integer *nr, integer *sqre, integer
+	*k, doublereal *d__, doublereal *z__, doublereal *alpha, doublereal *
+	beta, doublereal *u, integer *ldu, doublereal *vt, integer *ldvt,
+	doublereal *dsigma, doublereal *u2, integer *ldu2, doublereal *vt2,
+	integer *ldvt2, integer *idxp, integer *idx, integer *idxc, integer *
+	idxq, integer *coltyp, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset,
+	    vt2_dim1, vt2_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static doublereal c__;
+    static integer i__, j, m, n;
+    static doublereal s;
+    static integer k2;
+    static doublereal z1;
+    static integer ct, jp;
+    static doublereal eps, tau, tol;
+    static integer psm[4], nlp1, nlp2, idxi, idxj;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static integer ctot[4], idxjp;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer jprev;
+
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), dlacpy_(char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *), dlaset_(char *, integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *), xerbla_(char *,
+	    integer *);
+    static doublereal hlftol;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASD2 merges the two sets of singular values together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    singular values are close together or if there is a tiny entry in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    DLASD2 is called from DLASD1.
+
+    Arguments
+    =========
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has N = NL + NR + 1 rows and
+           M = N + SQRE >= N columns.
+
+    K      (output) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    D      (input/output) DOUBLE PRECISION array, dimension(N)
+           On entry D contains the singular values of the two submatrices
+           to be combined.  On exit D contains the trailing (N-K) updated
+           singular values (those which were deflated) sorted into
+           increasing order.
+
+    ALPHA  (input) DOUBLE PRECISION
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) DOUBLE PRECISION
+           Contains the off-diagonal element associated with the added
+           row.
+
+    U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
+           On entry U contains the left singular vectors of two
+           submatrices in the two square blocks with corners at (1,1),
+           (NL, NL), and (NL+2, NL+2), (N,N).
+           On exit U contains the trailing (N-K) updated left singular
+           vectors (those which were deflated) in its last N-K columns.
+
+    LDU    (input) INTEGER
+           The leading dimension of the array U.  LDU >= N.
+
+    Z      (output) DOUBLE PRECISION array, dimension(N)
+           On exit Z contains the updating row vector in the secular
+           equation.
+
+    DSIGMA (output) DOUBLE PRECISION array, dimension (N)
+           Contains a copy of the diagonal elements (K-1 singular values
+           and one zero) in the secular equation.
+
+    U2     (output) DOUBLE PRECISION array, dimension(LDU2,N)
+           Contains a copy of the first K-1 left singular vectors which
+           will be used by DLASD3 in a matrix multiply (DGEMM) to solve
+           for the new left singular vectors. U2 is arranged into four
+           blocks. The first block contains a column with 1 at NL+1 and
+           zero everywhere else; the second block contains non-zero
+           entries only at and above NL; the third contains non-zero
+           entries only below NL+1; and the fourth is dense.
+
+    LDU2   (input) INTEGER
+           The leading dimension of the array U2.  LDU2 >= N.
+
+    VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
+           On entry VT' contains the right singular vectors of two
+           submatrices in the two square blocks with corners at (1,1),
+           (NL+1, NL+1), and (NL+2, NL+2), (M,M).
+           On exit VT' contains the trailing (N-K) updated right singular
+           vectors (those which were deflated) in its last N-K columns.
+           In case SQRE =1, the last row of VT spans the right null
+           space.
+
+    LDVT   (input) INTEGER
+           The leading dimension of the array VT.  LDVT >= M.
+
+    VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,N)
+           VT2' contains a copy of the first K right singular vectors
+           which will be used by DLASD3 in a matrix multiply (DGEMM) to
+           solve for the new right singular vectors. VT2 is arranged into
+           three blocks. The first block contains a row that corresponds
+           to the special 0 diagonal element in SIGMA; the second block
+           contains non-zeros only at and before NL +1; the third block
+           contains non-zeros only at and after  NL +2.
+
+    LDVT2  (input) INTEGER
+           The leading dimension of the array VT2.  LDVT2 >= M.
+
+    IDXP   (workspace) INTEGER array, dimension(N)
+           This will contain the permutation used to place deflated
+           values of D at the end of the array. On output IDXP(2:K)
+           points to the nondeflated D-values and IDXP(K+1:N)
+           points to the deflated singular values.
+
+    IDX    (workspace) INTEGER array, dimension(N)
+           This will contain the permutation used to sort the contents of
+           D into ascending order.
+
+    IDXC   (output) INTEGER array, dimension(N)
+           This will contain the permutation used to arrange the columns
+           of the deflated U matrix into three groups:  the first group
+           contains non-zero entries only at and above NL, the second
+           contains non-zero entries only below NL+2, and the third is
+           dense.
+
+    COLTYP (workspace/output) INTEGER array, dimension(N)
+           As workspace, this will contain a label which will indicate
+           which of the following types a column in the U2 matrix or a
+           row in the VT2 matrix is:
+           1 : non-zero in the upper half only
+           2 : non-zero in the lower half only
+           3 : dense
+           4 : deflated
+
+           On exit, it is an array of dimension 4, with COLTYP(I) being
+           the dimension of the I-th type columns.
+
+    IDXQ   (input) INTEGER array, dimension(N)
+           This contains the permutation which separately sorts the two
+           sub-problems in D into ascending order.  Note that entries in
+           the first hlaf of this permutation must first be moved one
+           position backward; and entries in the second half
+           must first have NL+1 added to their values.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --z__;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --dsigma;
+    u2_dim1 = *ldu2;
+    u2_offset = 1 + u2_dim1;
+    u2 -= u2_offset;
+    vt2_dim1 = *ldvt2;
+    vt2_offset = 1 + vt2_dim1;
+    vt2 -= vt2_offset;
+    --idxp;
+    --idx;
+    --idxc;
+    --idxq;
+    --coltyp;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*nl < 1) {
+	*info = -1;
+    } else if (*nr < 1) {
+	*info = -2;
+    } else if (*sqre != 1 && *sqre != 0) {
+	*info = -3;
+    }
+
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+    if (*ldu < n) {
+	*info = -10;
+    } else if (*ldvt < m) {
+	*info = -12;
+    } else if (*ldu2 < n) {
+	*info = -15;
+    } else if (*ldvt2 < m) {
+	*info = -17;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD2", &i__1);
+	return 0;
+    }
+
+    nlp1 = *nl + 1;
+    nlp2 = *nl + 2;
+
+/*
+       Generate the first part of the vector Z; and move the singular
+       values in the first part of D one position backward.
+*/
+
+    z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
+    z__[1] = z1;
+    for (i__ = *nl; i__ >= 1; --i__) {
+	z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
+	d__[i__ + 1] = d__[i__];
+	idxq[i__ + 1] = idxq[i__] + 1;
+/* L10: */
+    }
+
+/*     Generate the second part of the vector Z. */
+
+    i__1 = m;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
+/* L20: */
+    }
+
+/*     Initialize some reference arrays. */
+
+    i__1 = nlp1;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	coltyp[i__] = 1;
+/* L30: */
+    }
+    i__1 = n;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	coltyp[i__] = 2;
+/* L40: */
+    }
+
+/*     Sort the singular values into increasing order */
+
+    i__1 = n;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	idxq[i__] += nlp1;
+/* L50: */
+    }
+
+/*
+       DSIGMA, IDXC, IDXC, and the first column of U2
+       are used as storage space.
+*/
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	dsigma[i__] = d__[idxq[i__]];
+	u2[i__ + u2_dim1] = z__[idxq[i__]];
+	idxc[i__] = coltyp[idxq[i__]];
+/* L60: */
+    }
+
+    dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	idxi = idx[i__] + 1;
+	d__[i__] = dsigma[idxi];
+	z__[i__] = u2[idxi + u2_dim1];
+	coltyp[i__] = idxc[idxi];
+/* L70: */
+    }
+
+/*     Calculate the allowable deflation tolerance */
+
+    eps = EPSILON;
+/* Computing MAX */
+    d__1 = abs(*alpha), d__2 = abs(*beta);
+    tol = max(d__1,d__2);
+/* Computing MAX */
+    d__2 = (d__1 = d__[n], abs(d__1));
+    tol = eps * 8. * max(d__2,tol);
+
+/*
+       There are 2 kinds of deflation -- first a value in the z-vector
+       is small, second two (or more) singular values are very close
+       together (their difference is small).
+
+       If the value in the z-vector is small, we simply permute the
+       array so that the corresponding singular value is moved to the
+       end.
+
+       If two values in the D-vector are close, we perform a two-sided
+       rotation designed to make one of the corresponding z-vector
+       entries zero, and then permute the array so that the deflated
+       singular value is moved to the end.
+
+       If there are multiple singular values then the problem deflates.
+       Here the number of equal singular values are found.  As each equal
+       singular value is found, an elementary reflector is computed to
+       rotate the corresponding singular subspace so that the
+       corresponding components of Z are zero in this new basis.
+*/
+
+    *k = 1;
+    k2 = n + 1;
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	if ((d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    idxp[k2] = j;
+	    coltyp[j] = 4;
+	    if (j == n) {
+		goto L120;
+	    }
+	} else {
+	    jprev = j;
+	    goto L90;
+	}
+/* L80: */
+    }
+L90:
+    j = jprev;
+L100:
+    ++j;
+    if (j > n) {
+	goto L110;
+    }
+    if ((d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	idxp[k2] = j;
+	coltyp[j] = 4;
+    } else {
+
+/*        Check if singular values are close enough to allow deflation. */
+
+	if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    s = z__[jprev];
+	    c__ = z__[j];
+
+/*
+             Find sqrt(a**2+b**2) without overflow or
+             destructive underflow.
+*/
+
+	    tau = dlapy2_(&c__, &s);
+	    c__ /= tau;
+	    s = -s / tau;
+	    z__[j] = tau;
+	    z__[jprev] = 0.;
+
+/*
+             Apply back the Givens rotation to the left and right
+             singular vector matrices.
+*/
+
+	    idxjp = idxq[idx[jprev] + 1];
+	    idxj = idxq[idx[j] + 1];
+	    if (idxjp <= nlp1) {
+		--idxjp;
+	    }
+	    if (idxj <= nlp1) {
+		--idxj;
+	    }
+	    drot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &
+		    c__1, &c__, &s);
+	    drot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &
+		    c__, &s);
+	    if (coltyp[j] != coltyp[jprev]) {
+		coltyp[j] = 3;
+	    }
+	    coltyp[jprev] = 4;
+	    --k2;
+	    idxp[k2] = jprev;
+	    jprev = j;
+	} else {
+	    ++(*k);
+	    u2[*k + u2_dim1] = z__[jprev];
+	    dsigma[*k] = d__[jprev];
+	    idxp[*k] = jprev;
+	    jprev = j;
+	}
+    }
+    goto L100;
+L110:
+
+/*     Record the last singular value. */
+
+    ++(*k);
+    u2[*k + u2_dim1] = z__[jprev];
+    dsigma[*k] = d__[jprev];
+    idxp[*k] = jprev;
+
+L120:
+
+/*
+       Count up the total number of the various types of columns, then
+       form a permutation which positions the four column types into
+       four groups of uniform structure (although one or more of these
+       groups may be empty).
+*/
+
+    for (j = 1; j <= 4; ++j) {
+	ctot[j - 1] = 0;
+/* L130: */
+    }
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	ct = coltyp[j];
+	++ctot[ct - 1];
+/* L140: */
+    }
+
+/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
+
+    psm[0] = 2;
+    psm[1] = ctot[0] + 2;
+    psm[2] = psm[1] + ctot[1];
+    psm[3] = psm[2] + ctot[2];
+
+/*
+       Fill out the IDXC array so that the permutation which it induces
+       will place all type-1 columns first, all type-2 columns next,
+       then all type-3's, and finally all type-4's, starting from the
+       second column. This applies similarly to the rows of VT.
+*/
+
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	jp = idxp[j];
+	ct = coltyp[jp];
+	idxc[psm[ct - 1]] = j;
+	++psm[ct - 1];
+/* L150: */
+    }
+
+/*
+       Sort the singular values and corresponding singular vectors into
+       DSIGMA, U2, and VT2 respectively.  The singular values/vectors
+       which were not deflated go into the first K slots of DSIGMA, U2,
+       and VT2 respectively, while those which were deflated go into the
+       last N - K slots, except that the first column/row will be treated
+       separately.
+*/
+
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	jp = idxp[j];
+	dsigma[j] = d__[jp];
+	idxj = idxq[idx[idxp[idxc[j]]] + 1];
+	if (idxj <= nlp1) {
+	    --idxj;
+	}
+	dcopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
+	dcopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
+/* L160: */
+    }
+
+/*     Determine DSIGMA(1), DSIGMA(2) and Z(1) */
+
+    dsigma[1] = 0.;
+    hlftol = tol / 2.;
+    if (abs(dsigma[2]) <= hlftol) {
+	dsigma[2] = hlftol;
+    }
+    if (m > n) {
+	z__[1] = dlapy2_(&z1, &z__[m]);
+	if (z__[1] <= tol) {
+	    c__ = 1.;
+	    s = 0.;
+	    z__[1] = tol;
+	} else {
+	    c__ = z1 / z__[1];
+	    s = z__[m] / z__[1];
+	}
+    } else {
+	if (abs(z1) <= tol) {
+	    z__[1] = tol;
+	} else {
+	    z__[1] = z1;
+	}
+    }
+
+/*     Move the rest of the updating row to Z. */
+
+    i__1 = *k - 1;
+    dcopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);
+
+/*
+       Determine the first column of U2, the first row of VT2 and the
+       last row of VT.
+*/
+
+    dlaset_("A", &n, &c__1, &c_b2879, &c_b2879, &u2[u2_offset], ldu2);
+    u2[nlp1 + u2_dim1] = 1.;
+    if (m > n) {
+	i__1 = nlp1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
+	    vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
+/* L170: */
+	}
+	i__1 = m;
+	for (i__ = nlp2; i__ <= i__1; ++i__) {
+	    vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
+	    vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
+/* L180: */
+	}
+    } else {
+	dcopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
+    }
+    if (m > n) {
+	dcopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
+    }
+
+/*
+       The deflated singular values and their corresponding vectors go
+       into the back of D, U, and V respectively.
+*/
+
+    if (n > *k) {
+	i__1 = n - *k;
+	dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	i__1 = n - *k;
+	dlacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1)
+		 * u_dim1 + 1], ldu);
+	i__1 = n - *k;
+	dlacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 +
+		vt_dim1], ldvt);
+    }
+
+/*     Copy CTOT into COLTYP for referencing in DLASD3. */
+
+    for (j = 1; j <= 4; ++j) {
+	coltyp[j] = ctot[j - 1];
+/* L190: */
+    }
+
+    return 0;
+
+/*     End of DLASD2 */
+
+} /* dlasd2_ */
+
+/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer
+	*k, doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma,
+	doublereal *u, integer *ldu, doublereal *u2, integer *ldu2,
+	doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2,
+	integer *idxc, integer *ctot, doublereal *z__, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
+	    vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer i__, j, m, n, jc;
+    static doublereal rho;
+    static integer nlp1, nlp2, nrp1;
+    static doublereal temp;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer ctemp;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer ktemp;
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, integer *), dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlacpy_(char *, integer *, integer
+	    *, doublereal *, integer *, doublereal *, integer *),
+	    xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASD3 finds all the square roots of the roots of the secular
+    equation, as defined by the values in D and Z.  It makes the
+    appropriate calls to DLASD4 and then updates the singular
+    vectors by matrix multiplication.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    DLASD3 is called from DLASD1.
+
+    Arguments
+    =========
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has N = NL + NR + 1 rows and
+           M = N + SQRE >= N columns.
+
+    K      (input) INTEGER
+           The size of the secular equation, 1 =< K = < N.
+
+    D      (output) DOUBLE PRECISION array, dimension(K)
+           On exit the square roots of the roots of the secular equation,
+           in ascending order.
+
+    Q      (workspace) DOUBLE PRECISION array,
+                       dimension at least (LDQ,K).
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= K.
+
+    DSIGMA (input) DOUBLE PRECISION array, dimension(K)
+           The first K elements of this array contain the old roots
+           of the deflated updating problem.  These are the poles
+           of the secular equation.
+
+    U      (input) DOUBLE PRECISION array, dimension (LDU, N)
+           The last N - K columns of this matrix contain the deflated
+           left singular vectors.
+
+    LDU    (input) INTEGER
+           The leading dimension of the array U.  LDU >= N.
+
+    U2     (input) DOUBLE PRECISION array, dimension (LDU2, N)
+           The first K columns of this matrix contain the non-deflated
+           left singular vectors for the split problem.
+
+    LDU2   (input) INTEGER
+           The leading dimension of the array U2.  LDU2 >= N.
+
+    VT     (input) DOUBLE PRECISION array, dimension (LDVT, M)
+           The last M - K columns of VT' contain the deflated
+           right singular vectors.
+
+    LDVT   (input) INTEGER
+           The leading dimension of the array VT.  LDVT >= N.
+
+    VT2    (input) DOUBLE PRECISION array, dimension (LDVT2, N)
+           The first K columns of VT2' contain the non-deflated
+           right singular vectors for the split problem.
+
+    LDVT2  (input) INTEGER
+           The leading dimension of the array VT2.  LDVT2 >= N.
+
+    IDXC   (input) INTEGER array, dimension ( N )
+           The permutation used to arrange the columns of U (and rows of
+           VT) into three groups:  the first group contains non-zero
+           entries only at and above (or before) NL +1; the second
+           contains non-zero entries only at and below (or after) NL+2;
+           and the third is dense. The first column of U and the row of
+           VT are treated separately, however.
+
+           The rows of the singular vectors found by DLASD4
+           must be likewise permuted before the matrix multiplies can
+           take place.
+
+    CTOT   (input) INTEGER array, dimension ( 4 )
+           A count of the total number of the various types of columns
+           in U (or rows in VT), as described in IDXC. The fourth column
+           type is any column which has been deflated.
+
+    Z      (input) DOUBLE PRECISION array, dimension (K)
+           The first K elements of this array contain the components
+           of the deflation-adjusted updating row vector.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+           > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dsigma;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    u2_dim1 = *ldu2;
+    u2_offset = 1 + u2_dim1;
+    u2 -= u2_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    vt2_dim1 = *ldvt2;
+    vt2_offset = 1 + vt2_dim1;
+    vt2 -= vt2_offset;
+    --idxc;
+    --ctot;
+    --z__;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*nl < 1) {
+	*info = -1;
+    } else if (*nr < 1) {
+	*info = -2;
+    } else if (*sqre != 1 && *sqre != 0) {
+	*info = -3;
+    }
+
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+    nlp1 = *nl + 1;
+    nlp2 = *nl + 2;
+
+    if ((*k < 1) || (*k > n)) {
+	*info = -4;
+    } else if (*ldq < *k) {
+	*info = -7;
+    } else if (*ldu < n) {
+	*info = -10;
+    } else if (*ldu2 < n) {
+	*info = -12;
+    } else if (*ldvt < m) {
+	*info = -14;
+    } else if (*ldvt2 < m) {
+	*info = -16;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD3", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 1) {
+	d__[1] = abs(z__[1]);
+	dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
+	if (z__[1] > 0.) {
+	    dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
+	} else {
+	    i__1 = n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		u[i__ + u_dim1] = -u2[i__ + u2_dim1];
+/* L10: */
+	    }
+	}
+	return 0;
+    }
+
+/*
+       Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DSIGMA(I) if it is 1; this makes the subsequent
+       subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DSIGMA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DSIGMA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
+/* L20: */
+    }
+
+/*     Keep a copy of Z. */
+
+    dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);
+
+/*     Normalize Z. */
+
+    rho = dnrm2_(k, &z__[1], &c__1);
+    dlascl_("G", &c__0, &c__0, &rho, &c_b2865, k, &c__1, &z__[1], k, info);
+    rho *= rho;
+
+/*     Find the new singular values. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
+		 &vt[j * vt_dim1 + 1], info);
+
+/*        If the zero finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    return 0;
+	}
+/* L30: */
+    }
+
+/*     Compute updated Z. */
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
+	i__2 = i__ - 1;
+	for (j = 1; j <= i__2; ++j) {
+	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
+		    i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
+/* L40: */
+	}
+	i__2 = *k - 1;
+	for (j = i__; j <= i__2; ++j) {
+	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
+		    i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
+/* L50: */
+	}
+	d__2 = sqrt((d__1 = z__[i__], abs(d__1)));
+	z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]);
+/* L60: */
+    }
+
+/*
+       Compute left singular vectors of the modified diagonal matrix,
+       and store related information for the right singular vectors.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
+		vt_dim1 + 1];
+	u[i__ * u_dim1 + 1] = -1.;
+	i__2 = *k;
+	for (j = 2; j <= i__2; ++j) {
+	    vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
+		    * vt_dim1];
+	    u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
+/* L70: */
+	}
+	temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1);
+	q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
+	i__2 = *k;
+	for (j = 2; j <= i__2; ++j) {
+	    jc = idxc[j];
+	    q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
+/* L80: */
+	}
+/* L90: */
+    }
+
+/*     Update the left singular vector matrix. */
+
+    if (*k == 2) {
+	dgemm_("N", "N", &n, k, k, &c_b2865, &u2[u2_offset], ldu2, &q[
+		q_offset], ldq, &c_b2879, &u[u_offset], ldu);
+	goto L100;
+    }
+    if (ctot[1] > 0) {
+	dgemm_("N", "N", nl, k, &ctot[1], &c_b2865, &u2[((u2_dim1) << (1)) +
+		1], ldu2, &q[q_dim1 + 2], ldq, &c_b2879, &u[u_dim1 + 1], ldu);
+	if (ctot[3] > 0) {
+	    ktemp = ctot[1] + 2 + ctot[2];
+	    dgemm_("N", "N", nl, k, &ctot[3], &c_b2865, &u2[ktemp * u2_dim1 +
+		    1], ldu2, &q[ktemp + q_dim1], ldq, &c_b2865, &u[u_dim1 +
+		    1], ldu);
+	}
+    } else if (ctot[3] > 0) {
+	ktemp = ctot[1] + 2 + ctot[2];
+	dgemm_("N", "N", nl, k, &ctot[3], &c_b2865, &u2[ktemp * u2_dim1 + 1],
+		ldu2, &q[ktemp + q_dim1], ldq, &c_b2879, &u[u_dim1 + 1], ldu);
+    } else {
+	dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
+    }
+    dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
+    ktemp = ctot[1] + 2;
+    ctemp = ctot[2] + ctot[3];
+    dgemm_("N", "N", nr, k, &ctemp, &c_b2865, &u2[nlp2 + ktemp * u2_dim1],
+	    ldu2, &q[ktemp + q_dim1], ldq, &c_b2879, &u[nlp2 + u_dim1], ldu);
+
+/*     Generate the right singular vectors. */
+
+L100:
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1);
+	q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
+	i__2 = *k;
+	for (j = 2; j <= i__2; ++j) {
+	    jc = idxc[j];
+	    q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
+/* L110: */
+	}
+/* L120: */
+    }
+
+/*     Update the right singular vector matrix. */
+
+    if (*k == 2) {
+	dgemm_("N", "N", k, &m, k, &c_b2865, &q[q_offset], ldq, &vt2[
+		vt2_offset], ldvt2, &c_b2879, &vt[vt_offset], ldvt);
+	return 0;
+    }
+    ktemp = ctot[1] + 1;
+    dgemm_("N", "N", k, &nlp1, &ktemp, &c_b2865, &q[q_dim1 + 1], ldq, &vt2[
+	    vt2_dim1 + 1], ldvt2, &c_b2879, &vt[vt_dim1 + 1], ldvt);
+    ktemp = ctot[1] + 2 + ctot[2];
+    if (ktemp <= *ldvt2) {
+	dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b2865, &q[ktemp * q_dim1 + 1],
+		 ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b2865, &vt[vt_dim1 +
+		1], ldvt);
+    }
+
+    ktemp = ctot[1] + 1;
+    nrp1 = *nr + *sqre;
+    if (ktemp > 1) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
+/* L130: */
+	}
+	i__1 = m;
+	for (i__ = nlp2; i__ <= i__1; ++i__) {
+	    vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
+/* L140: */
+	}
+    }
+    ctemp = ctot[2] + 1 + ctot[3];
+    dgemm_("N", "N", k, &nrp1, &ctemp, &c_b2865, &q[ktemp * q_dim1 + 1], ldq,
+	    &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b2879, &vt[nlp2 *
+	    vt_dim1 + 1], ldvt);
+
+    return 0;
+
+/*     End of DLASD3 */
+
+} /* dlasd3_ */
+
+/* Subroutine */ int dlasd4_(integer *n, integer *i__, doublereal *d__,
+	doublereal *z__, doublereal *delta, doublereal *rho, doublereal *
+	sigma, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal a, b, c__;
+    static integer j;
+    static doublereal w, dd[3];
+    static integer ii;
+    static doublereal dw, zz[3];
+    static integer ip1;
+    static doublereal eta, phi, eps, tau, psi;
+    static integer iim1, iip1;
+    static doublereal dphi, dpsi;
+    static integer iter;
+    static doublereal temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq,
+	    dtiip;
+    static integer niter;
+    static doublereal dtisq;
+    static logical swtch;
+    static doublereal dtnsq;
+    extern /* Subroutine */ int dlaed6_(integer *, logical *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *)
+	    , dlasd5_(integer *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *);
+    static doublereal delsq2, dtnsq1;
+    static logical swtch3;
+
+    static logical orgati;
+    static doublereal erretm, dtipsq, rhoinv;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    This subroutine computes the square root of the I-th updated
+    eigenvalue of a positive symmetric rank-one modification to
+    a positive diagonal matrix whose entries are given as the squares
+    of the corresponding entries in the array d, and that
+
+           0 <= D(i) < D(j)  for  i < j
+
+    and that RHO > 0. This is arranged by the calling routine, and is
+    no loss in generality.  The rank-one modified system is thus
+
+           diag( D ) * diag( D ) +  RHO *  Z * Z_transpose.
+
+    where we assume the Euclidean norm of Z is 1.
+
+    The method consists of approximating the rational functions in the
+    secular equation by simpler interpolating rational functions.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The length of all arrays.
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  1 <= I <= N.
+
+    D      (input) DOUBLE PRECISION array, dimension ( N )
+           The original eigenvalues.  It is assumed that they are in
+           order, 0 <= D(I) < D(J)  for I < J.
+
+    Z      (input) DOUBLE PRECISION array, dimension ( N )
+           The components of the updating vector.
+
+    DELTA  (output) DOUBLE PRECISION array, dimension ( N )
+           If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th
+           component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+           contains the information necessary to construct the
+           (singular) eigenvectors.
+
+    RHO    (input) DOUBLE PRECISION
+           The scalar in the symmetric updating formula.
+
+    SIGMA  (output) DOUBLE PRECISION
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension ( N )
+           If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th
+           component.  If N = 1, then WORK( 1 ) = 1.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit
+           > 0:  if INFO = 1, the updating process failed.
+
+    Internal Parameters
+    ===================
+
+    Logical variable ORGATI (origin-at-i?) is used for distinguishing
+    whether D(i) or D(i+1) is treated as the origin.
+
+              ORGATI = .true.    origin at i
+              ORGATI = .false.   origin at i+1
+
+    Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+    if we are working with THREE poles!
+
+    MAXIT is the maximum number of iterations allowed for each
+    eigenvalue.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Since this routine is called in an inner loop, we do no argument
+       checking.
+
+       Quick return for N=1 and 2.
+*/
+
+    /* Parameter adjustments */
+    --work;
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    if (*n == 1) {
+
+/*        Presumably, I=1 upon entry */
+
+	*sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]);
+	delta[1] = 1.;
+	work[1] = 1.;
+	return 0;
+    }
+    if (*n == 2) {
+	dlasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]);
+	return 0;
+    }
+
+/*     Compute machine epsilon */
+
+    eps = EPSILON;
+    rhoinv = 1. / *rho;
+
+/*     The case I = N */
+
+    if (*i__ == *n) {
+
+/*        Initialize some basic variables */
+
+	ii = *n - 1;
+	niter = 1;
+
+/*        Calculate initial guess */
+
+	temp = *rho / 2.;
+
+/*
+          If ||Z||_2 is not one, then TEMP should be set to
+          RHO * ||Z||_2^2 / TWO
+*/
+
+	temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp));
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    work[j] = d__[j] + d__[*n] + temp1;
+	    delta[j] = d__[j] - d__[*n] - temp1;
+/* L10: */
+	}
+
+	psi = 0.;
+	i__1 = *n - 2;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / (delta[j] * work[j]);
+/* L20: */
+	}
+
+	c__ = rhoinv + psi;
+	w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[*
+		n] / (delta[*n] * work[*n]);
+
+	if (w <= 0.) {
+	    temp1 = sqrt(d__[*n] * d__[*n] + *rho);
+	    temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[*
+		    n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] *
+		    z__[*n] / *rho;
+
+/*
+             The following TAU is to approximate
+             SIGMA_n^2 - D( N )*D( N )
+*/
+
+	    if (c__ <= temp) {
+		tau = *rho;
+	    } else {
+		delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
+		a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*
+			n];
+		b = z__[*n] * z__[*n] * delsq;
+		if (a < 0.) {
+		    tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
+		} else {
+		    tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
+		}
+	    }
+
+/*
+             It can be proved that
+                 D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO
+*/
+
+	} else {
+	    delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
+	    a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
+	    b = z__[*n] * z__[*n] * delsq;
+
+/*
+             The following TAU is to approximate
+             SIGMA_n^2 - D( N )*D( N )
+*/
+
+	    if (a < 0.) {
+		tau = b * 2. / (sqrt(a * a + b * 4. * c__) - a);
+	    } else {
+		tau = (a + sqrt(a * a + b * 4. * c__)) / (c__ * 2.);
+	    }
+
+/*
+             It can be proved that
+             D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2
+*/
+
+	}
+
+/*        The following ETA is to approximate SIGMA_n - D( N ) */
+
+	eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau));
+
+	*sigma = d__[*n] + eta;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - eta;
+	    work[j] = d__[j] + d__[*i__] + eta;
+/* L30: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (delta[j] * work[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L40: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / (delta[*n] * work[*n]);
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
+		+ dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Test for convergence */
+
+	if (abs(w) <= eps * erretm) {
+	    goto L240;
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	dtnsq1 = work[*n - 1] * delta[*n - 1];
+	dtnsq = work[*n] * delta[*n];
+	c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
+	a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi);
+	b = dtnsq * dtnsq1 * w;
+	if (c__ < 0.) {
+	    c__ = abs(c__);
+	}
+	if (c__ == 0.) {
+	    eta = *rho - *sigma * *sigma;
+	} else if (a >= 0.) {
+	    eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (c__
+		    * 2.);
+	} else {
+	    eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))
+		    );
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta > 0.) {
+	    eta = -w / (dpsi + dphi);
+	}
+	temp = eta - dtnsq;
+	if (temp > *rho) {
+	    eta = *rho + dtnsq;
+	}
+
+	tau += eta;
+	eta /= *sigma + sqrt(eta + *sigma * *sigma);
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] -= eta;
+	    work[j] += eta;
+/* L50: */
+	}
+
+	*sigma += eta;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L60: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / (work[*n] * delta[*n]);
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (dpsi
+		+ dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Main loop to update the values of the array   DELTA */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 20; ++niter) {
+
+/*           Test for convergence */
+
+	    if (abs(w) <= eps * erretm) {
+		goto L240;
+	    }
+
+/*           Calculate the new step */
+
+	    dtnsq1 = work[*n - 1] * delta[*n - 1];
+	    dtnsq = work[*n] * delta[*n];
+	    c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
+	    a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi);
+	    b = dtnsq1 * dtnsq * w;
+	    if (a >= 0.) {
+		eta = (a + sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+			c__ * 2.);
+	    } else {
+		eta = b * 2. / (a - sqrt((d__1 = a * a - b * 4. * c__, abs(
+			d__1))));
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta > 0.) {
+		eta = -w / (dpsi + dphi);
+	    }
+	    temp = eta - dtnsq;
+	    if (temp <= 0.) {
+		eta /= 2.;
+	    }
+
+	    tau += eta;
+	    eta /= *sigma + sqrt(eta + *sigma * *sigma);
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] -= eta;
+		work[j] += eta;
+/* L70: */
+	    }
+
+	    *sigma += eta;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.;
+	    psi = 0.;
+	    erretm = 0.;
+	    i__1 = ii;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / (work[j] * delta[j]);
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L80: */
+	    }
+	    erretm = abs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    temp = z__[*n] / (work[*n] * delta[*n]);
+	    phi = z__[*n] * temp;
+	    dphi = temp * temp;
+	    erretm = (-phi - psi) * 8. + erretm - phi + rhoinv + abs(tau) * (
+		    dpsi + dphi);
+
+	    w = rhoinv + phi + psi;
+/* L90: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+	goto L240;
+
+/*        End for the case I = N */
+
+    } else {
+
+/*        The case for I < N */
+
+	niter = 1;
+	ip1 = *i__ + 1;
+
+/*        Calculate initial guess */
+
+	delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]);
+	delsq2 = delsq / 2.;
+	temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2));
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    work[j] = d__[j] + d__[*i__] + temp;
+	    delta[j] = d__[j] - d__[*i__] - temp;
+/* L100: */
+	}
+
+	psi = 0.;
+	i__1 = *i__ - 1;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / (work[j] * delta[j]);
+/* L110: */
+	}
+
+	phi = 0.;
+	i__1 = *i__ + 2;
+	for (j = *n; j >= i__1; --j) {
+	    phi += z__[j] * z__[j] / (work[j] * delta[j]);
+/* L120: */
+	}
+	c__ = rhoinv + psi + phi;
+	w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[
+		ip1] * z__[ip1] / (work[ip1] * delta[ip1]);
+
+	if (w > 0.) {
+
+/*
+             d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
+
+             We choose d(i) as origin.
+*/
+
+	    orgati = TRUE_;
+	    sg2lb = 0.;
+	    sg2ub = delsq2;
+	    a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
+	    b = z__[*i__] * z__[*i__] * delsq;
+	    if (a > 0.) {
+		tau = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
+			d__1))));
+	    } else {
+		tau = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+			c__ * 2.);
+	    }
+
+/*
+             TAU now is an estimation of SIGMA^2 - D( I )^2. The
+             following, however, is the corresponding estimation of
+             SIGMA - D( I ).
+*/
+
+	    eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau));
+	} else {
+
+/*
+             (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
+
+             We choose d(i+1) as origin.
+*/
+
+	    orgati = FALSE_;
+	    sg2lb = -delsq2;
+	    sg2ub = 0.;
+	    a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
+	    b = z__[ip1] * z__[ip1] * delsq;
+	    if (a < 0.) {
+		tau = b * 2. / (a - sqrt((d__1 = a * a + b * 4. * c__, abs(
+			d__1))));
+	    } else {
+		tau = -(a + sqrt((d__1 = a * a + b * 4. * c__, abs(d__1)))) /
+			(c__ * 2.);
+	    }
+
+/*
+             TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The
+             following, however, is the corresponding estimation of
+             SIGMA - D( IP1 ).
+*/
+
+	    eta = tau / (d__[ip1] + sqrt((d__1 = d__[ip1] * d__[ip1] + tau,
+		    abs(d__1))));
+	}
+
+	if (orgati) {
+	    ii = *i__;
+	    *sigma = d__[*i__] + eta;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		work[j] = d__[j] + d__[*i__] + eta;
+		delta[j] = d__[j] - d__[*i__] - eta;
+/* L130: */
+	    }
+	} else {
+	    ii = *i__ + 1;
+	    *sigma = d__[ip1] + eta;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		work[j] = d__[j] + d__[ip1] + eta;
+		delta[j] = d__[j] - d__[ip1] - eta;
+/* L140: */
+	    }
+	}
+	iim1 = ii - 1;
+	iip1 = ii + 1;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L150: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.;
+	phi = 0.;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L160: */
+	}
+
+	w = rhoinv + phi + psi;
+
+/*
+          W is the value of the secular function with
+          its ii-th element removed.
+*/
+
+	swtch3 = FALSE_;
+	if (orgati) {
+	    if (w < 0.) {
+		swtch3 = TRUE_;
+	    }
+	} else {
+	    if (w > 0.) {
+		swtch3 = TRUE_;
+	    }
+	}
+	if ((ii == 1) || (ii == *n)) {
+	    swtch3 = FALSE_;
+	}
+
+	temp = z__[ii] / (work[ii] * delta[ii]);
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w += temp;
+	erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
+		abs(tau) * dw;
+
+/*        Test for convergence */
+
+	if (abs(w) <= eps * erretm) {
+	    goto L240;
+	}
+
+	if (w <= 0.) {
+	    sg2lb = max(sg2lb,tau);
+	} else {
+	    sg2ub = min(sg2ub,tau);
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	if (! swtch3) {
+	    dtipsq = work[ip1] * delta[ip1];
+	    dtisq = work[*i__] * delta[*i__];
+	    if (orgati) {
+/* Computing 2nd power */
+		d__1 = z__[*i__] / dtisq;
+		c__ = w - dtipsq * dw + delsq * (d__1 * d__1);
+	    } else {
+/* Computing 2nd power */
+		d__1 = z__[ip1] / dtipsq;
+		c__ = w - dtisq * dw - delsq * (d__1 * d__1);
+	    }
+	    a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
+	    b = dtipsq * dtisq * w;
+	    if (c__ == 0.) {
+		if (a == 0.) {
+		    if (orgati) {
+			a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi +
+				dphi);
+		    } else {
+			a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi +
+				dphi);
+		    }
+		}
+		eta = b / a;
+	    } else if (a <= 0.) {
+		eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1)))) / (
+			c__ * 2.);
+	    } else {
+		eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__, abs(
+			d__1))));
+	    }
+	} else {
+
+/*           Interpolation using THREE most relevant poles */
+
+	    dtiim = work[iim1] * delta[iim1];
+	    dtiip = work[iip1] * delta[iip1];
+	    temp = rhoinv + psi + phi;
+	    if (orgati) {
+		temp1 = z__[iim1] / dtiim;
+		temp1 *= temp1;
+		c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) *
+			 (d__[iim1] + d__[iip1]) * temp1;
+		zz[0] = z__[iim1] * z__[iim1];
+		if (dpsi < temp1) {
+		    zz[2] = dtiip * dtiip * dphi;
+		} else {
+		    zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
+		}
+	    } else {
+		temp1 = z__[iip1] / dtiip;
+		temp1 *= temp1;
+		c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) *
+			 (d__[iim1] + d__[iip1]) * temp1;
+		if (dphi < temp1) {
+		    zz[0] = dtiim * dtiim * dpsi;
+		} else {
+		    zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
+		}
+		zz[2] = z__[iip1] * z__[iip1];
+	    }
+	    zz[1] = z__[ii] * z__[ii];
+	    dd[0] = dtiim;
+	    dd[1] = delta[ii] * work[ii];
+	    dd[2] = dtiip;
+	    dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
+	    if (*info != 0) {
+		goto L240;
+	    }
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta >= 0.) {
+	    eta = -w / dw;
+	}
+	if (orgati) {
+	    temp1 = work[*i__] * delta[*i__];
+	    temp = eta - temp1;
+	} else {
+	    temp1 = work[ip1] * delta[ip1];
+	    temp = eta - temp1;
+	}
+	if ((temp > sg2ub) || (temp < sg2lb)) {
+	    if (w < 0.) {
+		eta = (sg2ub - tau) / 2.;
+	    } else {
+		eta = (sg2lb - tau) / 2.;
+	    }
+	}
+
+	tau += eta;
+	eta /= *sigma + sqrt(*sigma * *sigma + eta);
+
+	prew = w;
+
+	*sigma += eta;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    work[j] += eta;
+	    delta[j] -= eta;
+/* L170: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.;
+	psi = 0.;
+	erretm = 0.;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L180: */
+	}
+	erretm = abs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.;
+	phi = 0.;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L190: */
+	}
+
+	temp = z__[ii] / (work[ii] * delta[ii]);
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w = rhoinv + phi + psi + temp;
+	erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3. +
+		abs(tau) * dw;
+
+	if (w <= 0.) {
+	    sg2lb = max(sg2lb,tau);
+	} else {
+	    sg2ub = min(sg2ub,tau);
+	}
+
+	swtch = FALSE_;
+	if (orgati) {
+	    if (-w > abs(prew) / 10.) {
+		swtch = TRUE_;
+	    }
+	} else {
+	    if (w > abs(prew) / 10.) {
+		swtch = TRUE_;
+	    }
+	}
+
+/*        Main loop to update the values of the array   DELTA and WORK */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 20; ++niter) {
+
+/*           Test for convergence */
+
+	    if (abs(w) <= eps * erretm) {
+		goto L240;
+	    }
+
+/*           Calculate the new step */
+
+	    if (! swtch3) {
+		dtipsq = work[ip1] * delta[ip1];
+		dtisq = work[*i__] * delta[*i__];
+		if (! swtch) {
+		    if (orgati) {
+/* Computing 2nd power */
+			d__1 = z__[*i__] / dtisq;
+			c__ = w - dtipsq * dw + delsq * (d__1 * d__1);
+		    } else {
+/* Computing 2nd power */
+			d__1 = z__[ip1] / dtipsq;
+			c__ = w - dtisq * dw - delsq * (d__1 * d__1);
+		    }
+		} else {
+		    temp = z__[ii] / (work[ii] * delta[ii]);
+		    if (orgati) {
+			dpsi += temp * temp;
+		    } else {
+			dphi += temp * temp;
+		    }
+		    c__ = w - dtisq * dpsi - dtipsq * dphi;
+		}
+		a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
+		b = dtipsq * dtisq * w;
+		if (c__ == 0.) {
+		    if (a == 0.) {
+			if (! swtch) {
+			    if (orgati) {
+				a = z__[*i__] * z__[*i__] + dtipsq * dtipsq *
+					(dpsi + dphi);
+			    } else {
+				a = z__[ip1] * z__[ip1] + dtisq * dtisq * (
+					dpsi + dphi);
+			    }
+			} else {
+			    a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi;
+			}
+		    }
+		    eta = b / a;
+		} else if (a <= 0.) {
+		    eta = (a - sqrt((d__1 = a * a - b * 4. * c__, abs(d__1))))
+			     / (c__ * 2.);
+		} else {
+		    eta = b * 2. / (a + sqrt((d__1 = a * a - b * 4. * c__,
+			    abs(d__1))));
+		}
+	    } else {
+
+/*              Interpolation using THREE most relevant poles */
+
+		dtiim = work[iim1] * delta[iim1];
+		dtiip = work[iip1] * delta[iip1];
+		temp = rhoinv + psi + phi;
+		if (swtch) {
+		    c__ = temp - dtiim * dpsi - dtiip * dphi;
+		    zz[0] = dtiim * dtiim * dpsi;
+		    zz[2] = dtiip * dtiip * dphi;
+		} else {
+		    if (orgati) {
+			temp1 = z__[iim1] / dtiim;
+			temp1 *= temp1;
+			temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[
+				iip1]) * temp1;
+			c__ = temp - dtiip * (dpsi + dphi) - temp2;
+			zz[0] = z__[iim1] * z__[iim1];
+			if (dpsi < temp1) {
+			    zz[2] = dtiip * dtiip * dphi;
+			} else {
+			    zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
+			}
+		    } else {
+			temp1 = z__[iip1] / dtiip;
+			temp1 *= temp1;
+			temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[
+				iip1]) * temp1;
+			c__ = temp - dtiim * (dpsi + dphi) - temp2;
+			if (dphi < temp1) {
+			    zz[0] = dtiim * dtiim * dpsi;
+			} else {
+			    zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
+			}
+			zz[2] = z__[iip1] * z__[iip1];
+		    }
+		}
+		dd[0] = dtiim;
+		dd[1] = delta[ii] * work[ii];
+		dd[2] = dtiip;
+		dlaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
+		if (*info != 0) {
+		    goto L240;
+		}
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta >= 0.) {
+		eta = -w / dw;
+	    }
+	    if (orgati) {
+		temp1 = work[*i__] * delta[*i__];
+		temp = eta - temp1;
+	    } else {
+		temp1 = work[ip1] * delta[ip1];
+		temp = eta - temp1;
+	    }
+	    if ((temp > sg2ub) || (temp < sg2lb)) {
+		if (w < 0.) {
+		    eta = (sg2ub - tau) / 2.;
+		} else {
+		    eta = (sg2lb - tau) / 2.;
+		}
+	    }
+
+	    tau += eta;
+	    eta /= *sigma + sqrt(*sigma * *sigma + eta);
+
+	    *sigma += eta;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		work[j] += eta;
+		delta[j] -= eta;
+/* L200: */
+	    }
+
+	    prew = w;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.;
+	    psi = 0.;
+	    erretm = 0.;
+	    i__1 = iim1;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / (work[j] * delta[j]);
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L210: */
+	    }
+	    erretm = abs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    dphi = 0.;
+	    phi = 0.;
+	    i__1 = iip1;
+	    for (j = *n; j >= i__1; --j) {
+		temp = z__[j] / (work[j] * delta[j]);
+		phi += z__[j] * temp;
+		dphi += temp * temp;
+		erretm += phi;
+/* L220: */
+	    }
+
+	    temp = z__[ii] / (work[ii] * delta[ii]);
+	    dw = dpsi + dphi + temp * temp;
+	    temp = z__[ii] * temp;
+	    w = rhoinv + phi + psi + temp;
+	    erretm = (phi - psi) * 8. + erretm + rhoinv * 2. + abs(temp) * 3.
+		    + abs(tau) * dw;
+	    if (w * prew > 0. && abs(w) > abs(prew) / 10.) {
+		swtch = ! swtch;
+	    }
+
+	    if (w <= 0.) {
+		sg2lb = max(sg2lb,tau);
+	    } else {
+		sg2ub = min(sg2ub,tau);
+	    }
+
+/* L230: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+
+    }
+
+L240:
+    return 0;
+
+/*     End of DLASD4 */
+
+} /* dlasd4_ */
+
+/* Subroutine */ int dlasd5_(integer *i__, doublereal *d__, doublereal *z__,
+	doublereal *delta, doublereal *rho, doublereal *dsigma, doublereal *
+	work)
+{
+    /* System generated locals */
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal b, c__, w, del, tau, delsq;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    This subroutine computes the square root of the I-th eigenvalue
+    of a positive symmetric rank-one modification of a 2-by-2 diagonal
+    matrix
+
+               diag( D ) * diag( D ) +  RHO *  Z * transpose(Z) .
+
+    The diagonal entries in the array D are assumed to satisfy
+
+               0 <= D(i) < D(j)  for  i < j .
+
+    We also assume RHO > 0 and that the Euclidean norm of the vector
+    Z is one.
+
+    Arguments
+    =========
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  I = 1 or I = 2.
+
+    D      (input) DOUBLE PRECISION array, dimension ( 2 )
+           The original eigenvalues.  We assume 0 <= D(1) < D(2).
+
+    Z      (input) DOUBLE PRECISION array, dimension ( 2 )
+           The components of the updating vector.
+
+    DELTA  (output) DOUBLE PRECISION array, dimension ( 2 )
+           Contains (D(j) - lambda_I) in its  j-th component.
+           The vector DELTA contains the information necessary
+           to construct the eigenvectors.
+
+    RHO    (input) DOUBLE PRECISION
+           The scalar in the symmetric updating formula.
+
+    DSIGMA (output) DOUBLE PRECISION
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension ( 2 )
+           WORK contains (D(j) + sigma_I) in its  j-th component.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --work;
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    del = d__[2] - d__[1];
+    delsq = del * (d__[2] + d__[1]);
+    if (*i__ == 1) {
+	w = *rho * 4. * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.) - z__[1] *
+		z__[1] / (d__[1] * 3. + d__[2])) / del + 1.;
+	if (w > 0.) {
+	    b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[1] * z__[1] * delsq;
+
+/*
+             B > ZERO, always
+
+             The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 )
+*/
+
+	    tau = c__ * 2. / (b + sqrt((d__1 = b * b - c__ * 4., abs(d__1))));
+
+/*           The following TAU is DSIGMA - D( 1 ) */
+
+	    tau /= d__[1] + sqrt(d__[1] * d__[1] + tau);
+	    *dsigma = d__[1] + tau;
+	    delta[1] = -tau;
+	    delta[2] = del - tau;
+	    work[1] = d__[1] * 2. + tau;
+	    work[2] = d__[1] + tau + d__[2];
+/*
+             DELTA( 1 ) = -Z( 1 ) / TAU
+             DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+*/
+	} else {
+	    b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[2] * z__[2] * delsq;
+
+/*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */
+
+	    if (b > 0.) {
+		tau = c__ * -2. / (b + sqrt(b * b + c__ * 4.));
+	    } else {
+		tau = (b - sqrt(b * b + c__ * 4.)) / 2.;
+	    }
+
+/*           The following TAU is DSIGMA - D( 2 ) */
+
+	    tau /= d__[2] + sqrt((d__1 = d__[2] * d__[2] + tau, abs(d__1)));
+	    *dsigma = d__[2] + tau;
+	    delta[1] = -(del + tau);
+	    delta[2] = -tau;
+	    work[1] = d__[1] + tau + d__[2];
+	    work[2] = d__[2] * 2. + tau;
+/*
+             DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+             DELTA( 2 ) = -Z( 2 ) / TAU
+*/
+	}
+/*
+          TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+          DELTA( 1 ) = DELTA( 1 ) / TEMP
+          DELTA( 2 ) = DELTA( 2 ) / TEMP
+*/
+    } else {
+
+/*        Now I=2 */
+
+	b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	c__ = *rho * z__[2] * z__[2] * delsq;
+
+/*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */
+
+	if (b > 0.) {
+	    tau = (b + sqrt(b * b + c__ * 4.)) / 2.;
+	} else {
+	    tau = c__ * 2. / (-b + sqrt(b * b + c__ * 4.));
+	}
+
+/*        The following TAU is DSIGMA - D( 2 ) */
+
+	tau /= d__[2] + sqrt(d__[2] * d__[2] + tau);
+	*dsigma = d__[2] + tau;
+	delta[1] = -(del + tau);
+	delta[2] = -tau;
+	work[1] = d__[1] + tau + d__[2];
+	work[2] = d__[2] * 2. + tau;
+/*
+          DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+          DELTA( 2 ) = -Z( 2 ) / TAU
+          TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+          DELTA( 1 ) = DELTA( 1 ) / TEMP
+          DELTA( 2 ) = DELTA( 2 ) / TEMP
+*/
+    }
+    return 0;
+
+/*     End of DLASD5 */
+
+} /* dlasd5_ */
+
+/* Subroutine */ int dlasd6_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, doublereal *d__, doublereal *vf, doublereal *vl,
+	doublereal *alpha, doublereal *beta, integer *idxq, integer *perm,
+	integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum,
+	 integer *ldgnum, doublereal *poles, doublereal *difl, doublereal *
+	difr, doublereal *z__, integer *k, doublereal *c__, doublereal *s,
+	doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset,
+	    poles_dim1, poles_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlasd7_(integer *, integer *, integer *,
+	     integer *, integer *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, integer *, integer *,
+	    integer *, integer *, integer *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, integer *), dlasd8_(
+	    integer *, integer *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     doublereal *, integer *), dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlamrg_(integer *, integer *,
+	    doublereal *, integer *, integer *, integer *);
+    static integer isigma;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal orgnrm;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASD6 computes the SVD of an updated upper bidiagonal matrix B
+    obtained by merging two smaller ones by appending a row. This
+    routine is used only for the problem which requires all singular
+    values and optionally singular vector matrices in factored form.
+    B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
+    A related subroutine, DLASD1, handles the case in which all singular
+    values and singular vectors of the bidiagonal matrix are desired.
+
+    DLASD6 computes the SVD as follows:
+
+                  ( D1(in)  0    0     0 )
+      B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+                  (   0     0   D2(in) 0 )
+
+        = U(out) * ( D(out) 0) * VT(out)
+
+    where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+    with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+    elsewhere; and the entry b is empty if SQRE = 0.
+
+    The singular values of B can be computed using D1, D2, the first
+    components of all the right singular vectors of the lower block, and
+    the last components of all the right singular vectors of the upper
+    block. These components are stored and updated in VF and VL,
+    respectively, in DLASD6. Hence U and VT are not explicitly
+    referenced.
+
+    The singular values are stored in D. The algorithm consists of two
+    stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple singular values or if there is a zero
+          in the Z vector. For each such occurence the dimension of the
+          secular equation problem is reduced by one. This stage is
+          performed by the routine DLASD7.
+
+          The second stage consists of calculating the updated
+          singular values. This is done by finding the roots of the
+          secular equation via the routine DLASD4 (as called by DLASD8).
+          This routine also updates VF and VL and computes the distances
+          between the updated singular values and the old singular
+          values.
+
+    DLASD6 is called from DLASDA.
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed in
+           factored form:
+           = 0: Compute singular values only.
+           = 1: Compute singular vectors in factored form as well.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    D      (input/output) DOUBLE PRECISION array, dimension ( NL+NR+1 ).
+           On entry D(1:NL,1:NL) contains the singular values of the
+           upper block, and D(NL+2:N) contains the singular values
+           of the lower block. On exit D(1:N) contains the singular
+           values of the modified matrix.
+
+    VF     (input/output) DOUBLE PRECISION array, dimension ( M )
+           On entry, VF(1:NL+1) contains the first components of all
+           right singular vectors of the upper block; and VF(NL+2:M)
+           contains the first components of all right singular vectors
+           of the lower block. On exit, VF contains the first components
+           of all right singular vectors of the bidiagonal matrix.
+
+    VL     (input/output) DOUBLE PRECISION array, dimension ( M )
+           On entry, VL(1:NL+1) contains the  last components of all
+           right singular vectors of the upper block; and VL(NL+2:M)
+           contains the last components of all right singular vectors of
+           the lower block. On exit, VL contains the last components of
+           all right singular vectors of the bidiagonal matrix.
+
+    ALPHA  (input) DOUBLE PRECISION
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) DOUBLE PRECISION
+           Contains the off-diagonal element associated with the added
+           row.
+
+    IDXQ   (output) INTEGER array, dimension ( N )
+           This contains the permutation which will reintegrate the
+           subproblem just solved back into sorted order, i.e.
+           D( IDXQ( I = 1, N ) ) will be in ascending order.
+
+    PERM   (output) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) to be applied
+           to each block. Not referenced if ICOMPQ = 0.
+
+    GIVPTR (output) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem. Not referenced if ICOMPQ = 0.
+
+    GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGCOL (input) INTEGER
+           leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value to be used in the
+           corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of GIVNUM and POLES, must be at least N.
+
+    POLES  (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           On exit, POLES(1,*) is an array containing the new singular
+           values obtained from solving the secular equation, and
+           POLES(2,*) is an array containing the poles in the secular
+           equation. Not referenced if ICOMPQ = 0.
+
+    DIFL   (output) DOUBLE PRECISION array, dimension ( N )
+           On exit, DIFL(I) is the distance between I-th updated
+           (undeflated) singular value and the I-th (undeflated) old
+           singular value.
+
+    DIFR   (output) DOUBLE PRECISION array,
+                    dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and
+                    dimension ( N ) if ICOMPQ = 0.
+           On exit, DIFR(I, 1) is the distance between I-th updated
+           (undeflated) singular value and the I+1-th (undeflated) old
+           singular value.
+
+           If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+           normalizing factors for the right singular vector matrix.
+
+           See DLASD8 for details on DIFL and DIFR.
+
+    Z      (output) DOUBLE PRECISION array, dimension ( M )
+           The first elements of this array contain the components
+           of the deflation-adjusted updating row vector.
+
+    K      (output) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    C      (output) DOUBLE PRECISION
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (output) DOUBLE PRECISION
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension ( 4 * M )
+
+    IWORK  (workspace) INTEGER array, dimension ( 3 * N )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --vf;
+    --vl;
+    --idxq;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    poles_dim1 = *ldgnum;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    --difl;
+    --difr;
+    --z__;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    } else if (*ldgcol < n) {
+	*info = -14;
+    } else if (*ldgnum < n) {
+	*info = -16;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD6", &i__1);
+	return 0;
+    }
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in DLASD7 and DLASD8.
+*/
+
+    isigma = 1;
+    iw = isigma + n;
+    ivfw = iw + m;
+    ivlw = ivfw + m;
+
+    idx = 1;
+    idxc = idx + n;
+    idxp = idxc + n;
+
+/*
+       Scale.
+
+   Computing MAX
+*/
+    d__1 = abs(*alpha), d__2 = abs(*beta);
+    orgnrm = max(d__1,d__2);
+    d__[*nl + 1] = 0.;
+    i__1 = n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) > orgnrm) {
+	    orgnrm = (d__1 = d__[i__], abs(d__1));
+	}
+/* L10: */
+    }
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &n, &c__1, &d__[1], &n,
+	    info);
+    *alpha /= orgnrm;
+    *beta /= orgnrm;
+
+/*     Sort and Deflate singular values. */
+
+    dlasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], &
+	    work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], &
+	    iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[
+	    givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s,
+	    info);
+
+/*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */
+
+    dlasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1],
+	    ldgnum, &work[isigma], &work[iw], info);
+
+/*     Save the poles if ICOMPQ = 1. */
+
+    if (*icompq == 1) {
+	dcopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1);
+	dcopy_(k, &work[isigma], &c__1, &poles[((poles_dim1) << (1)) + 1], &
+		c__1);
+    }
+
+/*     Unscale. */
+
+    dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, &n, &c__1, &d__[1], &n,
+	    info);
+
+/*     Prepare the IDXQ sorting permutation. */
+
+    n1 = *k;
+    n2 = n - *k;
+    dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);
+
+    return 0;
+
+/*     End of DLASD6 */
+
+} /* dlasd6_ */
+
+/* Subroutine */ int dlasd7_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, integer *k, doublereal *d__, doublereal *z__,
+	doublereal *zw, doublereal *vf, doublereal *vfw, doublereal *vl,
+	doublereal *vlw, doublereal *alpha, doublereal *beta, doublereal *
+	dsigma, integer *idx, integer *idxp, integer *idxq, integer *perm,
+	integer *givptr, integer *givcol, integer *ldgcol, doublereal *givnum,
+	 integer *ldgnum, doublereal *c__, doublereal *s, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static integer i__, j, m, n, k2;
+    static doublereal z1;
+    static integer jp;
+    static doublereal eps, tau, tol;
+    static integer nlp1, nlp2, idxi, idxj;
+    extern /* Subroutine */ int drot_(integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *);
+    static integer idxjp;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer jprev;
+
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *);
+    static doublereal hlftol;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASD7 merges the two sets of singular values together into a single
+    sorted set. Then it tries to deflate the size of the problem. There
+    are two ways in which deflation can occur:  when two or more singular
+    values are close together or if there is a tiny entry in the Z
+    vector. For each such occurrence the order of the related
+    secular equation problem is reduced by one.
+
+    DLASD7 is called from DLASD6.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            Specifies whether singular vectors are to be computed
+            in compact form, as follows:
+            = 0: Compute singular values only.
+            = 1: Compute singular vectors of upper
+                 bidiagonal matrix in compact form.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block. NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block. NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has
+           N = NL + NR + 1 rows and
+           M = N + SQRE >= N columns.
+
+    K      (output) INTEGER
+           Contains the dimension of the non-deflated matrix, this is
+           the order of the related secular equation. 1 <= K <=N.
+
+    D      (input/output) DOUBLE PRECISION array, dimension ( N )
+           On entry D contains the singular values of the two submatrices
+           to be combined. On exit D contains the trailing (N-K) updated
+           singular values (those which were deflated) sorted into
+           increasing order.
+
+    Z      (output) DOUBLE PRECISION array, dimension ( M )
+           On exit Z contains the updating row vector in the secular
+           equation.
+
+    ZW     (workspace) DOUBLE PRECISION array, dimension ( M )
+           Workspace for Z.
+
+    VF     (input/output) DOUBLE PRECISION array, dimension ( M )
+           On entry, VF(1:NL+1) contains the first components of all
+           right singular vectors of the upper block; and VF(NL+2:M)
+           contains the first components of all right singular vectors
+           of the lower block. On exit, VF contains the first components
+           of all right singular vectors of the bidiagonal matrix.
+
+    VFW    (workspace) DOUBLE PRECISION array, dimension ( M )
+           Workspace for VF.
+
+    VL     (input/output) DOUBLE PRECISION array, dimension ( M )
+           On entry, VL(1:NL+1) contains the  last components of all
+           right singular vectors of the upper block; and VL(NL+2:M)
+           contains the last components of all right singular vectors
+           of the lower block. On exit, VL contains the last components
+           of all right singular vectors of the bidiagonal matrix.
+
+    VLW    (workspace) DOUBLE PRECISION array, dimension ( M )
+           Workspace for VL.
+
+    ALPHA  (input) DOUBLE PRECISION
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) DOUBLE PRECISION
+           Contains the off-diagonal element associated with the added
+           row.
+
+    DSIGMA (output) DOUBLE PRECISION array, dimension ( N )
+           Contains a copy of the diagonal elements (K-1 singular values
+           and one zero) in the secular equation.
+
+    IDX    (workspace) INTEGER array, dimension ( N )
+           This will contain the permutation used to sort the contents of
+           D into ascending order.
+
+    IDXP   (workspace) INTEGER array, dimension ( N )
+           This will contain the permutation used to place deflated
+           values of D at the end of the array. On output IDXP(2:K)
+           points to the nondeflated D-values and IDXP(K+1:N)
+           points to the deflated singular values.
+
+    IDXQ   (input) INTEGER array, dimension ( N )
+           This contains the permutation which separately sorts the two
+           sub-problems in D into ascending order.  Note that entries in
+           the first half of this permutation must first be moved one
+           position backward; and entries in the second half
+           must first have NL+1 added to their values.
+
+    PERM   (output) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) to be applied
+           to each singular block. Not referenced if ICOMPQ = 0.
+
+    GIVPTR (output) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem. Not referenced if ICOMPQ = 0.
+
+    GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGCOL (input) INTEGER
+           The leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value to be used in the
+           corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of GIVNUM, must be at least N.
+
+    C      (output) DOUBLE PRECISION
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (output) DOUBLE PRECISION
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --z__;
+    --zw;
+    --vf;
+    --vfw;
+    --vl;
+    --vlw;
+    --dsigma;
+    --idx;
+    --idxp;
+    --idxq;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+
+    /* Function Body */
+    *info = 0;
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    } else if (*ldgcol < n) {
+	*info = -22;
+    } else if (*ldgnum < n) {
+	*info = -24;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD7", &i__1);
+	return 0;
+    }
+
+    nlp1 = *nl + 1;
+    nlp2 = *nl + 2;
+    if (*icompq == 1) {
+	*givptr = 0;
+    }
+
+/*
+       Generate the first part of the vector Z and move the singular
+       values in the first part of D one position backward.
+*/
+
+    z1 = *alpha * vl[nlp1];
+    vl[nlp1] = 0.;
+    tau = vf[nlp1];
+    for (i__ = *nl; i__ >= 1; --i__) {
+	z__[i__ + 1] = *alpha * vl[i__];
+	vl[i__] = 0.;
+	vf[i__ + 1] = vf[i__];
+	d__[i__ + 1] = d__[i__];
+	idxq[i__ + 1] = idxq[i__] + 1;
+/* L10: */
+    }
+    vf[1] = tau;
+
+/*     Generate the second part of the vector Z. */
+
+    i__1 = m;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	z__[i__] = *beta * vf[i__];
+	vf[i__] = 0.;
+/* L20: */
+    }
+
+/*     Sort the singular values into increasing order */
+
+    i__1 = n;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	idxq[i__] += nlp1;
+/* L30: */
+    }
+
+/*     DSIGMA, IDXC, IDXC, and ZW are used as storage space. */
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	dsigma[i__] = d__[idxq[i__]];
+	zw[i__] = z__[idxq[i__]];
+	vfw[i__] = vf[idxq[i__]];
+	vlw[i__] = vl[idxq[i__]];
+/* L40: */
+    }
+
+    dlamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	idxi = idx[i__] + 1;
+	d__[i__] = dsigma[idxi];
+	z__[i__] = zw[idxi];
+	vf[i__] = vfw[idxi];
+	vl[i__] = vlw[idxi];
+/* L50: */
+    }
+
+/*     Calculate the allowable deflation tolerence */
+
+    eps = EPSILON;
+/* Computing MAX */
+    d__1 = abs(*alpha), d__2 = abs(*beta);
+    tol = max(d__1,d__2);
+/* Computing MAX */
+    d__2 = (d__1 = d__[n], abs(d__1));
+    tol = eps * 64. * max(d__2,tol);
+
+/*
+       There are 2 kinds of deflation -- first a value in the z-vector
+       is small, second two (or more) singular values are very close
+       together (their difference is small).
+
+       If the value in the z-vector is small, we simply permute the
+       array so that the corresponding singular value is moved to the
+       end.
+
+       If two values in the D-vector are close, we perform a two-sided
+       rotation designed to make one of the corresponding z-vector
+       entries zero, and then permute the array so that the deflated
+       singular value is moved to the end.
+
+       If there are multiple singular values then the problem deflates.
+       Here the number of equal singular values are found.  As each equal
+       singular value is found, an elementary reflector is computed to
+       rotate the corresponding singular subspace so that the
+       corresponding components of Z are zero in this new basis.
+*/
+
+    *k = 1;
+    k2 = n + 1;
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	if ((d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    idxp[k2] = j;
+	    if (j == n) {
+		goto L100;
+	    }
+	} else {
+	    jprev = j;
+	    goto L70;
+	}
+/* L60: */
+    }
+L70:
+    j = jprev;
+L80:
+    ++j;
+    if (j > n) {
+	goto L90;
+    }
+    if ((d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	idxp[k2] = j;
+    } else {
+
+/*        Check if singular values are close enough to allow deflation. */
+
+	if ((d__1 = d__[j] - d__[jprev], abs(d__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    *s = z__[jprev];
+	    *c__ = z__[j];
+
+/*
+             Find sqrt(a**2+b**2) without overflow or
+             destructive underflow.
+*/
+
+	    tau = dlapy2_(c__, s);
+	    z__[j] = tau;
+	    z__[jprev] = 0.;
+	    *c__ /= tau;
+	    *s = -(*s) / tau;
+
+/*           Record the appropriate Givens rotation */
+
+	    if (*icompq == 1) {
+		++(*givptr);
+		idxjp = idxq[idx[jprev] + 1];
+		idxj = idxq[idx[j] + 1];
+		if (idxjp <= nlp1) {
+		    --idxjp;
+		}
+		if (idxj <= nlp1) {
+		    --idxj;
+		}
+		givcol[*givptr + ((givcol_dim1) << (1))] = idxjp;
+		givcol[*givptr + givcol_dim1] = idxj;
+		givnum[*givptr + ((givnum_dim1) << (1))] = *c__;
+		givnum[*givptr + givnum_dim1] = *s;
+	    }
+	    drot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s);
+	    drot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s);
+	    --k2;
+	    idxp[k2] = jprev;
+	    jprev = j;
+	} else {
+	    ++(*k);
+	    zw[*k] = z__[jprev];
+	    dsigma[*k] = d__[jprev];
+	    idxp[*k] = jprev;
+	    jprev = j;
+	}
+    }
+    goto L80;
+L90:
+
+/*     Record the last singular value. */
+
+    ++(*k);
+    zw[*k] = z__[jprev];
+    dsigma[*k] = d__[jprev];
+    idxp[*k] = jprev;
+
+L100:
+
+/*
+       Sort the singular values into DSIGMA. The singular values which
+       were not deflated go into the first K slots of DSIGMA, except
+       that DSIGMA(1) is treated separately.
+*/
+
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	jp = idxp[j];
+	dsigma[j] = d__[jp];
+	vfw[j] = vf[jp];
+	vlw[j] = vl[jp];
+/* L110: */
+    }
+    if (*icompq == 1) {
+	i__1 = n;
+	for (j = 2; j <= i__1; ++j) {
+	    jp = idxp[j];
+	    perm[j] = idxq[idx[jp] + 1];
+	    if (perm[j] <= nlp1) {
+		--perm[j];
+	    }
+/* L120: */
+	}
+    }
+
+/*
+       The deflated singular values go back into the last N - K slots of
+       D.
+*/
+
+    i__1 = n - *k;
+    dcopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
+
+/*
+       Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
+       VL(M).
+*/
+
+    dsigma[1] = 0.;
+    hlftol = tol / 2.;
+    if (abs(dsigma[2]) <= hlftol) {
+	dsigma[2] = hlftol;
+    }
+    if (m > n) {
+	z__[1] = dlapy2_(&z1, &z__[m]);
+	if (z__[1] <= tol) {
+	    *c__ = 1.;
+	    *s = 0.;
+	    z__[1] = tol;
+	} else {
+	    *c__ = z1 / z__[1];
+	    *s = -z__[m] / z__[1];
+	}
+	drot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s);
+	drot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s);
+    } else {
+	if (abs(z1) <= tol) {
+	    z__[1] = tol;
+	} else {
+	    z__[1] = z1;
+	}
+    }
+
+/*     Restore Z, VF, and VL. */
+
+    i__1 = *k - 1;
+    dcopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1);
+    i__1 = n - 1;
+    dcopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1);
+    i__1 = n - 1;
+    dcopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1);
+
+    return 0;
+
+/*     End of DLASD7 */
+
+} /* dlasd7_ */
+
+/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__,
+	doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl,
+	doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal *
+	work, integer *info)
+{
+    /* System generated locals */
+    integer difr_dim1, difr_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal dj, rho;
+    static integer iwk1, iwk2, iwk3;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    static doublereal temp;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    static integer iwk2i, iwk3i;
+    static doublereal diflj, difrj, dsigj;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, integer *), dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlaset_(char *, integer *, integer
+	    *, doublereal *, doublereal *, doublereal *, integer *),
+	    xerbla_(char *, integer *);
+    static doublereal dsigjp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASD8 finds the square roots of the roots of the secular equation,
+    as defined by the values in DSIGMA and Z. It makes the appropriate
+    calls to DLASD4, and stores, for each  element in D, the distance
+    to its two nearest poles (elements in DSIGMA). It also updates
+    the arrays VF and VL, the first and last components of all the
+    right singular vectors of the original bidiagonal matrix.
+
+    DLASD8 is called from DLASD6.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            Specifies whether singular vectors are to be computed in
+            factored form in the calling routine:
+            = 0: Compute singular values only.
+            = 1: Compute singular vectors in factored form as well.
+
+    K       (input) INTEGER
+            The number of terms in the rational function to be solved
+            by DLASD4.  K >= 1.
+
+    D       (output) DOUBLE PRECISION array, dimension ( K )
+            On output, D contains the updated singular values.
+
+    Z       (input) DOUBLE PRECISION array, dimension ( K )
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating row vector.
+
+    VF      (input/output) DOUBLE PRECISION array, dimension ( K )
+            On entry, VF contains  information passed through DBEDE8.
+            On exit, VF contains the first K components of the first
+            components of all right singular vectors of the bidiagonal
+            matrix.
+
+    VL      (input/output) DOUBLE PRECISION array, dimension ( K )
+            On entry, VL contains  information passed through DBEDE8.
+            On exit, VL contains the first K components of the last
+            components of all right singular vectors of the bidiagonal
+            matrix.
+
+    DIFL    (output) DOUBLE PRECISION array, dimension ( K )
+            On exit, DIFL(I) = D(I) - DSIGMA(I).
+
+    DIFR    (output) DOUBLE PRECISION array,
+                     dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+                     dimension ( K ) if ICOMPQ = 0.
+            On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+            defined and will not be referenced.
+
+            If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+            normalizing factors for the right singular vector matrix.
+
+    LDDIFR  (input) INTEGER
+            The leading dimension of DIFR, must be at least K.
+
+    DSIGMA  (input) DOUBLE PRECISION array, dimension ( K )
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension at least 3 * K
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --z__;
+    --vf;
+    --vl;
+    --difl;
+    difr_dim1 = *lddifr;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    --dsigma;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*k < 1) {
+	*info = -2;
+    } else if (*lddifr < *k) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASD8", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 1) {
+	d__[1] = abs(z__[1]);
+	difl[1] = d__[1];
+	if (*icompq == 1) {
+	    difl[2] = 1.;
+	    difr[((difr_dim1) << (1)) + 1] = 1.;
+	}
+	return 0;
+    }
+
+/*
+       Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DSIGMA(I) if it is 1; this makes the subsequent
+       subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DSIGMA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DSIGMA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
+/* L10: */
+    }
+
+/*     Book keeping. */
+
+    iwk1 = 1;
+    iwk2 = iwk1 + *k;
+    iwk3 = iwk2 + *k;
+    iwk2i = iwk2 - 1;
+    iwk3i = iwk3 - 1;
+
+/*     Normalize Z. */
+
+    rho = dnrm2_(k, &z__[1], &c__1);
+    dlascl_("G", &c__0, &c__0, &rho, &c_b2865, k, &c__1, &z__[1], k, info);
+    rho *= rho;
+
+/*     Initialize WORK(IWK3). */
+
+    dlaset_("A", k, &c__1, &c_b2865, &c_b2865, &work[iwk3], k);
+
+/*
+       Compute the updated singular values, the arrays DIFL, DIFR,
+       and the updated Z.
+*/
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[
+		iwk2], info);
+
+/*        If the root finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    return 0;
+	}
+	work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j];
+	difl[j] = -work[j];
+	difr[j + difr_dim1] = -work[j + 1];
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i +
+		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
+		    j]);
+/* L20: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i +
+		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
+		    j]);
+/* L30: */
+	}
+/* L40: */
+    }
+
+/*     Compute updated Z. */
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1)));
+	z__[i__] = d_sign(&d__2, &z__[i__]);
+/* L50: */
+    }
+
+/*     Update VF and VL. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	diflj = difl[j];
+	dj = d__[j];
+	dsigj = -dsigma[j];
+	if (j < *k) {
+	    difrj = -difr[j + difr_dim1];
+	    dsigjp = -dsigma[j + 1];
+	}
+	work[j] = -z__[j] / diflj / (dsigma[j] + dj);
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / (
+		    dsigma[i__] + dj);
+/* L60: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) /
+		    (dsigma[i__] + dj);
+/* L70: */
+	}
+	temp = dnrm2_(k, &work[1], &c__1);
+	work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp;
+	work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp;
+	if (*icompq == 1) {
+	    difr[j + ((difr_dim1) << (1))] = temp;
+	}
+/* L80: */
+    }
+
+    dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1);
+    dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1);
+
+    return 0;
+
+/*     End of DLASD8 */
+
+} /* dlasd8_ */
+
+/* Subroutine */ int dlasda_(integer *icompq, integer *smlsiz, integer *n,
+	integer *sqre, doublereal *d__, doublereal *e, doublereal *u, integer
+	*ldu, doublereal *vt, integer *k, doublereal *difl, doublereal *difr,
+	doublereal *z__, doublereal *poles, integer *givptr, integer *givcol,
+	integer *ldgcol, integer *perm, doublereal *givnum, doublereal *c__,
+	doublereal *s, doublereal *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1,
+	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset,
+	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset,
+	    z_dim1, z_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc,
+	    nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1;
+    static doublereal beta;
+    static integer idxq, nlvl;
+    static doublereal alpha;
+    static integer inode, ndiml, ndimr, idxqi, itemp;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer sqrei;
+    extern /* Subroutine */ int dlasd6_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *);
+    static integer nwork1, nwork2;
+    extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlasdt_(integer *, integer *,
+	    integer *, integer *, integer *, integer *, integer *), dlaset_(
+	    char *, integer *, integer *, doublereal *, doublereal *,
+	    doublereal *, integer *), xerbla_(char *, integer *);
+    static integer smlszp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    Using a divide and conquer approach, DLASDA computes the singular
+    value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
+    B with diagonal D and offdiagonal E, where M = N + SQRE. The
+    algorithm computes the singular values in the SVD B = U * S * VT.
+    The orthogonal matrices U and VT are optionally computed in
+    compact form.
+
+    A related subroutine, DLASD0, computes the singular values and
+    the singular vectors in explicit form.
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed
+           in compact form, as follows
+           = 0: Compute singular values only.
+           = 1: Compute singular vectors of upper bidiagonal
+                matrix in compact form.
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The row dimension of the upper bidiagonal matrix. This is
+           also the dimension of the main diagonal array D.
+
+    SQRE   (input) INTEGER
+           Specifies the column dimension of the bidiagonal matrix.
+           = 0: The bidiagonal matrix has column dimension M = N;
+           = 1: The bidiagonal matrix has column dimension M = N + 1.
+
+    D      (input/output) DOUBLE PRECISION array, dimension ( N )
+           On entry D contains the main diagonal of the bidiagonal
+           matrix. On exit D, if INFO = 0, contains its singular values.
+
+    E      (input) DOUBLE PRECISION array, dimension ( M-1 )
+           Contains the subdiagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    U      (output) DOUBLE PRECISION array,
+           dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left
+           singular vector matrices of all subproblems at the bottom
+           level.
+
+    LDU    (input) INTEGER, LDU = > N.
+           The leading dimension of arrays U, VT, DIFL, DIFR, POLES,
+           GIVNUM, and Z.
+
+    VT     (output) DOUBLE PRECISION array,
+           dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right
+           singular vector matrices of all subproblems at the bottom
+           level.
+
+    K      (output) INTEGER array,
+           dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.
+           If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th
+           secular equation on the computation tree.
+
+    DIFL   (output) DOUBLE PRECISION array, dimension ( LDU, NLVL ),
+           where NLVL = floor(log_2 (N/SMLSIZ))).
+
+    DIFR   (output) DOUBLE PRECISION array,
+                    dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and
+                    dimension ( N ) if ICOMPQ = 0.
+           If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)
+           record distances between singular values on the I-th
+           level and singular values on the (I -1)-th level, and
+           DIFR(1:N, 2 * I ) contains the normalizing factors for
+           the right singular vector matrix. See DLASD8 for details.
+
+    Z      (output) DOUBLE PRECISION array,
+                    dimension ( LDU, NLVL ) if ICOMPQ = 1 and
+                    dimension ( N ) if ICOMPQ = 0.
+           The first K elements of Z(1, I) contain the components of
+           the deflation-adjusted updating row vector for subproblems
+           on the I-th level.
+
+    POLES  (output) DOUBLE PRECISION array,
+           dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and
+           POLES(1, 2*I) contain  the new and old singular values
+           involved in the secular equations on the I-th level.
+
+    GIVPTR (output) INTEGER array,
+           dimension ( N ) if ICOMPQ = 1, and not referenced if
+           ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records
+           the number of Givens rotations performed on the I-th
+           problem on the computation tree.
+
+    GIVCOL (output) INTEGER array,
+           dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not
+           referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+           GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations
+           of Givens rotations performed on the I-th level on the
+           computation tree.
+
+    LDGCOL (input) INTEGER, LDGCOL = > N.
+           The leading dimension of arrays GIVCOL and PERM.
+
+    PERM   (output) INTEGER array,
+           dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records
+           permutations done on the I-th level of the computation tree.
+
+    GIVNUM (output) DOUBLE PRECISION array,
+           dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not
+           referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+           GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-
+           values of Givens rotations performed on the I-th level on
+           the computation tree.
+
+    C      (output) DOUBLE PRECISION array,
+           dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.
+           If ICOMPQ = 1 and the I-th subproblem is not square, on exit,
+           C( I ) contains the C-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    S      (output) DOUBLE PRECISION array, dimension ( N ) if
+           ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1
+           and the I-th subproblem is not square, on exit, S( I )
+           contains the S-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    WORK   (workspace) DOUBLE PRECISION array, dimension
+           (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).
+
+    IWORK  (workspace) INTEGER array.
+           Dimension must be at least (7 * N).
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    givnum_dim1 = *ldu;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    poles_dim1 = *ldu;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    z_dim1 = *ldu;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    difr_dim1 = *ldu;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    difl_dim1 = *ldu;
+    difl_offset = 1 + difl_dim1;
+    difl -= difl_offset;
+    vt_dim1 = *ldu;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    --k;
+    --givptr;
+    perm_dim1 = *ldgcol;
+    perm_offset = 1 + perm_dim1;
+    perm -= perm_offset;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    --c__;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*smlsiz < 3) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    } else if (*ldu < *n + *sqre) {
+	*info = -8;
+    } else if (*ldgcol < *n) {
+	*info = -17;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASDA", &i__1);
+	return 0;
+    }
+
+    m = *n + *sqre;
+
+/*     If the input matrix is too small, call DLASDQ to find the SVD. */
+
+    if (*n <= *smlsiz) {
+	if (*icompq == 0) {
+	    dlasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
+		    vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, &
+		    work[1], info);
+	} else {
+	    dlasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
+		    , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1],
+		    info);
+	}
+	return 0;
+    }
+
+/*     Book-keeping and  set up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+    idxq = ndimr + *n;
+    iwk = idxq + *n;
+
+    ncc = 0;
+    nru = 0;
+
+    smlszp = *smlsiz + 1;
+    vf = 1;
+    vl = vf + m;
+    nwork1 = vl + m;
+    nwork2 = nwork1 + smlszp * smlszp;
+
+    dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       for the nodes on bottom level of the tree, solve
+       their subproblems by DLASDQ.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+          IC : center row of each node
+          NL : number of rows of left  subproblem
+          NR : number of rows of right subproblem
+          NLF: starting row of the left   subproblem
+          NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nlp1 = nl + 1;
+	nr = iwork[ndimr + i1];
+	nlf = ic - nl;
+	nrf = ic + 1;
+	idxqi = idxq + nlf - 2;
+	vfi = vf + nlf - 1;
+	vli = vl + nlf - 1;
+	sqrei = 1;
+	if (*icompq == 0) {
+	    dlaset_("A", &nlp1, &nlp1, &c_b2879, &c_b2865, &work[nwork1], &
+		    smlszp);
+	    dlasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], &
+		    work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2],
+		    &nl, &work[nwork2], info);
+	    itemp = nwork1 + nl * smlszp;
+	    dcopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1);
+	    dcopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1);
+	} else {
+	    dlaset_("A", &nl, &nl, &c_b2879, &c_b2865, &u[nlf + u_dim1], ldu);
+	    dlaset_("A", &nlp1, &nlp1, &c_b2879, &c_b2865, &vt[nlf + vt_dim1],
+		     ldu);
+	    dlasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &
+		    vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf +
+		    u_dim1], ldu, &work[nwork1], info);
+	    dcopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1);
+	    dcopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1)
+		    ;
+	}
+	if (*info != 0) {
+	    return 0;
+	}
+	i__2 = nl;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[idxqi + j] = j;
+/* L10: */
+	}
+	if (i__ == nd && *sqre == 0) {
+	    sqrei = 0;
+	} else {
+	    sqrei = 1;
+	}
+	idxqi += nlp1;
+	vfi += nlp1;
+	vli += nlp1;
+	nrp1 = nr + sqrei;
+	if (*icompq == 0) {
+	    dlaset_("A", &nrp1, &nrp1, &c_b2879, &c_b2865, &work[nwork1], &
+		    smlszp);
+	    dlasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], &
+		    work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2],
+		    &nr, &work[nwork2], info);
+	    itemp = nwork1 + (nrp1 - 1) * smlszp;
+	    dcopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1);
+	    dcopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1);
+	} else {
+	    dlaset_("A", &nr, &nr, &c_b2879, &c_b2865, &u[nrf + u_dim1], ldu);
+	    dlaset_("A", &nrp1, &nrp1, &c_b2879, &c_b2865, &vt[nrf + vt_dim1],
+		     ldu);
+	    dlasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &
+		    vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf +
+		    u_dim1], ldu, &work[nwork1], info);
+	    dcopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1);
+	    dcopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1)
+		    ;
+	}
+	if (*info != 0) {
+	    return 0;
+	}
+	i__2 = nr;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[idxqi + j] = j;
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*     Now conquer each subproblem bottom-up. */
+
+    j = pow_ii(&c__2, &nlvl);
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          Find the first node LF and last node LL on
+          the current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    if (i__ == ll) {
+		sqrei = *sqre;
+	    } else {
+		sqrei = 1;
+	    }
+	    vfi = vf + nlf - 1;
+	    vli = vl + nlf - 1;
+	    idxqi = idxq + nlf - 1;
+	    alpha = d__[ic];
+	    beta = e[ic];
+	    if (*icompq == 0) {
+		dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
+			work[vli], &alpha, &beta, &iwork[idxqi], &perm[
+			perm_offset], &givptr[1], &givcol[givcol_offset],
+			ldgcol, &givnum[givnum_offset], ldu, &poles[
+			poles_offset], &difl[difl_offset], &difr[difr_offset],
+			 &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1],
+			 &iwork[iwk], info);
+	    } else {
+		--j;
+		dlasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
+			work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf +
+			lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 *
+			givcol_dim1], ldgcol, &givnum[nlf + lvl2 *
+			givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], &
+			difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 *
+			difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j],
+			&s[j], &work[nwork1], &iwork[iwk], info);
+	    }
+	    if (*info != 0) {
+		return 0;
+	    }
+/* L40: */
+	}
+/* L50: */
+    }
+
+    return 0;
+
+/*     End of DLASDA */
+
+} /* dlasda_ */
+
+/* Subroutine */ int dlasdq_(char *uplo, integer *sqre, integer *n, integer *
+	ncvt, integer *nru, integer *ncc, doublereal *d__, doublereal *e,
+	doublereal *vt, integer *ldvt, doublereal *u, integer *ldu,
+	doublereal *c__, integer *ldc, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal r__, cs, sn;
+    static integer np1, isub;
+    static doublereal smin;
+    static integer sqre1;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dswap_(integer *, doublereal *, integer *
+	    , doublereal *, integer *);
+    static integer iuplo;
+    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *), xerbla_(char *,
+	    integer *), dbdsqr_(char *, integer *, integer *, integer
+	    *, integer *, doublereal *, doublereal *, doublereal *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    static logical rotate;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASDQ computes the singular value decomposition (SVD) of a real
+    (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+    E, accumulating the transformations if desired. Letting B denote
+    the input bidiagonal matrix, the algorithm computes orthogonal
+    matrices Q and P such that B = Q * S * P' (P' denotes the transpose
+    of P). The singular values S are overwritten on D.
+
+    The input matrix U  is changed to U  * Q  if desired.
+    The input matrix VT is changed to P' * VT if desired.
+    The input matrix C  is changed to Q' * C  if desired.
+
+    See "Computing  Small Singular Values of Bidiagonal Matrices With
+    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+    LAPACK Working Note #3, for a detailed description of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO  (input) CHARACTER*1
+          On entry, UPLO specifies whether the input bidiagonal matrix
+          is upper or lower bidiagonal, and wether it is square are
+          not.
+             UPLO = 'U' or 'u'   B is upper bidiagonal.
+             UPLO = 'L' or 'l'   B is lower bidiagonal.
+
+    SQRE  (input) INTEGER
+          = 0: then the input matrix is N-by-N.
+          = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and
+               (N+1)-by-N if UPLU = 'L'.
+
+          The bidiagonal matrix has
+          N = NL + NR + 1 rows and
+          M = N + SQRE >= N columns.
+
+    N     (input) INTEGER
+          On entry, N specifies the number of rows and columns
+          in the matrix. N must be at least 0.
+
+    NCVT  (input) INTEGER
+          On entry, NCVT specifies the number of columns of
+          the matrix VT. NCVT must be at least 0.
+
+    NRU   (input) INTEGER
+          On entry, NRU specifies the number of rows of
+          the matrix U. NRU must be at least 0.
+
+    NCC   (input) INTEGER
+          On entry, NCC specifies the number of columns of
+          the matrix C. NCC must be at least 0.
+
+    D     (input/output) DOUBLE PRECISION array, dimension (N)
+          On entry, D contains the diagonal entries of the
+          bidiagonal matrix whose SVD is desired. On normal exit,
+          D contains the singular values in ascending order.
+
+    E     (input/output) DOUBLE PRECISION array.
+          dimension is (N-1) if SQRE = 0 and N if SQRE = 1.
+          On entry, the entries of E contain the offdiagonal entries
+          of the bidiagonal matrix whose SVD is desired. On normal
+          exit, E will contain 0. If the algorithm does not converge,
+          D and E will contain the diagonal and superdiagonal entries
+          of a bidiagonal matrix orthogonally equivalent to the one
+          given as input.
+
+    VT    (input/output) DOUBLE PRECISION array, dimension (LDVT, NCVT)
+          On entry, contains a matrix which on exit has been
+          premultiplied by P', dimension N-by-NCVT if SQRE = 0
+          and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).
+
+    LDVT  (input) INTEGER
+          On entry, LDVT specifies the leading dimension of VT as
+          declared in the calling (sub) program. LDVT must be at
+          least 1. If NCVT is nonzero LDVT must also be at least N.
+
+    U     (input/output) DOUBLE PRECISION array, dimension (LDU, N)
+          On entry, contains a  matrix which on exit has been
+          postmultiplied by Q, dimension NRU-by-N if SQRE = 0
+          and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).
+
+    LDU   (input) INTEGER
+          On entry, LDU  specifies the leading dimension of U as
+          declared in the calling (sub) program. LDU must be at
+          least max( 1, NRU ) .
+
+    C     (input/output) DOUBLE PRECISION array, dimension (LDC, NCC)
+          On entry, contains an N-by-NCC matrix which on exit
+          has been premultiplied by Q'  dimension N-by-NCC if SQRE = 0
+          and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).
+
+    LDC   (input) INTEGER
+          On entry, LDC  specifies the leading dimension of C as
+          declared in the calling (sub) program. LDC must be at
+          least 1. If NCC is nonzero, LDC must also be at least N.
+
+    WORK  (workspace) DOUBLE PRECISION array, dimension (4*N)
+          Workspace. Only referenced if one of NCVT, NRU, or NCC is
+          nonzero, and if N is at least 2.
+
+    INFO  (output) INTEGER
+          On exit, a value of 0 indicates a successful exit.
+          If INFO < 0, argument number -INFO is illegal.
+          If INFO > 0, the algorithm did not converge, and INFO
+          specifies how many superdiagonals did not converge.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    iuplo = 0;
+    if (lsame_(uplo, "U")) {
+	iuplo = 1;
+    }
+    if (lsame_(uplo, "L")) {
+	iuplo = 2;
+    }
+    if (iuplo == 0) {
+	*info = -1;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ncvt < 0) {
+	*info = -4;
+    } else if (*nru < 0) {
+	*info = -5;
+    } else if (*ncc < 0) {
+	*info = -6;
+    } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n)))
+	    {
+	*info = -10;
+    } else if (*ldu < max(1,*nru)) {
+	*info = -12;
+    } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) {
+	*info = -14;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASDQ", &i__1);
+	return 0;
+    }
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     ROTATE is true if any singular vectors desired, false otherwise */
+
+    rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0);
+    np1 = *n + 1;
+    sqre1 = *sqre;
+
+/*
+       If matrix non-square upper bidiagonal, rotate to be lower
+       bidiagonal.  The rotations are on the right.
+*/
+
+    if (iuplo == 1 && sqre1 == 1) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (rotate) {
+		work[i__] = cs;
+		work[*n + i__] = sn;
+	    }
+/* L10: */
+	}
+	dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
+	d__[*n] = r__;
+	e[*n] = 0.;
+	if (rotate) {
+	    work[*n] = cs;
+	    work[*n + *n] = sn;
+	}
+	iuplo = 2;
+	sqre1 = 0;
+
+/*        Update singular vectors if desired. */
+
+	if (*ncvt > 0) {
+	    dlasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[
+		    vt_offset], ldvt);
+	}
+    }
+
+/*
+       If matrix lower bidiagonal, rotate to be upper bidiagonal
+       by applying Givens rotations on the left.
+*/
+
+    if (iuplo == 2) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (rotate) {
+		work[i__] = cs;
+		work[*n + i__] = sn;
+	    }
+/* L20: */
+	}
+
+/*
+          If matrix (N+1)-by-N lower bidiagonal, one additional
+          rotation is needed.
+*/
+
+	if (sqre1 == 1) {
+	    dlartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
+	    d__[*n] = r__;
+	    if (rotate) {
+		work[*n] = cs;
+		work[*n + *n] = sn;
+	    }
+	}
+
+/*        Update singular vectors if desired. */
+
+	if (*nru > 0) {
+	    if (sqre1 == 0) {
+		dlasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[
+			u_offset], ldu);
+	    } else {
+		dlasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[
+			u_offset], ldu);
+	    }
+	}
+	if (*ncc > 0) {
+	    if (sqre1 == 0) {
+		dlasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[
+			c_offset], ldc);
+	    } else {
+		dlasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[
+			c_offset], ldc);
+	    }
+	}
+    }
+
+/*
+       Call DBDSQR to compute the SVD of the reduced real
+       N-by-N upper bidiagonal matrix.
+*/
+
+    dbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[
+	    u_offset], ldu, &c__[c_offset], ldc, &work[1], info);
+
+/*
+       Sort the singular values into ascending order (insertion sort on
+       singular values, but only one transposition per singular vector)
+*/
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Scan for smallest D(I). */
+
+	isub = i__;
+	smin = d__[i__];
+	i__2 = *n;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    if (d__[j] < smin) {
+		isub = j;
+		smin = d__[j];
+	    }
+/* L30: */
+	}
+	if (isub != i__) {
+
+/*           Swap singular values and vectors. */
+
+	    d__[isub] = d__[i__];
+	    d__[i__] = smin;
+	    if (*ncvt > 0) {
+		dswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1],
+			ldvt);
+	    }
+	    if (*nru > 0) {
+		dswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1]
+			, &c__1);
+	    }
+	    if (*ncc > 0) {
+		dswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc)
+			;
+	    }
+	}
+/* L40: */
+    }
+
+    return 0;
+
+/*     End of DLASDQ */
+
+} /* dlasdq_ */
+
+/* Subroutine */ int dlasdt_(integer *n, integer *lvl, integer *nd, integer *
+	inode, integer *ndiml, integer *ndimr, integer *msub)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Builtin functions */
+    double log(doublereal);
+
+    /* Local variables */
+    static integer i__, il, ir, maxn;
+    static doublereal temp;
+    static integer nlvl, llst, ncrnt;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASDT creates a tree of subproblems for bidiagonal divide and
+    conquer.
+
+    Arguments
+    =========
+
+     N      (input) INTEGER
+            On entry, the number of diagonal elements of the
+            bidiagonal matrix.
+
+     LVL    (output) INTEGER
+            On exit, the number of levels on the computation tree.
+
+     ND     (output) INTEGER
+            On exit, the number of nodes on the tree.
+
+     INODE  (output) INTEGER array, dimension ( N )
+            On exit, centers of subproblems.
+
+     NDIML  (output) INTEGER array, dimension ( N )
+            On exit, row dimensions of left children.
+
+     NDIMR  (output) INTEGER array, dimension ( N )
+            On exit, row dimensions of right children.
+
+     MSUB   (input) INTEGER.
+            On entry, the maximum row dimension each subproblem at the
+            bottom of the tree can be of.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Find the number of levels on the tree.
+*/
+
+    /* Parameter adjustments */
+    --ndimr;
+    --ndiml;
+    --inode;
+
+    /* Function Body */
+    maxn = max(1,*n);
+    temp = log((doublereal) maxn / (doublereal) (*msub + 1)) / log(2.);
+    *lvl = (integer) temp + 1;
+
+    i__ = *n / 2;
+    inode[1] = i__ + 1;
+    ndiml[1] = i__;
+    ndimr[1] = *n - i__ - 1;
+    il = 0;
+    ir = 1;
+    llst = 1;
+    i__1 = *lvl - 1;
+    for (nlvl = 1; nlvl <= i__1; ++nlvl) {
+
+/*
+          Constructing the tree at (NLVL+1)-st level. The number of
+          nodes created on this level is LLST * 2.
+*/
+
+	i__2 = llst - 1;
+	for (i__ = 0; i__ <= i__2; ++i__) {
+	    il += 2;
+	    ir += 2;
+	    ncrnt = llst + i__;
+	    ndiml[il] = ndiml[ncrnt] / 2;
+	    ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1;
+	    inode[il] = inode[ncrnt] - ndimr[il] - 1;
+	    ndiml[ir] = ndimr[ncrnt] / 2;
+	    ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1;
+	    inode[ir] = inode[ncrnt] + ndiml[ir] + 1;
+/* L10: */
+	}
+	llst <<= 1;
+/* L20: */
+    }
+    *nd = ((llst) << (1)) - 1;
+
+    return 0;
+
+/*     End of DLASDT */
+
+} /* dlasdt_ */
+
+/* Subroutine */ int dlaset_(char *uplo, integer *m, integer *n, doublereal *
+	alpha, doublereal *beta, doublereal *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLASET initializes an m-by-n matrix A to BETA on the diagonal and
+    ALPHA on the offdiagonals.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be set.
+            = 'U':      Upper triangular part is set; the strictly lower
+                        triangular part of A is not changed.
+            = 'L':      Lower triangular part is set; the strictly upper
+                        triangular part of A is not changed.
+            Otherwise:  All of the matrix A is set.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    ALPHA   (input) DOUBLE PRECISION
+            The constant to which the offdiagonal elements are to be set.
+
+    BETA    (input) DOUBLE PRECISION
+            The constant to which the diagonal elements are to be set.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On exit, the leading m-by-n submatrix of A is set as follows:
+
+            if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
+            if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
+            otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
+
+            and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+
+/*
+          Set the strictly upper triangular or trapezoidal part of the
+          array to ALPHA.
+*/
+
+	i__1 = *n;
+	for (j = 2; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j - 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = *alpha;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+
+/*
+          Set the strictly lower triangular or trapezoidal part of the
+          array to ALPHA.
+*/
+
+	i__1 = min(*m,*n);
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = *alpha;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+    } else {
+
+/*        Set the leading m-by-n submatrix to ALPHA. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = *alpha;
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+/*     Set the first min(M,N) diagonal elements to BETA. */
+
+    i__1 = min(*m,*n);
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	a[i__ + i__ * a_dim1] = *beta;
+/* L70: */
+    }
+
+    return 0;
+
+/*     End of DLASET */
+
+} /* dlaset_ */
+
+/* Subroutine */ int dlasq1_(integer *n, doublereal *d__, doublereal *e,
+	doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    doublereal d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__;
+    static doublereal eps;
+    extern /* Subroutine */ int dlas2_(doublereal *, doublereal *, doublereal
+	    *, doublereal *, doublereal *);
+    static doublereal scale;
+    static integer iinfo;
+    static doublereal sigmn;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static doublereal sigmx;
+    extern /* Subroutine */ int dlasq2_(integer *, doublereal *, integer *);
+
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *);
+    static doublereal safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *), dlasrt_(
+	    char *, integer *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASQ1 computes the singular values of a real N-by-N bidiagonal
+    matrix with diagonal D and off-diagonal E. The singular values
+    are computed to high relative accuracy, in the absence of
+    denormalization, underflow and overflow. The algorithm was first
+    presented in
+
+    "Accurate singular values and differential qd algorithms" by K. V.
+    Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230,
+    1994,
+
+    and the present implementation is described in "An implementation of
+    the dqds Algorithm (Positive Case)", LAPACK Working Note.
+
+    Arguments
+    =========
+
+    N     (input) INTEGER
+          The number of rows and columns in the matrix. N >= 0.
+
+    D     (input/output) DOUBLE PRECISION array, dimension (N)
+          On entry, D contains the diagonal elements of the
+          bidiagonal matrix whose SVD is desired. On normal exit,
+          D contains the singular values in decreasing order.
+
+    E     (input/output) DOUBLE PRECISION array, dimension (N)
+          On entry, elements E(1:N-1) contain the off-diagonal elements
+          of the bidiagonal matrix whose SVD is desired.
+          On exit, E is overwritten.
+
+    WORK  (workspace) DOUBLE PRECISION array, dimension (4*N)
+
+    INFO  (output) INTEGER
+          = 0: successful exit
+          < 0: if INFO = -i, the i-th argument had an illegal value
+          > 0: the algorithm failed
+               = 1, a split was marked by a positive value in E
+               = 2, current block of Z not diagonalized after 30*N
+                    iterations (in inner while loop)
+               = 3, termination criterion of outer while loop not met
+                    (program created more than N unreduced blocks)
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --work;
+    --e;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -2;
+	i__1 = -(*info);
+	xerbla_("DLASQ1", &i__1);
+	return 0;
+    } else if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+	d__[1] = abs(d__[1]);
+	return 0;
+    } else if (*n == 2) {
+	dlas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx);
+	d__[1] = sigmx;
+	d__[2] = sigmn;
+	return 0;
+    }
+
+/*     Estimate the largest singular value. */
+
+    sigmx = 0.;
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__[i__] = (d__1 = d__[i__], abs(d__1));
+/* Computing MAX */
+	d__2 = sigmx, d__3 = (d__1 = e[i__], abs(d__1));
+	sigmx = max(d__2,d__3);
+/* L10: */
+    }
+    d__[*n] = (d__1 = d__[*n], abs(d__1));
+
+/*     Early return if SIGMX is zero (matrix is already diagonal). */
+
+    if (sigmx == 0.) {
+	dlasrt_("D", n, &d__[1], &iinfo);
+	return 0;
+    }
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	d__1 = sigmx, d__2 = d__[i__];
+	sigmx = max(d__1,d__2);
+/* L20: */
+    }
+
+/*
+       Copy D and E into WORK (in the Z format) and scale (squaring the
+       input data makes scaling by a power of the radix pointless).
+*/
+
+    eps = PRECISION;
+    safmin = SAFEMINIMUM;
+    scale = sqrt(eps / safmin);
+    dcopy_(n, &d__[1], &c__1, &work[1], &c__2);
+    i__1 = *n - 1;
+    dcopy_(&i__1, &e[1], &c__1, &work[2], &c__2);
+    i__1 = ((*n) << (1)) - 1;
+    i__2 = ((*n) << (1)) - 1;
+    dlascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2,
+	    &iinfo);
+
+/*     Compute the q's and e's. */
+
+    i__1 = ((*n) << (1)) - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing 2nd power */
+	d__1 = work[i__];
+	work[i__] = d__1 * d__1;
+/* L30: */
+    }
+    work[*n * 2] = 0.;
+
+    dlasq2_(n, &work[1], info);
+
+    if (*info == 0) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d__[i__] = sqrt(work[i__]);
+/* L40: */
+	}
+	dlascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, &
+		iinfo);
+    }
+
+    return 0;
+
+/*     End of DLASQ1 */
+
+} /* dlasq1_ */
+
+/* Subroutine */ int dlasq2_(integer *n, doublereal *z__, integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal d__, e;
+    static integer k;
+    static doublereal s, t;
+    static integer i0, i4, n0, pp;
+    static doublereal eps, tol;
+    static integer ipn4;
+    static doublereal tol2;
+    static logical ieee;
+    static integer nbig;
+    static doublereal dmin__, emin, emax;
+    static integer ndiv, iter;
+    static doublereal qmin, temp, qmax, zmax;
+    static integer splt, nfail;
+    static doublereal desig, trace, sigma;
+    static integer iinfo;
+    extern /* Subroutine */ int dlasq3_(integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, doublereal *, doublereal *,
+	     integer *, integer *, integer *, logical *);
+
+    static integer iwhila, iwhilb;
+    static doublereal oldemn, safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASQ2 computes all the eigenvalues of the symmetric positive
+    definite tridiagonal matrix associated with the qd array Z to high
+    relative accuracy are computed to high relative accuracy, in the
+    absence of denormalization, underflow and overflow.
+
+    To see the relation of Z to the tridiagonal matrix, let L be a
+    unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
+    let U be an upper bidiagonal matrix with 1's above and diagonal
+    Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
+    symmetric tridiagonal to which it is similar.
+
+    Note : DLASQ2 defines a logical variable, IEEE, which is true
+    on machines which follow ieee-754 floating-point standard in their
+    handling of infinities and NaNs, and false otherwise. This variable
+    is passed to DLASQ3.
+
+    Arguments
+    =========
+
+    N     (input) INTEGER
+          The number of rows and columns in the matrix. N >= 0.
+
+    Z     (workspace) DOUBLE PRECISION array, dimension ( 4*N )
+          On entry Z holds the qd array. On exit, entries 1 to N hold
+          the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
+          trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
+          N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
+          holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
+          shifts that failed.
+
+    INFO  (output) INTEGER
+          = 0: successful exit
+          < 0: if the i-th argument is a scalar and had an illegal
+               value, then INFO = -i, if the i-th argument is an
+               array and the j-entry had an illegal value, then
+               INFO = -(i*100+j)
+          > 0: the algorithm failed
+                = 1, a split was marked by a positive value in E
+                = 2, current block of Z not diagonalized after 30*N
+                     iterations (in inner while loop)
+                = 3, termination criterion of outer while loop not met
+                     (program created more than N unreduced blocks)
+
+    Further Details
+    ===============
+    Local Variables: I0:N0 defines a current unreduced segment of Z.
+    The shifts are accumulated in SIGMA. Iteration count is in ITER.
+    Ping-pong is controlled by PP (alternates between 0 and 1).
+
+    =====================================================================
+
+
+       Test the input arguments.
+       (in case DLASQ2 is not called by DLASQ1)
+*/
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+    *info = 0;
+    eps = PRECISION;
+    safmin = SAFEMINIMUM;
+    tol = eps * 100.;
+/* Computing 2nd power */
+    d__1 = tol;
+    tol2 = d__1 * d__1;
+
+    if (*n < 0) {
+	*info = -1;
+	xerbla_("DLASQ2", &c__1);
+	return 0;
+    } else if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+
+/*        1-by-1 case. */
+
+	if (z__[1] < 0.) {
+	    *info = -201;
+	    xerbla_("DLASQ2", &c__2);
+	}
+	return 0;
+    } else if (*n == 2) {
+
+/*        2-by-2 case. */
+
+	if ((z__[2] < 0.) || (z__[3] < 0.)) {
+	    *info = -2;
+	    xerbla_("DLASQ2", &c__2);
+	    return 0;
+	} else if (z__[3] > z__[1]) {
+	    d__ = z__[3];
+	    z__[3] = z__[1];
+	    z__[1] = d__;
+	}
+	z__[5] = z__[1] + z__[2] + z__[3];
+	if (z__[2] > z__[3] * tol2) {
+	    t = (z__[1] - z__[3] + z__[2]) * .5;
+	    s = z__[3] * (z__[2] / t);
+	    if (s <= t) {
+		s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.) + 1.)));
+	    } else {
+		s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
+	    }
+	    t = z__[1] + (s + z__[2]);
+	    z__[3] *= z__[1] / t;
+	    z__[1] = t;
+	}
+	z__[2] = z__[3];
+	z__[6] = z__[2] + z__[1];
+	return 0;
+    }
+
+/*     Check for negative data and compute sums of q's and e's. */
+
+    z__[*n * 2] = 0.;
+    emin = z__[2];
+    qmax = 0.;
+    zmax = 0.;
+    d__ = 0.;
+    e = 0.;
+
+    i__1 = (*n - 1) << (1);
+    for (k = 1; k <= i__1; k += 2) {
+	if (z__[k] < 0.) {
+	    *info = -(k + 200);
+	    xerbla_("DLASQ2", &c__2);
+	    return 0;
+	} else if (z__[k + 1] < 0.) {
+	    *info = -(k + 201);
+	    xerbla_("DLASQ2", &c__2);
+	    return 0;
+	}
+	d__ += z__[k];
+	e += z__[k + 1];
+/* Computing MAX */
+	d__1 = qmax, d__2 = z__[k];
+	qmax = max(d__1,d__2);
+/* Computing MIN */
+	d__1 = emin, d__2 = z__[k + 1];
+	emin = min(d__1,d__2);
+/* Computing MAX */
+	d__1 = max(qmax,zmax), d__2 = z__[k + 1];
+	zmax = max(d__1,d__2);
+/* L10: */
+    }
+    if (z__[((*n) << (1)) - 1] < 0.) {
+	*info = -(((*n) << (1)) + 199);
+	xerbla_("DLASQ2", &c__2);
+	return 0;
+    }
+    d__ += z__[((*n) << (1)) - 1];
+/* Computing MAX */
+    d__1 = qmax, d__2 = z__[((*n) << (1)) - 1];
+    qmax = max(d__1,d__2);
+    zmax = max(qmax,zmax);
+
+/*     Check for diagonality. */
+
+    if (e == 0.) {
+	i__1 = *n;
+	for (k = 2; k <= i__1; ++k) {
+	    z__[k] = z__[((k) << (1)) - 1];
+/* L20: */
+	}
+	dlasrt_("D", n, &z__[1], &iinfo);
+	z__[((*n) << (1)) - 1] = d__;
+	return 0;
+    }
+
+    trace = d__ + e;
+
+/*     Check for zero data. */
+
+    if (trace == 0.) {
+	z__[((*n) << (1)) - 1] = 0.;
+	return 0;
+    }
+
+/*     Check whether the machine is IEEE conformable. */
+
+    ieee = ilaenv_(&c__10, "DLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)
+	    6, (ftnlen)1) == 1 && ilaenv_(&c__11, "DLASQ2", "N", &c__1, &c__2,
+	     &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1;
+
+/*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
+
+    for (k = (*n) << (1); k >= 2; k += -2) {
+	z__[k * 2] = 0.;
+	z__[((k) << (1)) - 1] = z__[k];
+	z__[((k) << (1)) - 2] = 0.;
+	z__[((k) << (1)) - 3] = z__[k - 1];
+/* L30: */
+    }
+
+    i0 = 1;
+    n0 = *n;
+
+/*     Reverse the qd-array, if warranted. */
+
+    if (z__[((i0) << (2)) - 3] * 1.5 < z__[((n0) << (2)) - 3]) {
+	ipn4 = (i0 + n0) << (2);
+	i__1 = (i0 + n0 - 1) << (1);
+	for (i4 = (i0) << (2); i4 <= i__1; i4 += 4) {
+	    temp = z__[i4 - 3];
+	    z__[i4 - 3] = z__[ipn4 - i4 - 3];
+	    z__[ipn4 - i4 - 3] = temp;
+	    temp = z__[i4 - 1];
+	    z__[i4 - 1] = z__[ipn4 - i4 - 5];
+	    z__[ipn4 - i4 - 5] = temp;
+/* L40: */
+	}
+    }
+
+/*     Initial split checking via dqd and Li's test. */
+
+    pp = 0;
+
+    for (k = 1; k <= 2; ++k) {
+
+	d__ = z__[((n0) << (2)) + pp - 3];
+	i__1 = ((i0) << (2)) + pp;
+	for (i4 = ((n0 - 1) << (2)) + pp; i4 >= i__1; i4 += -4) {
+	    if (z__[i4 - 1] <= tol2 * d__) {
+		z__[i4 - 1] = -0.;
+		d__ = z__[i4 - 3];
+	    } else {
+		d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
+	    }
+/* L50: */
+	}
+
+/*        dqd maps Z to ZZ plus Li's test. */
+
+	emin = z__[((i0) << (2)) + pp + 1];
+	d__ = z__[((i0) << (2)) + pp - 3];
+	i__1 = ((n0 - 1) << (2)) + pp;
+	for (i4 = ((i0) << (2)) + pp; i4 <= i__1; i4 += 4) {
+	    z__[i4 - ((pp) << (1)) - 2] = d__ + z__[i4 - 1];
+	    if (z__[i4 - 1] <= tol2 * d__) {
+		z__[i4 - 1] = -0.;
+		z__[i4 - ((pp) << (1)) - 2] = d__;
+		z__[i4 - ((pp) << (1))] = 0.;
+		d__ = z__[i4 + 1];
+	    } else if (safmin * z__[i4 + 1] < z__[i4 - ((pp) << (1)) - 2] &&
+		    safmin * z__[i4 - ((pp) << (1)) - 2] < z__[i4 + 1]) {
+		temp = z__[i4 + 1] / z__[i4 - ((pp) << (1)) - 2];
+		z__[i4 - ((pp) << (1))] = z__[i4 - 1] * temp;
+		d__ *= temp;
+	    } else {
+		z__[i4 - ((pp) << (1))] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4
+			- ((pp) << (1)) - 2]);
+		d__ = z__[i4 + 1] * (d__ / z__[i4 - ((pp) << (1)) - 2]);
+	    }
+/* Computing MIN */
+	    d__1 = emin, d__2 = z__[i4 - ((pp) << (1))];
+	    emin = min(d__1,d__2);
+/* L60: */
+	}
+	z__[((n0) << (2)) - pp - 2] = d__;
+
+/*        Now find qmax. */
+
+	qmax = z__[((i0) << (2)) - pp - 2];
+	i__1 = ((n0) << (2)) - pp - 2;
+	for (i4 = ((i0) << (2)) - pp + 2; i4 <= i__1; i4 += 4) {
+/* Computing MAX */
+	    d__1 = qmax, d__2 = z__[i4];
+	    qmax = max(d__1,d__2);
+/* L70: */
+	}
+
+/*        Prepare for the next iteration on K. */
+
+	pp = 1 - pp;
+/* L80: */
+    }
+
+    iter = 2;
+    nfail = 0;
+    ndiv = (n0 - i0) << (1);
+
+    i__1 = *n + 1;
+    for (iwhila = 1; iwhila <= i__1; ++iwhila) {
+	if (n0 < 1) {
+	    goto L150;
+	}
+
+/*
+          While array unfinished do
+
+          E(N0) holds the value of SIGMA when submatrix in I0:N0
+          splits from the rest of the array, but is negated.
+*/
+
+	desig = 0.;
+	if (n0 == *n) {
+	    sigma = 0.;
+	} else {
+	    sigma = -z__[((n0) << (2)) - 1];
+	}
+	if (sigma < 0.) {
+	    *info = 1;
+	    return 0;
+	}
+
+/*
+          Find last unreduced submatrix's top index I0, find QMAX and
+          EMIN. Find Gershgorin-type bound if Q's much greater than E's.
+*/
+
+	emax = 0.;
+	if (n0 > i0) {
+	    emin = (d__1 = z__[((n0) << (2)) - 5], abs(d__1));
+	} else {
+	    emin = 0.;
+	}
+	qmin = z__[((n0) << (2)) - 3];
+	qmax = qmin;
+	for (i4 = (n0) << (2); i4 >= 8; i4 += -4) {
+	    if (z__[i4 - 5] <= 0.) {
+		goto L100;
+	    }
+	    if (qmin >= emax * 4.) {
+/* Computing MIN */
+		d__1 = qmin, d__2 = z__[i4 - 3];
+		qmin = min(d__1,d__2);
+/* Computing MAX */
+		d__1 = emax, d__2 = z__[i4 - 5];
+		emax = max(d__1,d__2);
+	    }
+/* Computing MAX */
+	    d__1 = qmax, d__2 = z__[i4 - 7] + z__[i4 - 5];
+	    qmax = max(d__1,d__2);
+/* Computing MIN */
+	    d__1 = emin, d__2 = z__[i4 - 5];
+	    emin = min(d__1,d__2);
+/* L90: */
+	}
+	i4 = 4;
+
+L100:
+	i0 = i4 / 4;
+
+/*        Store EMIN for passing to DLASQ3. */
+
+	z__[((n0) << (2)) - 1] = emin;
+
+/*
+          Put -(initial shift) into DMIN.
+
+   Computing MAX
+*/
+	d__1 = 0., d__2 = qmin - sqrt(qmin) * 2. * sqrt(emax);
+	dmin__ = -max(d__1,d__2);
+
+/*        Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */
+
+	pp = 0;
+
+	nbig = (n0 - i0 + 1) * 30;
+	i__2 = nbig;
+	for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
+	    if (i0 > n0) {
+		goto L130;
+	    }
+
+/*           While submatrix unfinished take a good dqds step. */
+
+	    dlasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
+		    nfail, &iter, &ndiv, &ieee);
+
+	    pp = 1 - pp;
+
+/*           When EMIN is very small check for splits. */
+
+	    if (pp == 0 && n0 - i0 >= 3) {
+		if ((z__[n0 * 4] <= tol2 * qmax) || (z__[((n0) << (2)) - 1] <=
+			 tol2 * sigma)) {
+		    splt = i0 - 1;
+		    qmax = z__[((i0) << (2)) - 3];
+		    emin = z__[((i0) << (2)) - 1];
+		    oldemn = z__[i0 * 4];
+		    i__3 = (n0 - 3) << (2);
+		    for (i4 = (i0) << (2); i4 <= i__3; i4 += 4) {
+			if ((z__[i4] <= tol2 * z__[i4 - 3]) || (z__[i4 - 1] <=
+				 tol2 * sigma)) {
+			    z__[i4 - 1] = -sigma;
+			    splt = i4 / 4;
+			    qmax = 0.;
+			    emin = z__[i4 + 3];
+			    oldemn = z__[i4 + 4];
+			} else {
+/* Computing MAX */
+			    d__1 = qmax, d__2 = z__[i4 + 1];
+			    qmax = max(d__1,d__2);
+/* Computing MIN */
+			    d__1 = emin, d__2 = z__[i4 - 1];
+			    emin = min(d__1,d__2);
+/* Computing MIN */
+			    d__1 = oldemn, d__2 = z__[i4];
+			    oldemn = min(d__1,d__2);
+			}
+/* L110: */
+		    }
+		    z__[((n0) << (2)) - 1] = emin;
+		    z__[n0 * 4] = oldemn;
+		    i0 = splt + 1;
+		}
+	    }
+
+/* L120: */
+	}
+
+	*info = 2;
+	return 0;
+
+/*        end IWHILB */
+
+L130:
+
+/* L140: */
+	;
+    }
+
+    *info = 3;
+    return 0;
+
+/*     end IWHILA */
+
+L150:
+
+/*     Move q's to the front. */
+
+    i__1 = *n;
+    for (k = 2; k <= i__1; ++k) {
+	z__[k] = z__[((k) << (2)) - 3];
+/* L160: */
+    }
+
+/*     Sort and compute sum of eigenvalues. */
+
+    dlasrt_("D", n, &z__[1], &iinfo);
+
+    e = 0.;
+    for (k = *n; k >= 1; --k) {
+	e += z__[k];
+/* L170: */
+    }
+
+/*     Store trace, sum(eigenvalues) and information on performance. */
+
+    z__[((*n) << (1)) + 1] = trace;
+    z__[((*n) << (1)) + 2] = e;
+    z__[((*n) << (1)) + 3] = (doublereal) iter;
+/* Computing 2nd power */
+    i__1 = *n;
+    z__[((*n) << (1)) + 4] = (doublereal) ndiv / (doublereal) (i__1 * i__1);
+    z__[((*n) << (1)) + 5] = nfail * 100. / (doublereal) iter;
+    return 0;
+
+/*     End of DLASQ2 */
+
+} /* dlasq2_ */
+
+/* Subroutine */ int dlasq3_(integer *i0, integer *n0, doublereal *z__,
+	integer *pp, doublereal *dmin__, doublereal *sigma, doublereal *desig,
+	 doublereal *qmax, integer *nfail, integer *iter, integer *ndiv,
+	logical *ieee)
+{
+    /* Initialized data */
+
+    static integer ttype = 0;
+    static doublereal dmin1 = 0.;
+    static doublereal dmin2 = 0.;
+    static doublereal dn = 0.;
+    static doublereal dn1 = 0.;
+    static doublereal dn2 = 0.;
+    static doublereal tau = 0.;
+
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal s, t;
+    static integer j4, nn;
+    static doublereal eps, tol;
+    static integer n0in, ipn4;
+    static doublereal tol2, temp;
+    extern /* Subroutine */ int dlasq4_(integer *, integer *, doublereal *,
+	    integer *, integer *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *)
+	    , dlasq5_(integer *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, logical *), dlasq6_(
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *);
+
+    static doublereal safmin;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       May 17, 2000
+
+
+    Purpose
+    =======
+
+    DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds.
+    In case of failure it changes shifts, and tries again until output
+    is positive.
+
+    Arguments
+    =========
+
+    I0     (input) INTEGER
+           First index.
+
+    N0     (input) INTEGER
+           Last index.
+
+    Z      (input) DOUBLE PRECISION array, dimension ( 4*N )
+           Z holds the qd array.
+
+    PP     (input) INTEGER
+           PP=0 for ping, PP=1 for pong.
+
+    DMIN   (output) DOUBLE PRECISION
+           Minimum value of d.
+
+    SIGMA  (output) DOUBLE PRECISION
+           Sum of shifts used in current segment.
+
+    DESIG  (input/output) DOUBLE PRECISION
+           Lower order part of SIGMA
+
+    QMAX   (input) DOUBLE PRECISION
+           Maximum value of q.
+
+    NFAIL  (output) INTEGER
+           Number of times shift was too big.
+
+    ITER   (output) INTEGER
+           Number of iterations.
+
+    NDIV   (output) INTEGER
+           Number of divisions.
+
+    TTYPE  (output) INTEGER
+           Shift type.
+
+    IEEE   (input) LOGICAL
+           Flag for IEEE or non IEEE arithmetic (passed to DLASQ5).
+
+    =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+
+    n0in = *n0;
+    eps = PRECISION;
+    safmin = SAFEMINIMUM;
+    tol = eps * 100.;
+/* Computing 2nd power */
+    d__1 = tol;
+    tol2 = d__1 * d__1;
+
+/*     Check for deflation. */
+
+L10:
+
+    if (*n0 < *i0) {
+	return 0;
+    }
+    if (*n0 == *i0) {
+	goto L20;
+    }
+    nn = ((*n0) << (2)) + *pp;
+    if (*n0 == *i0 + 1) {
+	goto L40;
+    }
+
+/*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
+
+    if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - ((*pp) << (1))
+	     - 4] > tol2 * z__[nn - 7]) {
+	goto L30;
+    }
+
+L20:
+
+    z__[((*n0) << (2)) - 3] = z__[((*n0) << (2)) + *pp - 3] + *sigma;
+    --(*n0);
+    goto L10;
+
+/*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
+
+L30:
+
+    if (z__[nn - 9] > tol2 * *sigma && z__[nn - ((*pp) << (1)) - 8] > tol2 *
+	    z__[nn - 11]) {
+	goto L50;
+    }
+
+L40:
+
+    if (z__[nn - 3] > z__[nn - 7]) {
+	s = z__[nn - 3];
+	z__[nn - 3] = z__[nn - 7];
+	z__[nn - 7] = s;
+    }
+    if (z__[nn - 5] > z__[nn - 3] * tol2) {
+	t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
+	s = z__[nn - 3] * (z__[nn - 5] / t);
+	if (s <= t) {
+	    s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.) + 1.)));
+	} else {
+	    s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
+	}
+	t = z__[nn - 7] + (s + z__[nn - 5]);
+	z__[nn - 3] *= z__[nn - 7] / t;
+	z__[nn - 7] = t;
+    }
+    z__[((*n0) << (2)) - 7] = z__[nn - 7] + *sigma;
+    z__[((*n0) << (2)) - 3] = z__[nn - 3] + *sigma;
+    *n0 += -2;
+    goto L10;
+
+L50:
+
+/*     Reverse the qd-array, if warranted. */
+
+    if ((*dmin__ <= 0.) || (*n0 < n0in)) {
+	if (z__[((*i0) << (2)) + *pp - 3] * 1.5 < z__[((*n0) << (2)) + *pp -
+		3]) {
+	    ipn4 = (*i0 + *n0) << (2);
+	    i__1 = (*i0 + *n0 - 1) << (1);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		temp = z__[j4 - 3];
+		z__[j4 - 3] = z__[ipn4 - j4 - 3];
+		z__[ipn4 - j4 - 3] = temp;
+		temp = z__[j4 - 2];
+		z__[j4 - 2] = z__[ipn4 - j4 - 2];
+		z__[ipn4 - j4 - 2] = temp;
+		temp = z__[j4 - 1];
+		z__[j4 - 1] = z__[ipn4 - j4 - 5];
+		z__[ipn4 - j4 - 5] = temp;
+		temp = z__[j4];
+		z__[j4] = z__[ipn4 - j4 - 4];
+		z__[ipn4 - j4 - 4] = temp;
+/* L60: */
+	    }
+	    if (*n0 - *i0 <= 4) {
+		z__[((*n0) << (2)) + *pp - 1] = z__[((*i0) << (2)) + *pp - 1];
+		z__[((*n0) << (2)) - *pp] = z__[((*i0) << (2)) - *pp];
+	    }
+/* Computing MIN */
+	    d__1 = dmin2, d__2 = z__[((*n0) << (2)) + *pp - 1];
+	    dmin2 = min(d__1,d__2);
+/* Computing MIN */
+	    d__1 = z__[((*n0) << (2)) + *pp - 1], d__2 = z__[((*i0) << (2)) +
+		    *pp - 1], d__1 = min(d__1,d__2), d__2 = z__[((*i0) << (2))
+		     + *pp + 3];
+	    z__[((*n0) << (2)) + *pp - 1] = min(d__1,d__2);
+/* Computing MIN */
+	    d__1 = z__[((*n0) << (2)) - *pp], d__2 = z__[((*i0) << (2)) - *pp]
+		    , d__1 = min(d__1,d__2), d__2 = z__[((*i0) << (2)) - *pp
+		    + 4];
+	    z__[((*n0) << (2)) - *pp] = min(d__1,d__2);
+/* Computing MAX */
+	    d__1 = *qmax, d__2 = z__[((*i0) << (2)) + *pp - 3], d__1 = max(
+		    d__1,d__2), d__2 = z__[((*i0) << (2)) + *pp + 1];
+	    *qmax = max(d__1,d__2);
+	    *dmin__ = -0.;
+	}
+    }
+
+/*
+   L70:
+
+   Computing MIN
+*/
+    d__1 = z__[((*n0) << (2)) + *pp - 1], d__2 = z__[((*n0) << (2)) + *pp - 9]
+	    , d__1 = min(d__1,d__2), d__2 = dmin2 + z__[((*n0) << (2)) - *pp];
+    if ((*dmin__ < 0.) || (safmin * *qmax < min(d__1,d__2))) {
+
+/*        Choose a shift. */
+
+	dlasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1,
+		 &dn2, &tau, &ttype);
+
+/*        Call dqds until DMIN > 0. */
+
+L80:
+
+	dlasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1,
+		&dn2, ieee);
+
+	*ndiv += *n0 - *i0 + 2;
+	++(*iter);
+
+/*        Check status. */
+
+	if (*dmin__ >= 0. && dmin1 > 0.) {
+
+/*           Success. */
+
+	    goto L100;
+
+	} else if (*dmin__ < 0. && dmin1 > 0. && z__[((*n0 - 1) << (2)) - *pp]
+		 < tol * (*sigma + dn1) && abs(dn) < tol * *sigma) {
+
+/*           Convergence hidden by negative DN. */
+
+	    z__[((*n0 - 1) << (2)) - *pp + 2] = 0.;
+	    *dmin__ = 0.;
+	    goto L100;
+	} else if (*dmin__ < 0.) {
+
+/*           TAU too big. Select new TAU and try again. */
+
+	    ++(*nfail);
+	    if (ttype < -22) {
+
+/*              Failed twice. Play it safe. */
+
+		tau = 0.;
+	    } else if (dmin1 > 0.) {
+
+/*              Late failure. Gives excellent shift. */
+
+		tau = (tau + *dmin__) * (1. - eps * 2.);
+		ttype += -11;
+	    } else {
+
+/*              Early failure. Divide by 4. */
+
+		tau *= .25;
+		ttype += -12;
+	    }
+	    goto L80;
+	} else if (*dmin__ != *dmin__) {
+
+/*           NaN. */
+
+	    tau = 0.;
+	    goto L80;
+	} else {
+
+/*           Possible underflow. Play it safe. */
+
+	    goto L90;
+	}
+    }
+
+/*     Risk of underflow. */
+
+L90:
+    dlasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2);
+    *ndiv += *n0 - *i0 + 2;
+    ++(*iter);
+    tau = 0.;
+
+L100:
+    if (tau < *sigma) {
+	*desig += tau;
+	t = *sigma + *desig;
+	*desig -= t - *sigma;
+    } else {
+	t = *sigma + tau;
+	*desig = *sigma - (t - tau) + *desig;
+    }
+    *sigma = t;
+
+    return 0;
+
+/*     End of DLASQ3 */
+
+} /* dlasq3_ */
+
+/* Subroutine */ int dlasq4_(integer *i0, integer *n0, doublereal *z__,
+	integer *pp, integer *n0in, doublereal *dmin__, doublereal *dmin1,
+	doublereal *dmin2, doublereal *dn, doublereal *dn1, doublereal *dn2,
+	doublereal *tau, integer *ttype)
+{
+    /* Initialized data */
+
+    static doublereal g = 0.;
+
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal s, a2, b1, b2;
+    static integer i4, nn, np;
+    static doublereal gam, gap1, gap2;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASQ4 computes an approximation TAU to the smallest eigenvalue
+    using values of d from the previous transform.
+
+    I0    (input) INTEGER
+          First index.
+
+    N0    (input) INTEGER
+          Last index.
+
+    Z     (input) DOUBLE PRECISION array, dimension ( 4*N )
+          Z holds the qd array.
+
+    PP    (input) INTEGER
+          PP=0 for ping, PP=1 for pong.
+
+    NOIN  (input) INTEGER
+          The value of N0 at start of EIGTEST.
+
+    DMIN  (input) DOUBLE PRECISION
+          Minimum value of d.
+
+    DMIN1 (input) DOUBLE PRECISION
+          Minimum value of d, excluding D( N0 ).
+
+    DMIN2 (input) DOUBLE PRECISION
+          Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+
+    DN    (input) DOUBLE PRECISION
+          d(N)
+
+    DN1   (input) DOUBLE PRECISION
+          d(N-1)
+
+    DN2   (input) DOUBLE PRECISION
+          d(N-2)
+
+    TAU   (output) DOUBLE PRECISION
+          This is the shift.
+
+    TTYPE (output) INTEGER
+          Shift type.
+
+    Further Details
+    ===============
+    CNST1 = 9/16
+
+    =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+
+/*
+       A negative DMIN forces the shift to take that absolute value
+       TTYPE records the type of shift.
+*/
+
+    if (*dmin__ <= 0.) {
+	*tau = -(*dmin__);
+	*ttype = -1;
+	return 0;
+    }
+
+    nn = ((*n0) << (2)) + *pp;
+    if (*n0in == *n0) {
+
+/*        No eigenvalues deflated. */
+
+	if ((*dmin__ == *dn) || (*dmin__ == *dn1)) {
+
+	    b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
+	    b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
+	    a2 = z__[nn - 7] + z__[nn - 5];
+
+/*           Cases 2 and 3. */
+
+	    if (*dmin__ == *dn && *dmin1 == *dn1) {
+		gap2 = *dmin2 - a2 - *dmin2 * .25;
+		if (gap2 > 0. && gap2 > b2) {
+		    gap1 = a2 - *dn - b2 / gap2 * b2;
+		} else {
+		    gap1 = a2 - *dn - (b1 + b2);
+		}
+		if (gap1 > 0. && gap1 > b1) {
+/* Computing MAX */
+		    d__1 = *dn - b1 / gap1 * b1, d__2 = *dmin__ * .5;
+		    s = max(d__1,d__2);
+		    *ttype = -2;
+		} else {
+		    s = 0.;
+		    if (*dn > b1) {
+			s = *dn - b1;
+		    }
+		    if (a2 > b1 + b2) {
+/* Computing MIN */
+			d__1 = s, d__2 = a2 - (b1 + b2);
+			s = min(d__1,d__2);
+		    }
+/* Computing MAX */
+		    d__1 = s, d__2 = *dmin__ * .333;
+		    s = max(d__1,d__2);
+		    *ttype = -3;
+		}
+	    } else {
+
+/*              Case 4. */
+
+		*ttype = -4;
+		s = *dmin__ * .25;
+		if (*dmin__ == *dn) {
+		    gam = *dn;
+		    a2 = 0.;
+		    if (z__[nn - 5] > z__[nn - 7]) {
+			return 0;
+		    }
+		    b2 = z__[nn - 5] / z__[nn - 7];
+		    np = nn - 9;
+		} else {
+		    np = nn - ((*pp) << (1));
+		    b2 = z__[np - 2];
+		    gam = *dn1;
+		    if (z__[np - 4] > z__[np - 2]) {
+			return 0;
+		    }
+		    a2 = z__[np - 4] / z__[np - 2];
+		    if (z__[nn - 9] > z__[nn - 11]) {
+			return 0;
+		    }
+		    b2 = z__[nn - 9] / z__[nn - 11];
+		    np = nn - 13;
+		}
+
+/*              Approximate contribution to norm squared from I < NN-1. */
+
+		a2 += b2;
+		i__1 = ((*i0) << (2)) - 1 + *pp;
+		for (i4 = np; i4 >= i__1; i4 += -4) {
+		    if (b2 == 0.) {
+			goto L20;
+		    }
+		    b1 = b2;
+		    if (z__[i4] > z__[i4 - 2]) {
+			return 0;
+		    }
+		    b2 *= z__[i4] / z__[i4 - 2];
+		    a2 += b2;
+		    if ((max(b2,b1) * 100. < a2) || (.563 < a2)) {
+			goto L20;
+		    }
+/* L10: */
+		}
+L20:
+		a2 *= 1.05;
+
+/*              Rayleigh quotient residual bound. */
+
+		if (a2 < .563) {
+		    s = gam * (1. - sqrt(a2)) / (a2 + 1.);
+		}
+	    }
+	} else if (*dmin__ == *dn2) {
+
+/*           Case 5. */
+
+	    *ttype = -5;
+	    s = *dmin__ * .25;
+
+/*           Compute contribution to norm squared from I > NN-2. */
+
+	    np = nn - ((*pp) << (1));
+	    b1 = z__[np - 2];
+	    b2 = z__[np - 6];
+	    gam = *dn2;
+	    if ((z__[np - 8] > b2) || (z__[np - 4] > b1)) {
+		return 0;
+	    }
+	    a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.);
+
+/*           Approximate contribution to norm squared from I < NN-2. */
+
+	    if (*n0 - *i0 > 2) {
+		b2 = z__[nn - 13] / z__[nn - 15];
+		a2 += b2;
+		i__1 = ((*i0) << (2)) - 1 + *pp;
+		for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
+		    if (b2 == 0.) {
+			goto L40;
+		    }
+		    b1 = b2;
+		    if (z__[i4] > z__[i4 - 2]) {
+			return 0;
+		    }
+		    b2 *= z__[i4] / z__[i4 - 2];
+		    a2 += b2;
+		    if ((max(b2,b1) * 100. < a2) || (.563 < a2)) {
+			goto L40;
+		    }
+/* L30: */
+		}
+L40:
+		a2 *= 1.05;
+	    }
+
+	    if (a2 < .563) {
+		s = gam * (1. - sqrt(a2)) / (a2 + 1.);
+	    }
+	} else {
+
+/*           Case 6, no information to guide us. */
+
+	    if (*ttype == -6) {
+		g += (1. - g) * .333;
+	    } else if (*ttype == -18) {
+		g = .083250000000000005;
+	    } else {
+		g = .25;
+	    }
+	    s = g * *dmin__;
+	    *ttype = -6;
+	}
+
+    } else if (*n0in == *n0 + 1) {
+
+/*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
+
+	if (*dmin1 == *dn1 && *dmin2 == *dn2) {
+
+/*           Cases 7 and 8. */
+
+	    *ttype = -7;
+	    s = *dmin1 * .333;
+	    if (z__[nn - 5] > z__[nn - 7]) {
+		return 0;
+	    }
+	    b1 = z__[nn - 5] / z__[nn - 7];
+	    b2 = b1;
+	    if (b2 == 0.) {
+		goto L60;
+	    }
+	    i__1 = ((*i0) << (2)) - 1 + *pp;
+	    for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) {
+		a2 = b1;
+		if (z__[i4] > z__[i4 - 2]) {
+		    return 0;
+		}
+		b1 *= z__[i4] / z__[i4 - 2];
+		b2 += b1;
+		if (max(b1,a2) * 100. < b2) {
+		    goto L60;
+		}
+/* L50: */
+	    }
+L60:
+	    b2 = sqrt(b2 * 1.05);
+/* Computing 2nd power */
+	    d__1 = b2;
+	    a2 = *dmin1 / (d__1 * d__1 + 1.);
+	    gap2 = *dmin2 * .5 - a2;
+	    if (gap2 > 0. && gap2 > b2 * a2) {
+/* Computing MAX */
+		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
+		s = max(d__1,d__2);
+	    } else {
+/* Computing MAX */
+		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
+		s = max(d__1,d__2);
+		*ttype = -8;
+	    }
+	} else {
+
+/*           Case 9. */
+
+	    s = *dmin1 * .25;
+	    if (*dmin1 == *dn1) {
+		s = *dmin1 * .5;
+	    }
+	    *ttype = -9;
+	}
+
+    } else if (*n0in == *n0 + 2) {
+
+/*
+          Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN.
+
+          Cases 10 and 11.
+*/
+
+	if (*dmin2 == *dn2 && z__[nn - 5] * 2. < z__[nn - 7]) {
+	    *ttype = -10;
+	    s = *dmin2 * .333;
+	    if (z__[nn - 5] > z__[nn - 7]) {
+		return 0;
+	    }
+	    b1 = z__[nn - 5] / z__[nn - 7];
+	    b2 = b1;
+	    if (b2 == 0.) {
+		goto L80;
+	    }
+	    i__1 = ((*i0) << (2)) - 1 + *pp;
+	    for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) {
+		if (z__[i4] > z__[i4 - 2]) {
+		    return 0;
+		}
+		b1 *= z__[i4] / z__[i4 - 2];
+		b2 += b1;
+		if (b1 * 100. < b2) {
+		    goto L80;
+		}
+/* L70: */
+	    }
+L80:
+	    b2 = sqrt(b2 * 1.05);
+/* Computing 2nd power */
+	    d__1 = b2;
+	    a2 = *dmin2 / (d__1 * d__1 + 1.);
+	    gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
+		    nn - 9]) - a2;
+	    if (gap2 > 0. && gap2 > b2 * a2) {
+/* Computing MAX */
+		d__1 = s, d__2 = a2 * (1. - a2 * 1.01 * (b2 / gap2) * b2);
+		s = max(d__1,d__2);
+	    } else {
+/* Computing MAX */
+		d__1 = s, d__2 = a2 * (1. - b2 * 1.01);
+		s = max(d__1,d__2);
+	    }
+	} else {
+	    s = *dmin2 * .25;
+	    *ttype = -11;
+	}
+    } else if (*n0in > *n0 + 2) {
+
+/*        Case 12, more than two eigenvalues deflated. No information. */
+
+	s = 0.;
+	*ttype = -12;
+    }
+
+    *tau = s;
+    return 0;
+
+/*     End of DLASQ4 */
+
+} /* dlasq4_ */
+
+/* Subroutine */ int dlasq5_(integer *i0, integer *n0, doublereal *z__,
+	integer *pp, doublereal *tau, doublereal *dmin__, doublereal *dmin1,
+	doublereal *dmin2, doublereal *dn, doublereal *dnm1, doublereal *dnm2,
+	 logical *ieee)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static doublereal d__;
+    static integer j4, j4p2;
+    static doublereal emin, temp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       May 17, 2000
+
+
+    Purpose
+    =======
+
+    DLASQ5 computes one dqds transform in ping-pong form, one
+    version for IEEE machines another for non IEEE machines.
+
+    Arguments
+    =========
+
+    I0    (input) INTEGER
+          First index.
+
+    N0    (input) INTEGER
+          Last index.
+
+    Z     (input) DOUBLE PRECISION array, dimension ( 4*N )
+          Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+          an extra argument.
+
+    PP    (input) INTEGER
+          PP=0 for ping, PP=1 for pong.
+
+    TAU   (input) DOUBLE PRECISION
+          This is the shift.
+
+    DMIN  (output) DOUBLE PRECISION
+          Minimum value of d.
+
+    DMIN1 (output) DOUBLE PRECISION
+          Minimum value of d, excluding D( N0 ).
+
+    DMIN2 (output) DOUBLE PRECISION
+          Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+
+    DN    (output) DOUBLE PRECISION
+          d(N0), the last value of d.
+
+    DNM1  (output) DOUBLE PRECISION
+          d(N0-1).
+
+    DNM2  (output) DOUBLE PRECISION
+          d(N0-2).
+
+    IEEE  (input) LOGICAL
+          Flag for IEEE or non IEEE arithmetic.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+    if (*n0 - *i0 - 1 <= 0) {
+	return 0;
+    }
+
+    j4 = ((*i0) << (2)) + *pp - 3;
+    emin = z__[j4 + 4];
+    d__ = z__[j4] - *tau;
+    *dmin__ = d__;
+    *dmin1 = -z__[j4];
+
+    if (*ieee) {
+
+/*        Code for IEEE arithmetic. */
+
+	if (*pp == 0) {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 2] = d__ + z__[j4 - 1];
+		temp = z__[j4 + 1] / z__[j4 - 2];
+		d__ = d__ * temp - *tau;
+		*dmin__ = min(*dmin__,d__);
+		z__[j4] = z__[j4 - 1] * temp;
+/* Computing MIN */
+		d__1 = z__[j4];
+		emin = min(d__1,emin);
+/* L10: */
+	    }
+	} else {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 3] = d__ + z__[j4];
+		temp = z__[j4 + 2] / z__[j4 - 3];
+		d__ = d__ * temp - *tau;
+		*dmin__ = min(*dmin__,d__);
+		z__[j4 - 1] = z__[j4] * temp;
+/* Computing MIN */
+		d__1 = z__[j4 - 1];
+		emin = min(d__1,emin);
+/* L20: */
+	    }
+	}
+
+/*        Unroll last two steps. */
+
+	*dnm2 = d__;
+	*dmin2 = *dmin__;
+	j4 = ((*n0 - 2) << (2)) - *pp;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm2 + z__[j4p2];
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
+	*dmin__ = min(*dmin__,*dnm1);
+
+	*dmin1 = *dmin__;
+	j4 += 4;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm1 + z__[j4p2];
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
+	*dmin__ = min(*dmin__,*dn);
+
+    } else {
+
+/*        Code for non IEEE arithmetic. */
+
+	if (*pp == 0) {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 2] = d__ + z__[j4 - 1];
+		if (d__ < 0.) {
+		    return 0;
+		} else {
+		    z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
+		    d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
+		}
+		*dmin__ = min(*dmin__,d__);
+/* Computing MIN */
+		d__1 = emin, d__2 = z__[j4];
+		emin = min(d__1,d__2);
+/* L30: */
+	    }
+	} else {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 3] = d__ + z__[j4];
+		if (d__ < 0.) {
+		    return 0;
+		} else {
+		    z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
+		    d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
+		}
+		*dmin__ = min(*dmin__,d__);
+/* Computing MIN */
+		d__1 = emin, d__2 = z__[j4 - 1];
+		emin = min(d__1,d__2);
+/* L40: */
+	    }
+	}
+
+/*        Unroll last two steps. */
+
+	*dnm2 = d__;
+	*dmin2 = *dmin__;
+	j4 = ((*n0 - 2) << (2)) - *pp;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm2 + z__[j4p2];
+	if (*dnm2 < 0.) {
+	    return 0;
+	} else {
+	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	    *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
+	}
+	*dmin__ = min(*dmin__,*dnm1);
+
+	*dmin1 = *dmin__;
+	j4 += 4;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm1 + z__[j4p2];
+	if (*dnm1 < 0.) {
+	    return 0;
+	} else {
+	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	    *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
+	}
+	*dmin__ = min(*dmin__,*dn);
+
+    }
+
+    z__[j4 + 2] = *dn;
+    z__[((*n0) << (2)) - *pp] = emin;
+    return 0;
+
+/*     End of DLASQ5 */
+
+} /* dlasq5_ */
+
+/* Subroutine */ int dlasq6_(integer *i0, integer *n0, doublereal *z__,
+	integer *pp, doublereal *dmin__, doublereal *dmin1, doublereal *dmin2,
+	 doublereal *dn, doublereal *dnm1, doublereal *dnm2)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2;
+
+    /* Local variables */
+    static doublereal d__;
+    static integer j4, j4p2;
+    static doublereal emin, temp;
+
+    static doublereal safmin;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    DLASQ6 computes one dqd (shift equal to zero) transform in
+    ping-pong form, with protection against underflow and overflow.
+
+    Arguments
+    =========
+
+    I0    (input) INTEGER
+          First index.
+
+    N0    (input) INTEGER
+          Last index.
+
+    Z     (input) DOUBLE PRECISION array, dimension ( 4*N )
+          Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+          an extra argument.
+
+    PP    (input) INTEGER
+          PP=0 for ping, PP=1 for pong.
+
+    DMIN  (output) DOUBLE PRECISION
+          Minimum value of d.
+
+    DMIN1 (output) DOUBLE PRECISION
+          Minimum value of d, excluding D( N0 ).
+
+    DMIN2 (output) DOUBLE PRECISION
+          Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+
+    DN    (output) DOUBLE PRECISION
+          d(N0), the last value of d.
+
+    DNM1  (output) DOUBLE PRECISION
+          d(N0-1).
+
+    DNM2  (output) DOUBLE PRECISION
+          d(N0-2).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+    if (*n0 - *i0 - 1 <= 0) {
+	return 0;
+    }
+
+    safmin = SAFEMINIMUM;
+    j4 = ((*i0) << (2)) + *pp - 3;
+    emin = z__[j4 + 4];
+    d__ = z__[j4];
+    *dmin__ = d__;
+
+    if (*pp == 0) {
+	i__1 = (*n0 - 3) << (2);
+	for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+	    z__[j4 - 2] = d__ + z__[j4 - 1];
+	    if (z__[j4 - 2] == 0.) {
+		z__[j4] = 0.;
+		d__ = z__[j4 + 1];
+		*dmin__ = d__;
+		emin = 0.;
+	    } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4
+		    - 2] < z__[j4 + 1]) {
+		temp = z__[j4 + 1] / z__[j4 - 2];
+		z__[j4] = z__[j4 - 1] * temp;
+		d__ *= temp;
+	    } else {
+		z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
+		d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]);
+	    }
+	    *dmin__ = min(*dmin__,d__);
+/* Computing MIN */
+	    d__1 = emin, d__2 = z__[j4];
+	    emin = min(d__1,d__2);
+/* L10: */
+	}
+    } else {
+	i__1 = (*n0 - 3) << (2);
+	for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+	    z__[j4 - 3] = d__ + z__[j4];
+	    if (z__[j4 - 3] == 0.) {
+		z__[j4 - 1] = 0.;
+		d__ = z__[j4 + 2];
+		*dmin__ = d__;
+		emin = 0.;
+	    } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4
+		    - 3] < z__[j4 + 2]) {
+		temp = z__[j4 + 2] / z__[j4 - 3];
+		z__[j4 - 1] = z__[j4] * temp;
+		d__ *= temp;
+	    } else {
+		z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
+		d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]);
+	    }
+	    *dmin__ = min(*dmin__,d__);
+/* Computing MIN */
+	    d__1 = emin, d__2 = z__[j4 - 1];
+	    emin = min(d__1,d__2);
+/* L20: */
+	}
+    }
+
+/*     Unroll last two steps. */
+
+    *dnm2 = d__;
+    *dmin2 = *dmin__;
+    j4 = ((*n0 - 2) << (2)) - *pp;
+    j4p2 = j4 + ((*pp) << (1)) - 1;
+    z__[j4 - 2] = *dnm2 + z__[j4p2];
+    if (z__[j4 - 2] == 0.) {
+	z__[j4] = 0.;
+	*dnm1 = z__[j4p2 + 2];
+	*dmin__ = *dnm1;
+	emin = 0.;
+    } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
+	    z__[j4p2 + 2]) {
+	temp = z__[j4p2 + 2] / z__[j4 - 2];
+	z__[j4] = z__[j4p2] * temp;
+	*dnm1 = *dnm2 * temp;
+    } else {
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]);
+    }
+    *dmin__ = min(*dmin__,*dnm1);
+
+    *dmin1 = *dmin__;
+    j4 += 4;
+    j4p2 = j4 + ((*pp) << (1)) - 1;
+    z__[j4 - 2] = *dnm1 + z__[j4p2];
+    if (z__[j4 - 2] == 0.) {
+	z__[j4] = 0.;
+	*dn = z__[j4p2 + 2];
+	*dmin__ = *dn;
+	emin = 0.;
+    } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
+	    z__[j4p2 + 2]) {
+	temp = z__[j4p2 + 2] / z__[j4 - 2];
+	z__[j4] = z__[j4p2] * temp;
+	*dn = *dnm1 * temp;
+    } else {
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]);
+    }
+    *dmin__ = min(*dmin__,*dn);
+
+    z__[j4 + 2] = *dn;
+    z__[((*n0) << (2)) - *pp] = emin;
+    return 0;
+
+/*     End of DLASQ6 */
+
+} /* dlasq6_ */
+
+/* Subroutine */ int dlasr_(char *side, char *pivot, char *direct, integer *m,
+	 integer *n, doublereal *c__, doublereal *s, doublereal *a, integer *
+	lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, info;
+    static doublereal temp;
+    extern logical lsame_(char *, char *);
+    static doublereal ctemp, stemp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLASR   performs the transformation
+
+       A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+
+       A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+
+    where A is an m by n real matrix and P is an orthogonal matrix,
+    consisting of a sequence of plane rotations determined by the
+    parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+    and z = n when SIDE = 'R' or 'r' ):
+
+    When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+
+       P = P( z - 1 )*...*P( 2 )*P( 1 ),
+
+    and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+
+       P = P( 1 )*P( 2 )*...*P( z - 1 ),
+
+    where  P( k ) is a plane rotation matrix for the following planes:
+
+       when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+          the plane ( k, k + 1 )
+
+       when  PIVOT = 'T' or 't'  ( Top pivot ),
+          the plane ( 1, k + 1 )
+
+       when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+          the plane ( k, z )
+
+    c( k ) and s( k )  must contain the  cosine and sine that define the
+    matrix  P( k ).  The two by two plane rotation part of the matrix
+    P( k ), R( k ), is assumed to be of the form
+
+       R( k ) = (  c( k )  s( k ) ).
+                ( -s( k )  c( k ) )
+
+    This version vectorises across rows of the array A when SIDE = 'L'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            Specifies whether the plane rotation matrix P is applied to
+            A on the left or the right.
+            = 'L':  Left, compute A := P*A
+            = 'R':  Right, compute A:= A*P'
+
+    DIRECT  (input) CHARACTER*1
+            Specifies whether P is a forward or backward sequence of
+            plane rotations.
+            = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+            = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+
+    PIVOT   (input) CHARACTER*1
+            Specifies the plane for which P(k) is a plane rotation
+            matrix.
+            = 'V':  Variable pivot, the plane (k,k+1)
+            = 'T':  Top pivot, the plane (1,k+1)
+            = 'B':  Bottom pivot, the plane (k,z)
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  If m <= 1, an immediate
+            return is effected.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  If n <= 1, an
+            immediate return is effected.
+
+    C, S    (input) DOUBLE PRECISION arrays, dimension
+                    (M-1) if SIDE = 'L'
+                    (N-1) if SIDE = 'R'
+            c(k) and s(k) contain the cosine and sine that define the
+            matrix P(k).  The two by two plane rotation part of the
+            matrix P(k), R(k), is assumed to be of the form
+            R( k ) = (  c( k )  s( k ) ).
+                     ( -s( k )  c( k ) )
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            The m by n matrix A.  On exit, A is overwritten by P*A if
+            SIDE = 'R' or by A*P' if SIDE = 'L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --c__;
+    --s;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) {
+	info = 1;
+    } else if (! (((lsame_(pivot, "V")) || (lsame_(
+	    pivot, "T"))) || (lsame_(pivot, "B")))) {
+	info = 2;
+    } else if (! ((lsame_(direct, "F")) || (lsame_(
+	    direct, "B")))) {
+	info = 3;
+    } else if (*m < 0) {
+	info = 4;
+    } else if (*n < 0) {
+	info = 5;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("DLASR ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  P * A */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[j + 1 + i__ * a_dim1];
+			    a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp *
+				    a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j
+				    + i__ * a_dim1];
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[j + 1 + i__ * a_dim1];
+			    a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp *
+				    a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j
+				    + i__ * a_dim1];
+/* L30: */
+			}
+		    }
+/* L40: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = ctemp * temp - stemp * a[
+				    i__ * a_dim1 + 1];
+			    a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[
+				    i__ * a_dim1 + 1];
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = ctemp * temp - stemp * a[
+				    i__ * a_dim1 + 1];
+			    a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[
+				    i__ * a_dim1 + 1];
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1]
+				     + ctemp * temp;
+			    a[*m + i__ * a_dim1] = ctemp * a[*m + i__ *
+				    a_dim1] - stemp * temp;
+/* L90: */
+			}
+		    }
+/* L100: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1]
+				     + ctemp * temp;
+			    a[*m + i__ * a_dim1] = ctemp * a[*m + i__ *
+				    a_dim1] - stemp * temp;
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+	    }
+	}
+    } else if (lsame_(side, "R")) {
+
+/*        Form A * P' */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[i__ + (j + 1) * a_dim1];
+			    a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp *
+				     a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * temp + ctemp * a[
+				    i__ + j * a_dim1];
+/* L130: */
+			}
+		    }
+/* L140: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[i__ + (j + 1) * a_dim1];
+			    a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp *
+				     a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * temp + ctemp * a[
+				    i__ + j * a_dim1];
+/* L150: */
+			}
+		    }
+/* L160: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = ctemp * temp - stemp * a[
+				    i__ + a_dim1];
+			    a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ +
+				    a_dim1];
+/* L170: */
+			}
+		    }
+/* L180: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = ctemp * temp - stemp * a[
+				    i__ + a_dim1];
+			    a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ +
+				    a_dim1];
+/* L190: */
+			}
+		    }
+/* L200: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1]
+				     + ctemp * temp;
+			    a[i__ + *n * a_dim1] = ctemp * a[i__ + *n *
+				    a_dim1] - stemp * temp;
+/* L210: */
+			}
+		    }
+/* L220: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1]
+				     + ctemp * temp;
+			    a[i__ + *n * a_dim1] = ctemp * a[i__ + *n *
+				    a_dim1] - stemp * temp;
+/* L230: */
+			}
+		    }
+/* L240: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DLASR */
+
+} /* dlasr_ */
+
+/* Subroutine */ int dlasrt_(char *id, integer *n, doublereal *d__, integer *
+	info)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal d1, d2, d3;
+    static integer dir;
+    static doublereal tmp;
+    static integer endd;
+    extern logical lsame_(char *, char *);
+    static integer stack[64]	/* was [2][32] */;
+    static doublereal dmnmx;
+    static integer start;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static integer stkpnt;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    Sort the numbers in D in increasing order (if ID = 'I') or
+    in decreasing order (if ID = 'D' ).
+
+    Use Quick Sort, reverting to Insertion sort on arrays of
+    size <= 20. Dimension of STACK limits N to about 2**32.
+
+    Arguments
+    =========
+
+    ID      (input) CHARACTER*1
+            = 'I': sort D in increasing order;
+            = 'D': sort D in decreasing order.
+
+    N       (input) INTEGER
+            The length of the array D.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the array to be sorted.
+            On exit, D has been sorted into increasing order
+            (D(1) <= ... <= D(N) ) or into decreasing order
+            (D(1) >= ... >= D(N) ), depending on ID.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input paramters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    dir = -1;
+    if (lsame_(id, "D")) {
+	dir = 0;
+    } else if (lsame_(id, "I")) {
+	dir = 1;
+    }
+    if (dir == -1) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLASRT", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 1) {
+	return 0;
+    }
+
+    stkpnt = 1;
+    stack[0] = 1;
+    stack[1] = *n;
+L10:
+    start = stack[((stkpnt) << (1)) - 2];
+    endd = stack[((stkpnt) << (1)) - 1];
+    --stkpnt;
+    if (endd - start <= 20 && endd - start > 0) {
+
+/*        Do Insertion sort on D( START:ENDD ) */
+
+	if (dir == 0) {
+
+/*           Sort into decreasing order */
+
+	    i__1 = endd;
+	    for (i__ = start + 1; i__ <= i__1; ++i__) {
+		i__2 = start + 1;
+		for (j = i__; j >= i__2; --j) {
+		    if (d__[j] > d__[j - 1]) {
+			dmnmx = d__[j];
+			d__[j] = d__[j - 1];
+			d__[j - 1] = dmnmx;
+		    } else {
+			goto L30;
+		    }
+/* L20: */
+		}
+L30:
+		;
+	    }
+
+	} else {
+
+/*           Sort into increasing order */
+
+	    i__1 = endd;
+	    for (i__ = start + 1; i__ <= i__1; ++i__) {
+		i__2 = start + 1;
+		for (j = i__; j >= i__2; --j) {
+		    if (d__[j] < d__[j - 1]) {
+			dmnmx = d__[j];
+			d__[j] = d__[j - 1];
+			d__[j - 1] = dmnmx;
+		    } else {
+			goto L50;
+		    }
+/* L40: */
+		}
+L50:
+		;
+	    }
+
+	}
+
+    } else if (endd - start > 20) {
+
+/*
+          Partition D( START:ENDD ) and stack parts, largest one first
+
+          Choose partition entry as median of 3
+*/
+
+	d1 = d__[start];
+	d2 = d__[endd];
+	i__ = (start + endd) / 2;
+	d3 = d__[i__];
+	if (d1 < d2) {
+	    if (d3 < d1) {
+		dmnmx = d1;
+	    } else if (d3 < d2) {
+		dmnmx = d3;
+	    } else {
+		dmnmx = d2;
+	    }
+	} else {
+	    if (d3 < d2) {
+		dmnmx = d2;
+	    } else if (d3 < d1) {
+		dmnmx = d3;
+	    } else {
+		dmnmx = d1;
+	    }
+	}
+
+	if (dir == 0) {
+
+/*           Sort into decreasing order */
+
+	    i__ = start - 1;
+	    j = endd + 1;
+L60:
+L70:
+	    --j;
+	    if (d__[j] < dmnmx) {
+		goto L70;
+	    }
+L80:
+	    ++i__;
+	    if (d__[i__] > dmnmx) {
+		goto L80;
+	    }
+	    if (i__ < j) {
+		tmp = d__[i__];
+		d__[i__] = d__[j];
+		d__[j] = tmp;
+		goto L60;
+	    }
+	    if (j - start > endd - j - 1) {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+	    } else {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+	    }
+	} else {
+
+/*           Sort into increasing order */
+
+	    i__ = start - 1;
+	    j = endd + 1;
+L90:
+L100:
+	    --j;
+	    if (d__[j] > dmnmx) {
+		goto L100;
+	    }
+L110:
+	    ++i__;
+	    if (d__[i__] < dmnmx) {
+		goto L110;
+	    }
+	    if (i__ < j) {
+		tmp = d__[i__];
+		d__[i__] = d__[j];
+		d__[j] = tmp;
+		goto L90;
+	    }
+	    if (j - start > endd - j - 1) {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+	    } else {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+	    }
+	}
+    }
+    if (stkpnt > 0) {
+	goto L10;
+    }
+    return 0;
+
+/*     End of DLASRT */
+
+} /* dlasrt_ */
+
+/* Subroutine */ int dlassq_(integer *n, doublereal *x, integer *incx,
+	doublereal *scale, doublereal *sumsq)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer ix;
+    static doublereal absxi;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASSQ  returns the values  scl  and  smsq  such that
+
+       ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+
+    where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
+    assumed to be non-negative and  scl  returns the value
+
+       scl = max( scale, abs( x( i ) ) ).
+
+    scale and sumsq must be supplied in SCALE and SUMSQ and
+    scl and smsq are overwritten on SCALE and SUMSQ respectively.
+
+    The routine makes only one pass through the vector x.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of elements to be used from the vector X.
+
+    X       (input) DOUBLE PRECISION array, dimension (N)
+            The vector for which a scaled sum of squares is computed.
+               x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+
+    INCX    (input) INTEGER
+            The increment between successive values of the vector X.
+            INCX > 0.
+
+    SCALE   (input/output) DOUBLE PRECISION
+            On entry, the value  scale  in the equation above.
+            On exit, SCALE is overwritten with  scl , the scaling factor
+            for the sum of squares.
+
+    SUMSQ   (input/output) DOUBLE PRECISION
+            On entry, the value  sumsq  in the equation above.
+            On exit, SUMSQ is overwritten with  smsq , the basic sum of
+            squares from which  scl  has been factored out.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n > 0) {
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    if (x[ix] != 0.) {
+		absxi = (d__1 = x[ix], abs(d__1));
+		if (*scale < absxi) {
+/* Computing 2nd power */
+		    d__1 = *scale / absxi;
+		    *sumsq = *sumsq * (d__1 * d__1) + 1;
+		    *scale = absxi;
+		} else {
+/* Computing 2nd power */
+		    d__1 = absxi / *scale;
+		    *sumsq += d__1 * d__1;
+		}
+	    }
+/* L10: */
+	}
+    }
+    return 0;
+
+/*     End of DLASSQ */
+
+} /* dlassq_ */
+
+/* Subroutine */ int dlasv2_(doublereal *f, doublereal *g, doublereal *h__,
+	doublereal *ssmin, doublereal *ssmax, doublereal *snr, doublereal *
+	csr, doublereal *snl, doublereal *csl)
+{
+    /* System generated locals */
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static doublereal a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt,
+	     clt, crt, slt, srt;
+    static integer pmax;
+    static doublereal temp;
+    static logical swap;
+    static doublereal tsign;
+
+    static logical gasmal;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLASV2 computes the singular value decomposition of a 2-by-2
+    triangular matrix
+       [  F   G  ]
+       [  0   H  ].
+    On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
+    smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
+    right singular vectors for abs(SSMAX), giving the decomposition
+
+       [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]
+       [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].
+
+    Arguments
+    =========
+
+    F       (input) DOUBLE PRECISION
+            The (1,1) element of the 2-by-2 matrix.
+
+    G       (input) DOUBLE PRECISION
+            The (1,2) element of the 2-by-2 matrix.
+
+    H       (input) DOUBLE PRECISION
+            The (2,2) element of the 2-by-2 matrix.
+
+    SSMIN   (output) DOUBLE PRECISION
+            abs(SSMIN) is the smaller singular value.
+
+    SSMAX   (output) DOUBLE PRECISION
+            abs(SSMAX) is the larger singular value.
+
+    SNL     (output) DOUBLE PRECISION
+    CSL     (output) DOUBLE PRECISION
+            The vector (CSL, SNL) is a unit left singular vector for the
+            singular value abs(SSMAX).
+
+    SNR     (output) DOUBLE PRECISION
+    CSR     (output) DOUBLE PRECISION
+            The vector (CSR, SNR) is a unit right singular vector for the
+            singular value abs(SSMAX).
+
+    Further Details
+    ===============
+
+    Any input parameter may be aliased with any output parameter.
+
+    Barring over/underflow and assuming a guard digit in subtraction, all
+    output quantities are correct to within a few units in the last
+    place (ulps).
+
+    In IEEE arithmetic, the code works correctly if one matrix element is
+    infinite.
+
+    Overflow will not occur unless the largest singular value itself
+    overflows or is within a few ulps of overflow. (On machines with
+    partial overflow, like the Cray, overflow may occur if the largest
+    singular value is within a factor of 2 of overflow.)
+
+    Underflow is harmless if underflow is gradual. Otherwise, results
+    may correspond to a matrix modified by perturbations of size near
+    the underflow threshold.
+
+   =====================================================================
+*/
+
+
+    ft = *f;
+    fa = abs(ft);
+    ht = *h__;
+    ha = abs(*h__);
+
+/*
+       PMAX points to the maximum absolute element of matrix
+         PMAX = 1 if F largest in absolute values
+         PMAX = 2 if G largest in absolute values
+         PMAX = 3 if H largest in absolute values
+*/
+
+    pmax = 1;
+    swap = ha > fa;
+    if (swap) {
+	pmax = 3;
+	temp = ft;
+	ft = ht;
+	ht = temp;
+	temp = fa;
+	fa = ha;
+	ha = temp;
+
+/*        Now FA .ge. HA */
+
+    }
+    gt = *g;
+    ga = abs(gt);
+    if (ga == 0.) {
+
+/*        Diagonal matrix */
+
+	*ssmin = ha;
+	*ssmax = fa;
+	clt = 1.;
+	crt = 1.;
+	slt = 0.;
+	srt = 0.;
+    } else {
+	gasmal = TRUE_;
+	if (ga > fa) {
+	    pmax = 2;
+	    if (fa / ga < EPSILON) {
+
+/*              Case of very large GA */
+
+		gasmal = FALSE_;
+		*ssmax = ga;
+		if (ha > 1.) {
+		    *ssmin = fa / (ga / ha);
+		} else {
+		    *ssmin = fa / ga * ha;
+		}
+		clt = 1.;
+		slt = ht / gt;
+		srt = 1.;
+		crt = ft / gt;
+	    }
+	}
+	if (gasmal) {
+
+/*           Normal case */
+
+	    d__ = fa - ha;
+	    if (d__ == fa) {
+
+/*              Copes with infinite F or H */
+
+		l = 1.;
+	    } else {
+		l = d__ / fa;
+	    }
+
+/*           Note that 0 .le. L .le. 1 */
+
+	    m = gt / ft;
+
+/*           Note that abs(M) .le. 1/macheps */
+
+	    t = 2. - l;
+
+/*           Note that T .ge. 1 */
+
+	    mm = m * m;
+	    tt = t * t;
+	    s = sqrt(tt + mm);
+
+/*           Note that 1 .le. S .le. 1 + 1/macheps */
+
+	    if (l == 0.) {
+		r__ = abs(m);
+	    } else {
+		r__ = sqrt(l * l + mm);
+	    }
+
+/*           Note that 0 .le. R .le. 1 + 1/macheps */
+
+	    a = (s + r__) * .5;
+
+/*           Note that 1 .le. A .le. 1 + abs(M) */
+
+	    *ssmin = ha / a;
+	    *ssmax = fa * a;
+	    if (mm == 0.) {
+
+/*              Note that M is very tiny */
+
+		if (l == 0.) {
+		    t = d_sign(&c_b5654, &ft) * d_sign(&c_b2865, &gt);
+		} else {
+		    t = gt / d_sign(&d__, &ft) + m / t;
+		}
+	    } else {
+		t = (m / (s + t) + m / (r__ + l)) * (a + 1.);
+	    }
+	    l = sqrt(t * t + 4.);
+	    crt = 2. / l;
+	    srt = t / l;
+	    clt = (crt + srt * m) / a;
+	    slt = ht / ft * srt / a;
+	}
+    }
+    if (swap) {
+	*csl = srt;
+	*snl = crt;
+	*csr = slt;
+	*snr = clt;
+    } else {
+	*csl = clt;
+	*snl = slt;
+	*csr = crt;
+	*snr = srt;
+    }
+
+/*     Correct signs of SSMAX and SSMIN */
+
+    if (pmax == 1) {
+	tsign = d_sign(&c_b2865, csr) * d_sign(&c_b2865, csl) * d_sign(&
+		c_b2865, f);
+    }
+    if (pmax == 2) {
+	tsign = d_sign(&c_b2865, snr) * d_sign(&c_b2865, csl) * d_sign(&
+		c_b2865, g);
+    }
+    if (pmax == 3) {
+	tsign = d_sign(&c_b2865, snr) * d_sign(&c_b2865, snl) * d_sign(&
+		c_b2865, h__);
+    }
+    *ssmax = d_sign(ssmax, &tsign);
+    d__1 = tsign * d_sign(&c_b2865, f) * d_sign(&c_b2865, h__);
+    *ssmin = d_sign(ssmin, &d__1);
+    return 0;
+
+/*     End of DLASV2 */
+
+} /* dlasv2_ */
+
+/* Subroutine */ int dlaswp_(integer *n, doublereal *a, integer *lda, integer
+	*k1, integer *k2, integer *ipiv, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc;
+    static doublereal temp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DLASWP performs a series of row interchanges on the matrix A.
+    One row interchange is initiated for each of rows K1 through K2 of A.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the matrix of column dimension N to which the row
+            interchanges will be applied.
+            On exit, the permuted matrix.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+
+    K1      (input) INTEGER
+            The first element of IPIV for which a row interchange will
+            be done.
+
+    K2      (input) INTEGER
+            The last element of IPIV for which a row interchange will
+            be done.
+
+    IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+            The vector of pivot indices.  Only the elements in positions
+            K1 through K2 of IPIV are accessed.
+            IPIV(K) = L implies rows K and L are to be interchanged.
+
+    INCX    (input) INTEGER
+            The increment between successive values of IPIV.  If IPIV
+            is negative, the pivots are applied in reverse order.
+
+    Further Details
+    ===============
+
+    Modified by
+     R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+
+   =====================================================================
+
+
+       Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    if (*incx > 0) {
+	ix0 = *k1;
+	i1 = *k1;
+	i2 = *k2;
+	inc = 1;
+    } else if (*incx < 0) {
+	ix0 = (1 - *k2) * *incx + 1;
+	i1 = *k2;
+	i2 = *k1;
+	inc = -1;
+    } else {
+	return 0;
+    }
+
+    n32 = (*n / 32) << (5);
+    if (n32 != 0) {
+	i__1 = n32;
+	for (j = 1; j <= i__1; j += 32) {
+	    ix = ix0;
+	    i__2 = i2;
+	    i__3 = inc;
+	    for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3)
+		    {
+		ip = ipiv[ix];
+		if (ip != i__) {
+		    i__4 = j + 31;
+		    for (k = j; k <= i__4; ++k) {
+			temp = a[i__ + k * a_dim1];
+			a[i__ + k * a_dim1] = a[ip + k * a_dim1];
+			a[ip + k * a_dim1] = temp;
+/* L10: */
+		    }
+		}
+		ix += *incx;
+/* L20: */
+	    }
+/* L30: */
+	}
+    }
+    if (n32 != *n) {
+	++n32;
+	ix = ix0;
+	i__1 = i2;
+	i__3 = inc;
+	for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) {
+	    ip = ipiv[ix];
+	    if (ip != i__) {
+		i__2 = *n;
+		for (k = n32; k <= i__2; ++k) {
+		    temp = a[i__ + k * a_dim1];
+		    a[i__ + k * a_dim1] = a[ip + k * a_dim1];
+		    a[ip + k * a_dim1] = temp;
+/* L40: */
+		}
+	    }
+	    ix += *incx;
+/* L50: */
+	}
+    }
+
+    return 0;
+
+/*     End of DLASWP */
+
+} /* dlaswp_ */
+
+/* Subroutine */ int dlatrd_(char *uplo, integer *n, integer *nb, doublereal *
+	a, integer *lda, doublereal *e, doublereal *tau, doublereal *w,
+	integer *ldw)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, iw;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    static doublereal alpha;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *), daxpy_(integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *),
+	    dsymv_(char *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), dlarfg_(integer *, doublereal *, doublereal *, integer *,
+	     doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DLATRD reduces NB rows and columns of a real symmetric matrix A to
+    symmetric tridiagonal form by an orthogonal similarity
+    transformation Q' * A * Q, and returns the matrices V and W which are
+    needed to apply the transformation to the unreduced part of A.
+
+    If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
+    matrix, of which the upper triangle is supplied;
+    if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
+    matrix, of which the lower triangle is supplied.
+
+    This is an auxiliary routine called by DSYTRD.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is stored:
+            = 'U': Upper triangular
+            = 'L': Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    NB      (input) INTEGER
+            The number of rows and columns to be reduced.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit:
+            if UPLO = 'U', the last NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements above the diagonal
+              with the array TAU, represent the orthogonal matrix Q as a
+              product of elementary reflectors;
+            if UPLO = 'L', the first NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements below the diagonal
+              with the array TAU, represent the  orthogonal matrix Q as a
+              product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= (1,N).
+
+    E       (output) DOUBLE PRECISION array, dimension (N-1)
+            If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+            elements of the last NB columns of the reduced matrix;
+            if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+            the first NB columns of the reduced matrix.
+
+    TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+            The scalar factors of the elementary reflectors, stored in
+            TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+            See Further Details.
+
+    W       (output) DOUBLE PRECISION array, dimension (LDW,NB)
+            The n-by-nb matrix W required to update the unreduced part
+            of A.
+
+    LDW     (input) INTEGER
+            The leading dimension of the array W. LDW >= max(1,N).
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n) H(n-1) . . . H(n-nb+1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+    and tau in TAU(i-1).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+    and tau in TAU(i).
+
+    The elements of the vectors v together form the n-by-nb matrix V
+    which is needed, with W, to apply the transformation to the unreduced
+    part of the matrix, using a symmetric rank-2k update of the form:
+    A := A - V*W' - W*V'.
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5 and nb = 2:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  a   a   a   v4  v5 )              (  d                  )
+      (      a   a   v4  v5 )              (  1   d              )
+      (          a   1   v5 )              (  v1  1   a          )
+      (              d   1  )              (  v1  v2  a   a      )
+      (                  d  )              (  v1  v2  a   a   a  )
+
+    where d denotes a diagonal element of the reduced matrix, a denotes
+    an element of the original matrix that is unchanged, and vi denotes
+    an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --e;
+    --tau;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1;
+    w -= w_offset;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (lsame_(uplo, "U")) {
+
+/*        Reduce last NB columns of upper triangle */
+
+	i__1 = *n - *nb + 1;
+	for (i__ = *n; i__ >= i__1; --i__) {
+	    iw = i__ - *n + *nb;
+	    if (i__ < *n) {
+
+/*              Update A(1:i,i) */
+
+		i__2 = *n - i__;
+		dgemv_("No transpose", &i__, &i__2, &c_b3001, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
+			c_b2865, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		dgemv_("No transpose", &i__, &i__2, &c_b3001, &w[(iw + 1) *
+			w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b2865, &a[i__ * a_dim1 + 1], &c__1);
+	    }
+	    if (i__ > 1) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(1:i-2,i)
+*/
+
+		i__2 = i__ - 1;
+		dlarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 +
+			1], &c__1, &tau[i__ - 1]);
+		e[i__ - 1] = a[i__ - 1 + i__ * a_dim1];
+		a[i__ - 1 + i__ * a_dim1] = 1.;
+
+/*              Compute W(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		dsymv_("Upper", &i__2, &c_b2865, &a[a_offset], lda, &a[i__ *
+			a_dim1 + 1], &c__1, &c_b2879, &w[iw * w_dim1 + 1], &
+			c__1);
+		if (i__ < *n) {
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    dgemv_("Transpose", &i__2, &i__3, &c_b2865, &w[(iw + 1) *
+			    w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, &
+			    c_b2879, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[(i__ +
+			    1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1],
+			    &c__1, &c_b2865, &w[iw * w_dim1 + 1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[(i__ + 1) *
+			     a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, &
+			    c_b2879, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &w[(iw + 1)
+			     * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
+			    c__1, &c_b2865, &w[iw * w_dim1 + 1], &c__1);
+		}
+		i__2 = i__ - 1;
+		dscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		alpha = tau[i__ - 1] * -.5 * ddot_(&i__2, &w[iw * w_dim1 + 1],
+			 &c__1, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		daxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
+			w_dim1 + 1], &c__1);
+	    }
+
+/* L10: */
+	}
+    } else {
+
+/*        Reduce first NB columns of lower triangle */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:n,i) */
+
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + a_dim1],
+		    lda, &w[i__ + w_dim1], ldw, &c_b2865, &a[i__ + i__ *
+		    a_dim1], &c__1);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    dgemv_("No transpose", &i__2, &i__3, &c_b3001, &w[i__ + w_dim1],
+		    ldw, &a[i__ + a_dim1], lda, &c_b2865, &a[i__ + i__ *
+		    a_dim1], &c__1);
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:n,i)
+*/
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) +
+			i__ * a_dim1], &c__1, &tau[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.;
+
+/*              Compute W(i+1:n,i) */
+
+		i__2 = *n - i__;
+		dsymv_("Lower", &i__2, &c_b2865, &a[i__ + 1 + (i__ + 1) *
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b2879, &w[i__ + 1 + i__ * w_dim1], &c__1)
+			;
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &w[i__ + 1 +
+			w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b2879, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[i__ + 1 +
+			a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b2865, &
+			w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 +
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b2879, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &w[i__ + 1 +
+			w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b2865, &
+			w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		dscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		alpha = tau[i__] * -.5 * ddot_(&i__2, &w[i__ + 1 + i__ *
+			w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
+		i__2 = *n - i__;
+		daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+	    }
+
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of DLATRD */
+
+} /* dlatrd_ */
+
+/* Subroutine */ int dlauu2_(char *uplo, integer *n, doublereal *a, integer *
+	lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__;
+    static doublereal aii;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLAUU2 computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the unblocked form of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAUU2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the product U * U'. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    aii = a[i__ + i__ * a_dim1];
+	    if (i__ < *n) {
+		i__2 = *n - i__ + 1;
+		a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1],
+			lda, &a[i__ + i__ * a_dim1], lda);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		dgemv_("No transpose", &i__2, &i__3, &c_b2865, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			aii, &a[i__ * a_dim1 + 1], &c__1);
+	    } else {
+		dscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
+	    }
+/* L10: */
+	}
+
+    } else {
+
+/*        Compute the product L' * L. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    aii = a[i__ + i__ * a_dim1];
+	    if (i__ < *n) {
+		i__2 = *n - i__ + 1;
+		a[i__ + i__ * a_dim1] = ddot_(&i__2, &a[i__ + i__ * a_dim1], &
+			c__1, &a[i__ + i__ * a_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		dgemv_("Transpose", &i__2, &i__3, &c_b2865, &a[i__ + 1 +
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii,
+			 &a[i__ + a_dim1], lda);
+	    } else {
+		dscal_(&i__, &aii, &a[i__ + a_dim1], lda);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of DLAUU2 */
+
+} /* dlauu2_ */
+
+/* Subroutine */ int dlauum_(char *uplo, integer *n, doublereal *a, integer *
+	lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, ib, nb;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
+	     integer *), dlauu2_(char *, integer *,
+	    doublereal *, integer *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DLAUUM computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the blocked form of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DLAUUM", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "DLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	dlauu2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute the product U * U'. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		dtrmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib,
+			&c_b2865, &a[i__ + i__ * a_dim1], lda, &a[i__ *
+			a_dim1 + 1], lda);
+		dlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    dgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
+			    c_b2865, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__
+			    + (i__ + ib) * a_dim1], lda, &c_b2865, &a[i__ *
+			    a_dim1 + 1], lda);
+		    i__3 = *n - i__ - ib + 1;
+		    dsyrk_("Upper", "No transpose", &ib, &i__3, &c_b2865, &a[
+			    i__ + (i__ + ib) * a_dim1], lda, &c_b2865, &a[i__
+			    + i__ * a_dim1], lda);
+		}
+/* L10: */
+	    }
+	} else {
+
+/*           Compute the product L' * L. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		dtrmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
+			c_b2865, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1]
+			, lda);
+		dlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    dgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
+			    c_b2865, &a[i__ + ib + i__ * a_dim1], lda, &a[i__
+			    + ib + a_dim1], lda, &c_b2865, &a[i__ + a_dim1],
+			    lda);
+		    i__3 = *n - i__ - ib + 1;
+		    dsyrk_("Lower", "Transpose", &ib, &i__3, &c_b2865, &a[i__
+			    + ib + i__ * a_dim1], lda, &c_b2865, &a[i__ + i__
+			    * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DLAUUM */
+
+} /* dlauum_ */
+
+/* Subroutine */ int dorg2r_(integer *m, integer *n, integer *k, doublereal *
+	a, integer *lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *), dlarf_(char *, integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DORG2R generates an m by n real matrix Q with orthonormal columns,
+    which is defined as the first n columns of a product of k elementary
+    reflectors of order m
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by DGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by DGEQRF in the first k columns of its array
+            argument A.
+            On exit, the m-by-n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEQRF.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORG2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+/*     Initialise columns k+1:n to columns of the unit matrix */
+
+    i__1 = *n;
+    for (j = *k + 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (l = 1; l <= i__2; ++l) {
+	    a[l + j * a_dim1] = 0.;
+/* L10: */
+	}
+	a[j + j * a_dim1] = 1.;
+/* L20: */
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i) to A(i:m,i:n) from the left */
+
+	if (i__ < *n) {
+	    a[i__ + i__ * a_dim1] = 1.;
+	    i__1 = *m - i__ + 1;
+	    i__2 = *n - i__;
+	    dlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	}
+	if (i__ < *m) {
+	    i__1 = *m - i__;
+	    d__1 = -tau[i__];
+	    dscal_(&i__1, &d__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
+	}
+	a[i__ + i__ * a_dim1] = 1. - tau[i__];
+
+/*        Set A(1:i-1,i) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    a[l + i__ * a_dim1] = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of DORG2R */
+
+} /* dorg2r_ */
+
+/* Subroutine */ int dorgbr_(char *vect, integer *m, integer *n, integer *k,
+	doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, nb, mn;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical wantq;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dorglq_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    integer *), dorgqr_(integer *, integer *, integer *, doublereal *,
+	     integer *, doublereal *, doublereal *, integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORGBR generates one of the real orthogonal matrices Q or P**T
+    determined by DGEBRD when reducing a real matrix A to bidiagonal
+    form: A = Q * B * P**T.  Q and P**T are defined as products of
+    elementary reflectors H(i) or G(i) respectively.
+
+    If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+    is of order M:
+    if m >= k, Q = H(1) H(2) . . . H(k) and DORGBR returns the first n
+    columns of Q, where m >= n >= k;
+    if m < k, Q = H(1) H(2) . . . H(m-1) and DORGBR returns Q as an
+    M-by-M matrix.
+
+    If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
+    is of order N:
+    if k < n, P**T = G(k) . . . G(2) G(1) and DORGBR returns the first m
+    rows of P**T, where n >= m >= k;
+    if k >= n, P**T = G(n-1) . . . G(2) G(1) and DORGBR returns P**T as
+    an N-by-N matrix.
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            Specifies whether the matrix Q or the matrix P**T is
+            required, as defined in the transformation applied by DGEBRD:
+            = 'Q':  generate Q;
+            = 'P':  generate P**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q or P**T to be returned.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q or P**T to be returned.
+            N >= 0.
+            If VECT = 'Q', M >= N >= min(M,K);
+            if VECT = 'P', N >= M >= min(N,K).
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original M-by-K
+            matrix reduced by DGEBRD.
+            If VECT = 'P', the number of rows in the original K-by-N
+            matrix reduced by DGEBRD.
+            K >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by DGEBRD.
+            On exit, the M-by-N matrix Q or P**T.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) DOUBLE PRECISION array, dimension
+                                  (min(M,K)) if VECT = 'Q'
+                                  (min(N,K)) if VECT = 'P'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i), which determines Q or P**T, as
+            returned by DGEBRD in its array argument TAUQ or TAUP.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+            For optimum performance LWORK >= min(M,N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    wantq = lsame_(vect, "Q");
+    mn = min(*m,*n);
+    lquery = *lwork == -1;
+    if (! wantq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (!
+	     wantq && ((*m > *n) || (*m < min(*n,*k))))) {
+	*info = -3;
+    } else if (*k < 0) {
+	*info = -4;
+    } else if (*lda < max(1,*m)) {
+	*info = -6;
+    } else if (*lwork < max(1,mn) && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+	if (wantq) {
+	    nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	} else {
+	    nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	}
+	lwkopt = max(1,mn) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORGBR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    if (wantq) {
+
+/*
+          Form Q, determined by a call to DGEBRD to reduce an m-by-k
+          matrix
+*/
+
+	if (*m >= *k) {
+
+/*           If m >= k, assume m >= n >= k */
+
+	    dorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If m < k, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             column to the right, and set the first row and column of Q
+             to those of the unit matrix
+*/
+
+	    for (j = *m; j >= 2; --j) {
+		a[j * a_dim1 + 1] = 0.;
+		i__1 = *m;
+		for (i__ = j + 1; i__ <= i__1; ++i__) {
+		    a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1];
+/* L10: */
+		}
+/* L20: */
+	    }
+	    a[a_dim1 + 1] = 1.;
+	    i__1 = *m;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		a[i__ + a_dim1] = 0.;
+/* L30: */
+	    }
+	    if (*m > 1) {
+
+/*              Form Q(2:m,2:m) */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		dorgqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    } else {
+
+/*
+          Form P', determined by a call to DGEBRD to reduce a k-by-n
+          matrix
+*/
+
+	if (*k < *n) {
+
+/*           If k < n, assume k <= m <= n */
+
+	    dorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If k >= n, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             row downward, and set the first row and column of P' to
+             those of the unit matrix
+*/
+
+	    a[a_dim1 + 1] = 1.;
+	    i__1 = *n;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		a[i__ + a_dim1] = 0.;
+/* L40: */
+	    }
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		for (i__ = j - 1; i__ >= 2; --i__) {
+		    a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1];
+/* L50: */
+		}
+		a[j * a_dim1 + 1] = 0.;
+/* L60: */
+	    }
+	    if (*n > 1) {
+
+/*              Form P'(2:n,2:n) */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		dorglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORGBR */
+
+} /* dorgbr_ */
+
+/* Subroutine */ int dorghr_(integer *n, integer *ilo, integer *ihi,
+	doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+	integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, nb, nh, iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dorgqr_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORGHR generates a real orthogonal matrix Q which is defined as the
+    product of IHI-ILO elementary reflectors of order N, as returned by
+    DGEHRD:
+
+    Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix Q. N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            ILO and IHI must have the same values as in the previous call
+            of DGEHRD. Q is equal to the unit matrix except in the
+            submatrix Q(ilo+1:ihi,ilo+1:ihi).
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by DGEHRD.
+            On exit, the N-by-N orthogonal matrix Q.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,N).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (N-1)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEHRD.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= IHI-ILO.
+            For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nh = *ihi - *ilo;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,nh) && ! lquery) {
+	*info = -8;
+    }
+
+    if (*info == 0) {
+	nb = ilaenv_(&c__1, "DORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	lwkopt = max(1,nh) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORGHR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+/*
+       Shift the vectors which define the elementary reflectors one
+       column to the right, and set the first ilo and the last n-ihi
+       rows and columns to those of the unit matrix
+*/
+
+    i__1 = *ilo + 1;
+    for (j = *ihi; j >= i__1; --j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.;
+/* L10: */
+	}
+	i__2 = *ihi;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1];
+/* L20: */
+	}
+	i__2 = *n;
+	for (i__ = *ihi + 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    i__1 = *ilo;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.;
+/* L50: */
+	}
+	a[j + j * a_dim1] = 1.;
+/* L60: */
+    }
+    i__1 = *n;
+    for (j = *ihi + 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.;
+/* L70: */
+	}
+	a[j + j * a_dim1] = 1.;
+/* L80: */
+    }
+
+    if (nh > 0) {
+
+/*        Generate Q(ilo+1:ihi,ilo+1:ihi) */
+
+	dorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*
+		ilo], &work[1], lwork, &iinfo);
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORGHR */
+
+} /* dorghr_ */
+
+/* Subroutine */ int dorgl2_(integer *m, integer *n, integer *k, doublereal *
+	a, integer *lda, doublereal *tau, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *), dlarf_(char *, integer *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORGL2 generates an m by n real matrix Q with orthonormal rows,
+    which is defined as the first m rows of a product of k elementary
+    reflectors of order n
+
+          Q  =  H(k) . . . H(2) H(1)
+
+    as returned by DGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by DGELQF in the first k rows of its array argument A.
+            On exit, the m-by-n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGELQF.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORGL2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	return 0;
+    }
+
+    if (*k < *m) {
+
+/*        Initialise rows k+1:m to rows of the unit matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (l = *k + 1; l <= i__2; ++l) {
+		a[l + j * a_dim1] = 0.;
+/* L10: */
+	    }
+	    if (j > *k && j <= *m) {
+		a[j + j * a_dim1] = 1.;
+	    }
+/* L20: */
+	}
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i) to A(i:m,i:n) from the right */
+
+	if (i__ < *n) {
+	    if (i__ < *m) {
+		a[i__ + i__ * a_dim1] = 1.;
+		i__1 = *m - i__;
+		i__2 = *n - i__ + 1;
+		dlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &
+			tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    }
+	    i__1 = *n - i__;
+	    d__1 = -tau[i__];
+	    dscal_(&i__1, &d__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+	a[i__ + i__ * a_dim1] = 1. - tau[i__];
+
+/*        Set A(i,1:i-1) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    a[i__ + l * a_dim1] = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of DORGL2 */
+
+} /* dorgl2_ */
+
+/* Subroutine */ int dorglq_(integer *m, integer *n, integer *k, doublereal *
+	a, integer *lda, doublereal *tau, doublereal *work, integer *lwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int dorgl2_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *),
+	    dlarfb_(char *, char *, char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORGLQ generates an M-by-N real matrix Q with orthonormal rows,
+    which is defined as the first M rows of a product of K elementary
+    reflectors of order N
+
+          Q  =  H(k) . . . H(2) H(1)
+
+    as returned by DGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by DGELQF in the first k rows of its array argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGELQF.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*m) * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORGLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DORGLQ", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DORGLQ", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk rows are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(kk+1:m,1:kk) to zero. */
+
+	i__1 = kk;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = kk + 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *m) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	dorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *n - i__ + 1;
+		dlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i+ib:m,i:n) from the right */
+
+		i__2 = *m - i__ - ib + 1;
+		i__3 = *n - i__ + 1;
+		dlarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, &
+			i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib +
+			1], &ldwork);
+	    }
+
+/*           Apply H' to columns i:n of current block */
+
+	    i__2 = *n - i__ + 1;
+	    dorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set columns 1:i-1 of current block to zero */
+
+	    i__2 = i__ - 1;
+	    for (j = 1; j <= i__2; ++j) {
+		i__3 = i__ + ib - 1;
+		for (l = i__; l <= i__3; ++l) {
+		    a[l + j * a_dim1] = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DORGLQ */
+
+} /* dorglq_ */
+
+/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal *
+	a, integer *lda, doublereal *tau, doublereal *work, integer *lwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int dorg2r_(integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *),
+	    dlarfb_(char *, char *, char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORGQR generates an M-by-N real matrix Q with orthonormal columns,
+    which is defined as the first N columns of a product of K elementary
+    reflectors of order M
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by DGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by DGEQRF in the first k columns of its array
+            argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEQRF.
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "DORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*n) * nb;
+    work[1] = (doublereal) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORGQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "DORGQR", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "DORGQR", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk columns are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(1:kk,kk+1:n) to zero. */
+
+	i__1 = *n;
+	for (j = kk + 1; j <= i__1; ++j) {
+	    i__2 = kk;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *n) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	dorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *m - i__ + 1;
+		dlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i:m,i+ib:n) from the left */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__ - ib + 1;
+		dlarfb_("Left", "No transpose", "Forward", "Columnwise", &
+			i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[
+			1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &
+			work[ib + 1], &ldwork);
+	    }
+
+/*           Apply H to rows i:m of current block */
+
+	    i__2 = *m - i__ + 1;
+	    dorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set rows 1:i-1 of current block to zero */
+
+	    i__2 = i__ + ib - 1;
+	    for (j = i__; j <= i__2; ++j) {
+		i__3 = i__ - 1;
+		for (l = 1; l <= i__3; ++l) {
+		    a[l + j * a_dim1] = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1] = (doublereal) iws;
+    return 0;
+
+/*     End of DORGQR */
+
+} /* dorgqr_ */
+
+/* Subroutine */ int dorm2l_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, mi, ni, nq;
+    static doublereal aii;
+    static logical left;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DORM2L overwrites the general real m by n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'T', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'T',
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by DGEQLF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'T': apply Q' (Transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            DGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEQLF.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORM2L", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+    } else {
+	mi = *m;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) is applied to C(1:m-k+i,1:n) */
+
+	    mi = *m - *k + i__;
+	} else {
+
+/*           H(i) is applied to C(1:m,1:n-k+i) */
+
+	    ni = *n - *k + i__;
+	}
+
+/*        Apply H(i) */
+
+	aii = a[nq - *k + i__ + i__ * a_dim1];
+	a[nq - *k + i__ + i__ * a_dim1] = 1.;
+	dlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[
+		c_offset], ldc, &work[1]);
+	a[nq - *k + i__ + i__ * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of DORM2L */
+
+} /* dorm2l_ */
+
+/* Subroutine */ int dorm2r_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static doublereal aii;
+    static logical left;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DORM2R overwrites the general real m by n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'T', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'T',
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by DGEQRF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'T': apply Q' (Transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            DGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEQRF.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORM2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && ! notran) || (! left && notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) */
+
+	aii = a[i__ + i__ * a_dim1];
+	a[i__ + i__ * a_dim1] = 1.;
+	dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[
+		ic + jc * c_dim1], ldc, &work[1]);
+	a[i__ + i__ * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of DORM2R */
+
+} /* dorm2r_ */
+
+/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m,
+	integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau,
+	doublereal *c__, integer *ldc, doublereal *work, integer *lwork,
+	integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2];
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, integer *);
+    static logical notran;
+    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, integer *);
+    static logical applyq;
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      P * C          C * P
+    TRANS = 'T':      P**T * C       C * P**T
+
+    Here Q and P**T are the orthogonal matrices determined by DGEBRD when
+    reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
+    P**T are defined as products of elementary reflectors H(i) and G(i)
+    respectively.
+
+    Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+    order of the orthogonal matrix Q or P**T that is applied.
+
+    If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+    if nq >= k, Q = H(1) H(2) . . . H(k);
+    if nq < k, Q = H(1) H(2) . . . H(nq-1).
+
+    If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+    if k < nq, P = G(1) G(2) . . . G(k);
+    if k >= nq, P = G(1) G(2) . . . G(nq-1).
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            = 'Q': apply Q or Q**T;
+            = 'P': apply P or P**T.
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q, Q**T, P or P**T from the Left;
+            = 'R': apply Q, Q**T, P or P**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q  or P;
+            = 'T':  Transpose, apply Q**T or P**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original
+            matrix reduced by DGEBRD.
+            If VECT = 'P', the number of rows in the original
+            matrix reduced by DGEBRD.
+            K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension
+                                  (LDA,min(nq,K)) if VECT = 'Q'
+                                  (LDA,nq)        if VECT = 'P'
+            The vectors which define the elementary reflectors H(i) and
+            G(i), whose products determine the matrices Q and P, as
+            returned by DGEBRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If VECT = 'Q', LDA >= max(1,nq);
+            if VECT = 'P', LDA >= max(1,min(nq,K)).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (min(nq,K))
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i) which determines Q or P, as returned
+            by DGEBRD in the array argument TAUQ or TAUP.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q
+            or P*C or P**T*C or C*P or C*P**T.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    applyq = lsame_(vect, "Q");
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q or P and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! applyq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (! left && ! lsame_(side, "R")) {
+	*info = -2;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*k < 0) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = min(nq,*k);
+	if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2)
+		)) {
+	    *info = -8;
+	} else if (*ldc < max(1,*m)) {
+	    *info = -11;
+	} else if (*lwork < max(1,nw) && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info == 0) {
+	if (applyq) {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "DORMLQ", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "DORMLQ", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORMBR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    work[1] = 1.;
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    if (applyq) {
+
+/*        Apply Q */
+
+	if (nq >= *k) {
+
+/*           Q was determined by a call to DGEBRD with nq >= k */
+
+	    dormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           Q was determined by a call to DGEBRD with nq < k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    dormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1]
+		    , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+	}
+    } else {
+
+/*        Apply P */
+
+	if (notran) {
+	    *(unsigned char *)transt = 'T';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+	if (nq > *k) {
+
+/*           P was determined by a call to DGEBRD with nq > k */
+
+	    dormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           P was determined by a call to DGEBRD with nq <= k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    dormlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1],
+		    lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1],
+		    lwork, &iinfo);
+	}
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORMBR */
+
+} /* dormbr_ */
+
+/* Subroutine */ int dorml2_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static doublereal aii;
+    static logical left;
+    extern /* Subroutine */ int dlarf_(char *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DORML2 overwrites the general real m by n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'T', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'T',
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by DGELQF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'T': apply Q' (Transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            DGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGELQF.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORML2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) */
+
+	aii = a[i__ + i__ * a_dim1];
+	a[i__ + i__ * a_dim1] = 1.;
+	dlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[
+		ic + jc * c_dim1], ldc, &work[1]);
+	a[i__ + i__ * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of DORML2 */
+
+} /* dorml2_ */
+
+/* Subroutine */ int dormlq_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static doublereal t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int dorml2_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *), dlarfb_(char
+	    *, char *, char *, char *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran;
+    static integer ldwork;
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORMLQ overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by DGELQF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            DGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGELQF.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "DORMLQ", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORMLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "DORMLQ", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	dorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	if (notran) {
+	    *(unsigned char *)transt = 'T';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    dlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1],
+		    lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    dlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__
+		    + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1],
+		    ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORMLQ */
+
+} /* dormlq_ */
+
+/* Subroutine */ int dormql_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static doublereal t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int dorm2l_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *), dlarfb_(char
+	    *, char *, char *, char *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran;
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORMQL overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by DGEQLF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            DGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEQLF.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQL", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORMQL", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQL", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	dorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	} else {
+	    mi = *m;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i+ib-1) . . . H(i+1) H(i)
+*/
+
+	    i__4 = nq - *k + i__ + ib - 1;
+	    dlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1]
+		    , lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(1:m-k+i+ib-1,1:n) */
+
+		mi = *m - *k + i__ + ib - 1;
+	    } else {
+
+/*              H or H' is applied to C(1:m,1:n-k+i+ib-1) */
+
+		ni = *n - *k + i__ + ib - 1;
+	    }
+
+/*           Apply H or H' */
+
+	    dlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, &
+		    work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORMQL */
+
+} /* dormql_ */
+
+/* Subroutine */ int dormqr_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static doublereal t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int dorm2r_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *), dlarfb_(char
+	    *, char *, char *, char *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, integer *), dlarft_(char *, char *, integer *, integer *, doublereal
+	    *, integer *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran;
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORMQR overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by DGEQRF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            DGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) DOUBLE PRECISION array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DGEQRF.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "DORMQR", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DORMQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "DORMQR", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	dorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && ! notran) || (! left && notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    dlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ *
+		    a_dim1], lda, &tau[i__], t, &c__65)
+		    ;
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    dlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc *
+		    c_dim1], ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORMQR */
+
+} /* dormqr_ */
+
+/* Subroutine */ int dormtr_(char *side, char *uplo, char *trans, integer *m,
+	integer *n, doublereal *a, integer *lda, doublereal *tau, doublereal *
+	c__, integer *ldc, doublereal *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int dormql_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, integer *),
+	    dormqr_(char *, char *, integer *, integer *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DORMTR overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix of order nq, with nq = m if
+    SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+    nq-1 elementary reflectors, as returned by DSYTRD:
+
+    if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+
+    if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    UPLO    (input) CHARACTER*1
+            = 'U': Upper triangle of A contains elementary reflectors
+                   from DSYTRD;
+            = 'L': Lower triangle of A contains elementary reflectors
+                   from DSYTRD.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension
+                                 (LDA,M) if SIDE = 'L'
+                                 (LDA,N) if SIDE = 'R'
+            The vectors which define the elementary reflectors, as
+            returned by DSYTRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+
+    TAU     (input) DOUBLE PRECISION array, dimension
+                                 (M-1) if SIDE = 'L'
+                                 (N-1) if SIDE = 'R'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by DSYTRD.
+
+    C       (input/output) DOUBLE PRECISION array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	*info = -2;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	if (upper) {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "DORMQL", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "DORMQL", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "DORMQR", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "DORMQR", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("DORMTR", &i__2);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (nq == 1)) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    if (left) {
+	mi = *m - 1;
+	ni = *n;
+    } else {
+	mi = *m;
+	ni = *n - 1;
+    }
+
+    if (upper) {
+
+/*        Q was determined by a call to DSYTRD with UPLO = 'U' */
+
+	i__2 = nq - 1;
+	dormql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda,
+		&tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo);
+    } else {
+
+/*        Q was determined by a call to DSYTRD with UPLO = 'L' */
+
+	if (left) {
+	    i1 = 2;
+	    i2 = 1;
+	} else {
+	    i1 = 1;
+	    i2 = 2;
+	}
+	i__2 = nq - 1;
+	dormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], &
+		c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+    }
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DORMTR */
+
+} /* dormtr_ */
+
+/* Subroutine */ int dpotf2_(char *uplo, integer *n, doublereal *a, integer *
+	lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer j;
+    static doublereal ajj;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DPOTF2 computes the Cholesky factorization of a real symmetric
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U' * U ,  if UPLO = 'U', or
+       A = L  * L',  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the unblocked version of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is stored.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            n by n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U'*U  or A = L*L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, the leading minor of order k is not
+                 positive definite, and the factorization could not be
+                 completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DPOTF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the Cholesky factorization A = U'*U. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute U(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j - 1;
+	    ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j * a_dim1 + 1], &c__1,
+		    &a[j * a_dim1 + 1], &c__1);
+	    if (ajj <= 0.) {
+		a[j + j * a_dim1] = ajj;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    a[j + j * a_dim1] = ajj;
+
+/*           Compute elements J+1:N of row J. */
+
+	    if (j < *n) {
+		i__2 = j - 1;
+		i__3 = *n - j;
+		dgemv_("Transpose", &i__2, &i__3, &c_b3001, &a[(j + 1) *
+			a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b2865,
+			 &a[j + (j + 1) * a_dim1], lda);
+		i__2 = *n - j;
+		d__1 = 1. / ajj;
+		dscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Compute the Cholesky factorization A = L*L'. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute L(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j - 1;
+	    ajj = a[j + j * a_dim1] - ddot_(&i__2, &a[j + a_dim1], lda, &a[j
+		    + a_dim1], lda);
+	    if (ajj <= 0.) {
+		a[j + j * a_dim1] = ajj;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    a[j + j * a_dim1] = ajj;
+
+/*           Compute elements J+1:N of column J. */
+
+	    if (j < *n) {
+		i__2 = *n - j;
+		i__3 = j - 1;
+		dgemv_("No transpose", &i__2, &i__3, &c_b3001, &a[j + 1 +
+			a_dim1], lda, &a[j + a_dim1], lda, &c_b2865, &a[j + 1
+			+ j * a_dim1], &c__1);
+		i__2 = *n - j;
+		d__1 = 1. / ajj;
+		dscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+    goto L40;
+
+L30:
+    *info = j;
+
+L40:
+    return 0;
+
+/*     End of DPOTF2 */
+
+} /* dpotf2_ */
+
+/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer *
+	lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer j, jb, nb;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int dsyrk_(char *, char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
+	     integer *), dpotf2_(char *, integer *,
+	    doublereal *, integer *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DPOTRF computes the Cholesky factorization of a real symmetric
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U**T * U,  if UPLO = 'U', or
+       A = L  * L**T,  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the block version of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U**T*U or A = L*L**T.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the leading minor of order i is not
+                  positive definite, and the factorization could not be
+                  completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DPOTRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "DPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code. */
+
+	dpotf2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code. */
+
+	if (upper) {
+
+/*           Compute the Cholesky factorization A = U'*U. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		dsyrk_("Upper", "Transpose", &jb, &i__3, &c_b3001, &a[j *
+			a_dim1 + 1], lda, &c_b2865, &a[j + j * a_dim1], lda);
+		dpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block row. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    dgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, &
+			    c_b3001, &a[j * a_dim1 + 1], lda, &a[(j + jb) *
+			    a_dim1 + 1], lda, &c_b2865, &a[j + (j + jb) *
+			    a_dim1], lda);
+		    i__3 = *n - j - jb + 1;
+		    dtrsm_("Left", "Upper", "Transpose", "Non-unit", &jb, &
+			    i__3, &c_b2865, &a[j + j * a_dim1], lda, &a[j + (
+			    j + jb) * a_dim1], lda);
+		}
+/* L10: */
+	    }
+
+	} else {
+
+/*           Compute the Cholesky factorization A = L*L'. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		dsyrk_("Lower", "No transpose", &jb, &i__3, &c_b3001, &a[j +
+			a_dim1], lda, &c_b2865, &a[j + j * a_dim1], lda);
+		dpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block column. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    dgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &
+			    c_b3001, &a[j + jb + a_dim1], lda, &a[j + a_dim1],
+			     lda, &c_b2865, &a[j + jb + j * a_dim1], lda);
+		    i__3 = *n - j - jb + 1;
+		    dtrsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, &
+			    jb, &c_b2865, &a[j + j * a_dim1], lda, &a[j + jb
+			    + j * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+    goto L40;
+
+L30:
+    *info = *info + j - 1;
+
+L40:
+    return 0;
+
+/*     End of DPOTRF */
+
+} /* dpotrf_ */
+
+/* Subroutine */ int dpotri_(char *uplo, integer *n, doublereal *a, integer *
+	lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), dlauum_(
+	    char *, integer *, doublereal *, integer *, integer *),
+	    dtrtri_(char *, char *, integer *, doublereal *, integer *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DPOTRI computes the inverse of a real symmetric positive definite
+    matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+    computed by DPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the triangular factor U or L from the Cholesky
+            factorization A = U**T*U or A = L*L**T, as computed by
+            DPOTRF.
+            On exit, the upper or lower triangle of the (symmetric)
+            inverse of A, overwriting the input factor U or L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the (i,i) element of the factor U or L is
+                  zero, and the inverse could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DPOTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Invert the triangular Cholesky factor U or L. */
+
+    dtrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*     Form inv(U)*inv(U)' or inv(L)'*inv(L). */
+
+    dlauum_(uplo, n, &a[a_offset], lda, info);
+
+    return 0;
+
+/*     End of DPOTRI */
+
+} /* dpotri_ */
+
+/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs,
+	doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DPOTRS solves a system of linear equations A*X = B with a symmetric
+    positive definite matrix A using the Cholesky factorization
+    A = U**T*U or A = L*L**T computed by DPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The triangular factor U or L from the Cholesky factorization
+            A = U**T*U or A = L*L**T, as computed by DPOTRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DPOTRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*
+          Solve A*X = B where A = U'*U.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	dtrsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b2865, &
+		a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b2865,
+		 &a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A*X = B where A = L*L'.
+
+          Solve L*X = B, overwriting B with X.
+*/
+
+	dtrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b2865,
+		 &a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	dtrsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b2865, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    }
+
+    return 0;
+
+/*     End of DPOTRS */
+
+} /* dpotrs_ */
+
+/* Subroutine */ int dstedc_(char *compz, integer *n, doublereal *d__,
+	doublereal *e, doublereal *z__, integer *ldz, doublereal *work,
+	integer *lwork, integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k, m;
+    static doublereal p;
+    static integer ii, end, lgn;
+    static doublereal eps, tiny;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer lwmin;
+    extern /* Subroutine */ int dlaed0_(integer *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, doublereal *, integer *, integer *);
+    static integer start;
+
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlacpy_(char *, integer *, integer
+	    *, doublereal *, integer *, doublereal *, integer *),
+	    dlaset_(char *, integer *, integer *, doublereal *, doublereal *,
+	    doublereal *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
+	     integer *), dlasrt_(char *, integer *, doublereal *, integer *);
+    static integer liwmin, icompz;
+    extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *);
+    static doublereal orgnrm;
+    static logical lquery;
+    static integer smlsiz, dtrtrw, storez;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the divide and conquer method.
+    The eigenvectors of a full or band real symmetric matrix can also be
+    found if DSYTRD or DSPTRD or DSBTRD has been used to reduce this
+    matrix to tridiagonal form.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.  See DLAED3 for details.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'I':  Compute eigenvectors of tridiagonal matrix also.
+            = 'V':  Compute eigenvectors of original dense symmetric
+                    matrix also.  On entry, Z contains the orthogonal
+                    matrix used to reduce the original matrix to
+                    tridiagonal form.
+
+    N       (input) INTEGER
+            The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+            On entry, the subdiagonal elements of the tridiagonal matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
+            On entry, if COMPZ = 'V', then Z contains the orthogonal
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original symmetric matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If  COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1.
+            If eigenvectors are desired, then LDZ >= max(1,N).
+
+    WORK    (workspace/output) DOUBLE PRECISION array,
+                                           dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If COMPZ = 'N' or N <= 1 then LWORK must be at least 1.
+            If COMPZ = 'V' and N > 1 then LWORK must be at least
+                           ( 1 + 3*N + 2*N*lg N + 3*N**2 ),
+                           where lg( N ) = smallest integer k such
+                           that 2**k >= N.
+            If COMPZ = 'I' and N > 1 then LWORK must be at least
+                           ( 1 + 4*N + N**2 ).
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1.
+            If COMPZ = 'V' and N > 1 then LIWORK must be at least
+                           ( 6 + 6*N + 5*N*lg N ).
+            If COMPZ = 'I' and N > 1 then LIWORK must be at least
+                           ( 3 + 5*N ).
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = (*lwork == -1) || (*liwork == -1);
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if ((*n <= 1) || (icompz <= 0)) {
+	liwmin = 1;
+	lwmin = 1;
+    } else {
+	lgn = (integer) (log((doublereal) (*n)) / log(2.));
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (icompz == 1) {
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3;
+	    liwmin = *n * 6 + 6 + *n * 5 * lgn;
+	} else if (icompz == 2) {
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lwmin = ((*n) << (2)) + 1 + i__1 * i__1;
+	    liwmin = *n * 5 + 3;
+	}
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -10;
+    }
+
+    if (*info == 0) {
+	work[1] = (doublereal) lwmin;
+	iwork[1] = liwmin;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DSTEDC", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	if (icompz != 0) {
+	    z__[z_dim1 + 1] = 1.;
+	}
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "DSTEDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       If the following conditional clause is removed, then the routine
+       will use the Divide and Conquer routine to compute only the
+       eigenvalues, which requires (3N + 3N**2) real workspace and
+       (2 + 5N + 2N lg(N)) integer workspace.
+       Since on many architectures DSTERF is much faster than any other
+       algorithm for finding eigenvalues only, it is used here
+       as the default.
+
+       If COMPZ = 'N', use DSTERF to compute the eigenvalues.
+*/
+
+    if (icompz == 0) {
+	dsterf_(n, &d__[1], &e[1], info);
+	return 0;
+    }
+
+/*
+       If N is smaller than the minimum divide size (SMLSIZ+1), then
+       solve the problem with another solver.
+*/
+
+    if (*n <= smlsiz) {
+	if (icompz == 0) {
+	    dsterf_(n, &d__[1], &e[1], info);
+	    return 0;
+	} else if (icompz == 2) {
+	    dsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1],
+		    info);
+	    return 0;
+	} else {
+	    dsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1],
+		    info);
+	    return 0;
+	}
+    }
+
+/*
+       If COMPZ = 'V', the Z matrix must be stored elsewhere for later
+       use.
+*/
+
+    if (icompz == 1) {
+	storez = *n * *n + 1;
+    } else {
+	storez = 1;
+    }
+
+    if (icompz == 2) {
+	dlaset_("Full", n, n, &c_b2879, &c_b2865, &z__[z_offset], ldz);
+    }
+
+/*     Scale. */
+
+    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.) {
+	return 0;
+    }
+
+    eps = EPSILON;
+
+    start = 1;
+
+/*     while ( START <= N ) */
+
+L10:
+    if (start <= *n) {
+
+/*
+       Let END be the position of the next subdiagonal entry such that
+       E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+       matrix identified by the elements between START and END
+       constitutes an independent sub-problem.
+*/
+
+	end = start;
+L20:
+	if (end < *n) {
+	    tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 =
+		    d__[end + 1], abs(d__2)));
+	    if ((d__1 = e[end], abs(d__1)) > tiny) {
+		++end;
+		goto L20;
+	    }
+	}
+
+/*        (Sub) Problem determined.  Compute its size and solve it. */
+
+	m = end - start + 1;
+	if (m == 1) {
+	    start = end + 1;
+	    goto L10;
+	}
+	if (m > smlsiz) {
+	    *info = smlsiz;
+
+/*           Scale. */
+
+	    orgnrm = dlanst_("M", &m, &d__[start], &e[start]);
+	    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &m, &c__1, &d__[
+		    start], &m, info);
+	    i__1 = m - 1;
+	    i__2 = m - 1;
+	    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b2865, &i__1, &c__1, &e[
+		    start], &i__2, info);
+
+	    if (icompz == 1) {
+		dtrtrw = 1;
+	    } else {
+		dtrtrw = start;
+	    }
+	    dlaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[dtrtrw +
+		    start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[
+		    1], info);
+	    if (*info != 0) {
+		*info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m
+			+ 1) + start - 1;
+		return 0;
+	    }
+
+/*           Scale back. */
+
+	    dlascl_("G", &c__0, &c__0, &c_b2865, &orgnrm, &m, &c__1, &d__[
+		    start], &m, info);
+
+	} else {
+	    if (icompz == 1) {
+
+/*
+       Since QR won't update a Z matrix which is larger than the
+       length of D, we must solve the sub-problem in a workspace and
+       then multiply back into Z.
+*/
+
+		dsteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[
+			m * m + 1], info);
+		dlacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[
+			storez], n);
+		dgemm_("N", "N", n, &m, &m, &c_b2865, &work[storez], ldz, &
+			work[1], &m, &c_b2879, &z__[start * z_dim1 + 1], ldz);
+	    } else if (icompz == 2) {
+		dsteqr_("I", &m, &d__[start], &e[start], &z__[start + start *
+			z_dim1], ldz, &work[1], info);
+	    } else {
+		dsterf_(&m, &d__[start], &e[start], info);
+	    }
+	    if (*info != 0) {
+		*info = start * (*n + 1) + end;
+		return 0;
+	    }
+	}
+
+	start = end + 1;
+	goto L10;
+    }
+
+/*
+       endwhile
+
+       If the problem split any number of times, then the eigenvalues
+       will not be properly ordered.  Here we permute the eigenvalues
+       (and the associated eigenvectors) into ascending order.
+*/
+
+    if (m != *n) {
+	if (icompz == 0) {
+
+/*        Use Quick Sort */
+
+	    dlasrt_("I", n, &d__[1], info);
+
+	} else {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	    i__1 = *n;
+	    for (ii = 2; ii <= i__1; ++ii) {
+		i__ = ii - 1;
+		k = i__;
+		p = d__[i__];
+		i__2 = *n;
+		for (j = ii; j <= i__2; ++j) {
+		    if (d__[j] < p) {
+			k = j;
+			p = d__[j];
+		    }
+/* L30: */
+		}
+		if (k != i__) {
+		    d__[k] = d__[i__];
+		    d__[i__] = p;
+		    dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1
+			    + 1], &c__1);
+		}
+/* L40: */
+	    }
+	}
+    }
+
+    work[1] = (doublereal) lwmin;
+    iwork[1] = liwmin;
+
+    return 0;
+
+/*     End of DSTEDC */
+
+} /* dstedc_ */
+
+/* Subroutine */ int dsteqr_(char *compz, integer *n, doublereal *d__,
+	doublereal *e, doublereal *z__, integer *ldz, doublereal *work,
+	integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static doublereal b, c__, f, g;
+    static integer i__, j, k, l, m;
+    static doublereal p, r__, s;
+    static integer l1, ii, mm, lm1, mm1, nm1;
+    static doublereal rt1, rt2, eps;
+    static integer lsv;
+    static doublereal tst, eps2;
+    static integer lend, jtot;
+    extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal
+	    *, doublereal *, doublereal *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dlasr_(char *, char *, char *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *);
+    static doublereal anorm;
+    extern /* Subroutine */ int dswap_(integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlaev2_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *);
+    static integer lendm1, lendp1;
+
+    static integer iscale;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlaset_(char *, integer *, integer
+	    *, doublereal *, doublereal *, doublereal *, integer *);
+    static doublereal safmin;
+    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *);
+    static doublereal safmax;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *,
+	    integer *);
+    static integer lendsv;
+    static doublereal ssfmin;
+    static integer nmaxit, icompz;
+    static doublereal ssfmax;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    DSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the implicit QL or QR method.
+    The eigenvectors of a full or band symmetric matrix can also be found
+    if DSYTRD or DSPTRD or DSBTRD has been used to reduce this matrix to
+    tridiagonal form.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'V':  Compute eigenvalues and eigenvectors of the original
+                    symmetric matrix.  On entry, Z must contain the
+                    orthogonal matrix used to reduce the original matrix
+                    to tridiagonal form.
+            = 'I':  Compute eigenvalues and eigenvectors of the
+                    tridiagonal matrix.  Z is initialized to the identity
+                    matrix.
+
+    N       (input) INTEGER
+            The order of the matrix.  N >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+            On entry, the (n-1) subdiagonal elements of the tridiagonal
+            matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) DOUBLE PRECISION array, dimension (LDZ, N)
+            On entry, if  COMPZ = 'V', then Z contains the orthogonal
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if  COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original symmetric matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1, and if
+            eigenvectors are desired, then  LDZ >= max(1,N).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))
+            If COMPZ = 'N', then WORK is not referenced.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm has failed to find all the eigenvalues in
+                  a total of 30*N iterations; if INFO = i, then i
+                  elements of E have not converged to zero; on exit, D
+                  and E contain the elements of a symmetric tridiagonal
+                  matrix which is orthogonally similar to the original
+                  matrix.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DSTEQR", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	if (icompz == 2) {
+	    z__[z_dim1 + 1] = 1.;
+	}
+	return 0;
+    }
+
+/*     Determine the unit roundoff and over/underflow thresholds. */
+
+    eps = EPSILON;
+/* Computing 2nd power */
+    d__1 = eps;
+    eps2 = d__1 * d__1;
+    safmin = SAFEMINIMUM;
+    safmax = 1. / safmin;
+    ssfmax = sqrt(safmax) / 3.;
+    ssfmin = sqrt(safmin) / eps2;
+
+/*
+       Compute the eigenvalues and eigenvectors of the tridiagonal
+       matrix.
+*/
+
+    if (icompz == 2) {
+	dlaset_("Full", n, n, &c_b2879, &c_b2865, &z__[z_offset], ldz);
+    }
+
+    nmaxit = *n * 30;
+    jtot = 0;
+
+/*
+       Determine where the matrix splits and choose QL or QR iteration
+       for each block, according to whether top or bottom diagonal
+       element is smaller.
+*/
+
+    l1 = 1;
+    nm1 = *n - 1;
+
+L10:
+    if (l1 > *n) {
+	goto L160;
+    }
+    if (l1 > 1) {
+	e[l1 - 1] = 0.;
+    }
+    if (l1 <= nm1) {
+	i__1 = nm1;
+	for (m = l1; m <= i__1; ++m) {
+	    tst = (d__1 = e[m], abs(d__1));
+	    if (tst == 0.) {
+		goto L30;
+	    }
+	    if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m
+		    + 1], abs(d__2))) * eps) {
+		e[m] = 0.;
+		goto L30;
+	    }
+/* L20: */
+	}
+    }
+    m = *n;
+
+L30:
+    l = l1;
+    lsv = l;
+    lend = m;
+    lendsv = lend;
+    l1 = m + 1;
+    if (lend == l) {
+	goto L10;
+    }
+
+/*     Scale submatrix in rows and columns L to LEND */
+
+    i__1 = lend - l + 1;
+    anorm = dlanst_("I", &i__1, &d__[l], &e[l]);
+    iscale = 0;
+    if (anorm == 0.) {
+	goto L10;
+    }
+    if (anorm > ssfmax) {
+	iscale = 1;
+	i__1 = lend - l + 1;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
+		info);
+    } else if (anorm < ssfmin) {
+	iscale = 2;
+	i__1 = lend - l + 1;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
+		info);
+    }
+
+/*     Choose between QL and QR iteration */
+
+    if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) {
+	lend = lsv;
+	l = lendsv;
+    }
+
+    if (lend > l) {
+
+/*
+          QL Iteration
+
+          Look for small subdiagonal element.
+*/
+
+L40:
+	if (l != lend) {
+	    lendm1 = lend - 1;
+	    i__1 = lendm1;
+	    for (m = l; m <= i__1; ++m) {
+/* Computing 2nd power */
+		d__2 = (d__1 = e[m], abs(d__1));
+		tst = d__2 * d__2;
+		if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m
+			+ 1], abs(d__2)) + safmin) {
+		    goto L60;
+		}
+/* L50: */
+	    }
+	}
+
+	m = lend;
+
+L60:
+	if (m < lend) {
+	    e[m] = 0.;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L80;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l + 1) {
+	    if (icompz > 0) {
+		dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
+		work[l] = c__;
+		work[*n - 1 + l] = s;
+		dlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
+			z__[l * z_dim1 + 1], ldz);
+	    } else {
+		dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
+	    }
+	    d__[l] = rt1;
+	    d__[l + 1] = rt2;
+	    e[l] = 0.;
+	    l += 2;
+	    if (l <= lend) {
+		goto L40;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l + 1] - p) / (e[l] * 2.);
+	r__ = dlapy2_(&g, &c_b2865);
+	g = d__[m] - p + e[l] / (g + d_sign(&r__, &g));
+
+	s = 1.;
+	c__ = 1.;
+	p = 0.;
+
+/*        Inner loop */
+
+	mm1 = m - 1;
+	i__1 = l;
+	for (i__ = mm1; i__ >= i__1; --i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    dlartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m - 1) {
+		e[i__ + 1] = r__;
+	    }
+	    g = d__[i__ + 1] - p;
+	    r__ = (d__[i__] - g) * s + c__ * 2. * b;
+	    p = s * r__;
+	    d__[i__ + 1] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = -s;
+	    }
+
+/* L70: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = m - l + 1;
+	    dlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[l] = g;
+	goto L40;
+
+/*        Eigenvalue found. */
+
+L80:
+	d__[l] = p;
+
+	++l;
+	if (l <= lend) {
+	    goto L40;
+	}
+	goto L140;
+
+    } else {
+
+/*
+          QR Iteration
+
+          Look for small superdiagonal element.
+*/
+
+L90:
+	if (l != lend) {
+	    lendp1 = lend + 1;
+	    i__1 = lendp1;
+	    for (m = l; m >= i__1; --m) {
+/* Computing 2nd power */
+		d__2 = (d__1 = e[m - 1], abs(d__1));
+		tst = d__2 * d__2;
+		if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m
+			- 1], abs(d__2)) + safmin) {
+		    goto L110;
+		}
+/* L100: */
+	    }
+	}
+
+	m = lend;
+
+L110:
+	if (m > lend) {
+	    e[m - 1] = 0.;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L130;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l - 1) {
+	    if (icompz > 0) {
+		dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
+			;
+		work[m] = c__;
+		work[*n - 1 + m] = s;
+		dlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
+			z__[(l - 1) * z_dim1 + 1], ldz);
+	    } else {
+		dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
+	    }
+	    d__[l - 1] = rt1;
+	    d__[l] = rt2;
+	    e[l - 1] = 0.;
+	    l += -2;
+	    if (l >= lend) {
+		goto L90;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l - 1] - p) / (e[l - 1] * 2.);
+	r__ = dlapy2_(&g, &c_b2865);
+	g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g));
+
+	s = 1.;
+	c__ = 1.;
+	p = 0.;
+
+/*        Inner loop */
+
+	lm1 = l - 1;
+	i__1 = lm1;
+	for (i__ = m; i__ <= i__1; ++i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    dlartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m) {
+		e[i__ - 1] = r__;
+	    }
+	    g = d__[i__] - p;
+	    r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b;
+	    p = s * r__;
+	    d__[i__] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = s;
+	    }
+
+/* L120: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = l - m + 1;
+	    dlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[lm1] = g;
+	goto L90;
+
+/*        Eigenvalue found. */
+
+L130:
+	d__[l] = p;
+
+	--l;
+	if (l >= lend) {
+	    goto L90;
+	}
+	goto L140;
+
+    }
+
+/*     Undo scaling if necessary */
+
+L140:
+    if (iscale == 1) {
+	i__1 = lendsv - lsv + 1;
+	dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    } else if (iscale == 2) {
+	i__1 = lendsv - lsv + 1;
+	dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    }
+
+/*
+       Check for no convergence to an eigenvalue after a total
+       of N*MAXIT iterations.
+*/
+
+    if (jtot < nmaxit) {
+	goto L10;
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.) {
+	    ++(*info);
+	}
+/* L150: */
+    }
+    goto L190;
+
+/*     Order eigenvalues and eigenvectors. */
+
+L160:
+    if (icompz == 0) {
+
+/*        Use Quick Sort */
+
+	dlasrt_("I", n, &d__[1], info);
+
+    } else {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	i__1 = *n;
+	for (ii = 2; ii <= i__1; ++ii) {
+	    i__ = ii - 1;
+	    k = i__;
+	    p = d__[i__];
+	    i__2 = *n;
+	    for (j = ii; j <= i__2; ++j) {
+		if (d__[j] < p) {
+		    k = j;
+		    p = d__[j];
+		}
+/* L170: */
+	    }
+	    if (k != i__) {
+		d__[k] = d__[i__];
+		d__[i__] = p;
+		dswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1],
+			 &c__1);
+	    }
+/* L180: */
+	}
+    }
+
+L190:
+    return 0;
+
+/*     End of DSTEQR */
+
+} /* dsteqr_ */
+
+/* Subroutine */ int dsterf_(integer *n, doublereal *d__, doublereal *e,
+	integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static doublereal c__;
+    static integer i__, l, m;
+    static doublereal p, r__, s;
+    static integer l1;
+    static doublereal bb, rt1, rt2, eps, rte;
+    static integer lsv;
+    static doublereal eps2, oldc;
+    static integer lend, jtot;
+    extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal
+	    *, doublereal *, doublereal *);
+    static doublereal gamma, alpha, sigma, anorm;
+
+    static integer iscale;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *);
+    static doublereal oldgam, safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal safmax;
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *,
+	    integer *);
+    static integer lendsv;
+    static doublereal ssfmin;
+    static integer nmaxit;
+    static doublereal ssfmax;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DSTERF computes all eigenvalues of a symmetric tridiagonal matrix
+    using the Pal-Walker-Kahan variant of the QL or QR algorithm.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix.  N >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the n diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+            On entry, the (n-1) subdiagonal elements of the tridiagonal
+            matrix.
+            On exit, E has been destroyed.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm failed to find all of the eigenvalues in
+                  a total of 30*N iterations; if INFO = i, then i
+                  elements of E have not converged to zero.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --e;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n < 0) {
+	*info = -1;
+	i__1 = -(*info);
+	xerbla_("DSTERF", &i__1);
+	return 0;
+    }
+    if (*n <= 1) {
+	return 0;
+    }
+
+/*     Determine the unit roundoff for this environment. */
+
+    eps = EPSILON;
+/* Computing 2nd power */
+    d__1 = eps;
+    eps2 = d__1 * d__1;
+    safmin = SAFEMINIMUM;
+    safmax = 1. / safmin;
+    ssfmax = sqrt(safmax) / 3.;
+    ssfmin = sqrt(safmin) / eps2;
+
+/*     Compute the eigenvalues of the tridiagonal matrix. */
+
+    nmaxit = *n * 30;
+    sigma = 0.;
+    jtot = 0;
+
+/*
+       Determine where the matrix splits and choose QL or QR iteration
+       for each block, according to whether top or bottom diagonal
+       element is smaller.
+*/
+
+    l1 = 1;
+
+L10:
+    if (l1 > *n) {
+	goto L170;
+    }
+    if (l1 > 1) {
+	e[l1 - 1] = 0.;
+    }
+    i__1 = *n - 1;
+    for (m = l1; m <= i__1; ++m) {
+	if ((d__3 = e[m], abs(d__3)) <= sqrt((d__1 = d__[m], abs(d__1))) *
+		sqrt((d__2 = d__[m + 1], abs(d__2))) * eps) {
+	    e[m] = 0.;
+	    goto L30;
+	}
+/* L20: */
+    }
+    m = *n;
+
+L30:
+    l = l1;
+    lsv = l;
+    lend = m;
+    lendsv = lend;
+    l1 = m + 1;
+    if (lend == l) {
+	goto L10;
+    }
+
+/*     Scale submatrix in rows and columns L to LEND */
+
+    i__1 = lend - l + 1;
+    anorm = dlanst_("I", &i__1, &d__[l], &e[l]);
+    iscale = 0;
+    if (anorm > ssfmax) {
+	iscale = 1;
+	i__1 = lend - l + 1;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
+		info);
+    } else if (anorm < ssfmin) {
+	iscale = 2;
+	i__1 = lend - l + 1;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
+		info);
+    }
+
+    i__1 = lend - 1;
+    for (i__ = l; i__ <= i__1; ++i__) {
+/* Computing 2nd power */
+	d__1 = e[i__];
+	e[i__] = d__1 * d__1;
+/* L40: */
+    }
+
+/*     Choose between QL and QR iteration */
+
+    if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) {
+	lend = lsv;
+	l = lendsv;
+    }
+
+    if (lend >= l) {
+
+/*
+          QL Iteration
+
+          Look for small subdiagonal element.
+*/
+
+L50:
+	if (l != lend) {
+	    i__1 = lend - 1;
+	    for (m = l; m <= i__1; ++m) {
+		if ((d__2 = e[m], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m
+			+ 1], abs(d__1))) {
+		    goto L70;
+		}
+/* L60: */
+	    }
+	}
+	m = lend;
+
+L70:
+	if (m < lend) {
+	    e[m] = 0.;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L90;
+	}
+
+/*
+          If remaining matrix is 2 by 2, use DLAE2 to compute its
+          eigenvalues.
+*/
+
+	if (m == l + 1) {
+	    rte = sqrt(e[l]);
+	    dlae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2);
+	    d__[l] = rt1;
+	    d__[l + 1] = rt2;
+	    e[l] = 0.;
+	    l += 2;
+	    if (l <= lend) {
+		goto L50;
+	    }
+	    goto L150;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L150;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	rte = sqrt(e[l]);
+	sigma = (d__[l + 1] - p) / (rte * 2.);
+	r__ = dlapy2_(&sigma, &c_b2865);
+	sigma = p - rte / (sigma + d_sign(&r__, &sigma));
+
+	c__ = 1.;
+	s = 0.;
+	gamma = d__[m] - sigma;
+	p = gamma * gamma;
+
+/*        Inner loop */
+
+	i__1 = l;
+	for (i__ = m - 1; i__ >= i__1; --i__) {
+	    bb = e[i__];
+	    r__ = p + bb;
+	    if (i__ != m - 1) {
+		e[i__ + 1] = s * r__;
+	    }
+	    oldc = c__;
+	    c__ = p / r__;
+	    s = bb / r__;
+	    oldgam = gamma;
+	    alpha = d__[i__];
+	    gamma = c__ * (alpha - sigma) - s * oldgam;
+	    d__[i__ + 1] = oldgam + (alpha - gamma);
+	    if (c__ != 0.) {
+		p = gamma * gamma / c__;
+	    } else {
+		p = oldc * bb;
+	    }
+/* L80: */
+	}
+
+	e[l] = s * p;
+	d__[l] = sigma + gamma;
+	goto L50;
+
+/*        Eigenvalue found. */
+
+L90:
+	d__[l] = p;
+
+	++l;
+	if (l <= lend) {
+	    goto L50;
+	}
+	goto L150;
+
+    } else {
+
+/*
+          QR Iteration
+
+          Look for small superdiagonal element.
+*/
+
+L100:
+	i__1 = lend + 1;
+	for (m = l; m >= i__1; --m) {
+	    if ((d__2 = e[m - 1], abs(d__2)) <= eps2 * (d__1 = d__[m] * d__[m
+		    - 1], abs(d__1))) {
+		goto L120;
+	    }
+/* L110: */
+	}
+	m = lend;
+
+L120:
+	if (m > lend) {
+	    e[m - 1] = 0.;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L140;
+	}
+
+/*
+          If remaining matrix is 2 by 2, use DLAE2 to compute its
+          eigenvalues.
+*/
+
+	if (m == l - 1) {
+	    rte = sqrt(e[l - 1]);
+	    dlae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2);
+	    d__[l] = rt1;
+	    d__[l - 1] = rt2;
+	    e[l - 1] = 0.;
+	    l += -2;
+	    if (l >= lend) {
+		goto L100;
+	    }
+	    goto L150;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L150;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	rte = sqrt(e[l - 1]);
+	sigma = (d__[l - 1] - p) / (rte * 2.);
+	r__ = dlapy2_(&sigma, &c_b2865);
+	sigma = p - rte / (sigma + d_sign(&r__, &sigma));
+
+	c__ = 1.;
+	s = 0.;
+	gamma = d__[m] - sigma;
+	p = gamma * gamma;
+
+/*        Inner loop */
+
+	i__1 = l - 1;
+	for (i__ = m; i__ <= i__1; ++i__) {
+	    bb = e[i__];
+	    r__ = p + bb;
+	    if (i__ != m) {
+		e[i__ - 1] = s * r__;
+	    }
+	    oldc = c__;
+	    c__ = p / r__;
+	    s = bb / r__;
+	    oldgam = gamma;
+	    alpha = d__[i__ + 1];
+	    gamma = c__ * (alpha - sigma) - s * oldgam;
+	    d__[i__] = oldgam + (alpha - gamma);
+	    if (c__ != 0.) {
+		p = gamma * gamma / c__;
+	    } else {
+		p = oldc * bb;
+	    }
+/* L130: */
+	}
+
+	e[l - 1] = s * p;
+	d__[l] = sigma + gamma;
+	goto L100;
+
+/*        Eigenvalue found. */
+
+L140:
+	d__[l] = p;
+
+	--l;
+	if (l >= lend) {
+	    goto L100;
+	}
+	goto L150;
+
+    }
+
+/*     Undo scaling if necessary */
+
+L150:
+    if (iscale == 1) {
+	i__1 = lendsv - lsv + 1;
+	dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+    }
+    if (iscale == 2) {
+	i__1 = lendsv - lsv + 1;
+	dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+    }
+
+/*
+       Check for no convergence to an eigenvalue after a total
+       of N*MAXIT iterations.
+*/
+
+    if (jtot < nmaxit) {
+	goto L10;
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.) {
+	    ++(*info);
+	}
+/* L160: */
+    }
+    goto L180;
+
+/*     Sort eigenvalues in increasing order. */
+
+L170:
+    dlasrt_("I", n, &d__[1], info);
+
+L180:
+    return 0;
+
+/*     End of DSTERF */
+
+} /* dsterf_ */
+
+/* Subroutine */ int dsyevd_(char *jobz, char *uplo, integer *n, doublereal *
+	a, integer *lda, doublereal *w, doublereal *work, integer *lwork,
+	integer *iwork, integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal eps;
+    static integer inde;
+    static doublereal anrm, rmin, rmax;
+    static integer lopt;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    static doublereal sigma;
+    extern logical lsame_(char *, char *);
+    static integer iinfo, lwmin, liopt;
+    static logical lower, wantz;
+    static integer indwk2, llwrk2;
+
+    static integer iscale;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dstedc_(char *, integer *,
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, integer *, integer *, integer *), dlacpy_(
+	    char *, integer *, integer *, doublereal *, integer *, doublereal
+	    *, integer *);
+    static doublereal safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal bignum;
+    static integer indtau;
+    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
+	     integer *);
+    extern doublereal dlansy_(char *, char *, integer *, doublereal *,
+	    integer *, doublereal *);
+    static integer indwrk, liwmin;
+    extern /* Subroutine */ int dormtr_(char *, char *, char *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *, integer *), dsytrd_(char *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *);
+    static integer llwork;
+    static doublereal smlnum;
+    static logical lquery;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DSYEVD computes all eigenvalues and, optionally, eigenvectors of a
+    real symmetric matrix A. If eigenvectors are desired, it uses a
+    divide and conquer algorithm.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Because of large use of BLAS of level 3, DSYEVD needs N**2 more
+    workspace than DSYEVX.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only;
+            = 'V':  Compute eigenvalues and eigenvectors.
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA, N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of A contains the
+            upper triangular part of the matrix A.  If UPLO = 'L',
+            the leading N-by-N lower triangular part of A contains
+            the lower triangular part of the matrix A.
+            On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+            orthonormal eigenvectors of the matrix A.
+            If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+            or the upper triangle (if UPLO='U') of A, including the
+            diagonal, is destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    W       (output) DOUBLE PRECISION array, dimension (N)
+            If INFO = 0, the eigenvalues in ascending order.
+
+    WORK    (workspace/output) DOUBLE PRECISION array,
+                                           dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If N <= 1,               LWORK must be at least 1.
+            If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1.
+            If JOBZ = 'V' and N > 1, LWORK must be at least
+                                                  1 + 6*N + 2*N**2.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If N <= 1,                LIWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+            If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the algorithm failed to converge; i
+                  off-diagonal elements of an intermediate tridiagonal
+                  form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --w;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    wantz = lsame_(jobz, "V");
+    lower = lsame_(uplo, "L");
+    lquery = (*lwork == -1) || (*liwork == -1);
+
+    *info = 0;
+    if (*n <= 1) {
+	liwmin = 1;
+	lwmin = 1;
+	lopt = lwmin;
+	liopt = liwmin;
+    } else {
+	if (wantz) {
+	    liwmin = *n * 5 + 3;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lwmin = *n * 6 + 1 + ((i__1 * i__1) << (1));
+	} else {
+	    liwmin = 1;
+	    lwmin = ((*n) << (1)) + 1;
+	}
+	lopt = lwmin;
+	liopt = liwmin;
+    }
+    if (! ((wantz) || (lsame_(jobz, "N")))) {
+	*info = -1;
+    } else if (! ((lower) || (lsame_(uplo, "U")))) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -10;
+    }
+
+    if (*info == 0) {
+	work[1] = (doublereal) lopt;
+	iwork[1] = liopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DSYEVD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	w[1] = a[a_dim1 + 1];
+	if (wantz) {
+	    a[a_dim1 + 1] = 1.;
+	}
+	return 0;
+    }
+
+/*     Get machine constants. */
+
+    safmin = SAFEMINIMUM;
+    eps = PRECISION;
+    smlnum = safmin / eps;
+    bignum = 1. / smlnum;
+    rmin = sqrt(smlnum);
+    rmax = sqrt(bignum);
+
+/*     Scale matrix to allowable range, if necessary. */
+
+    anrm = dlansy_("M", uplo, n, &a[a_offset], lda, &work[1]);
+    iscale = 0;
+    if (anrm > 0. && anrm < rmin) {
+	iscale = 1;
+	sigma = rmin / anrm;
+    } else if (anrm > rmax) {
+	iscale = 1;
+	sigma = rmax / anrm;
+    }
+    if (iscale == 1) {
+	dlascl_(uplo, &c__0, &c__0, &c_b2865, &sigma, n, n, &a[a_offset], lda,
+		 info);
+    }
+
+/*     Call DSYTRD to reduce symmetric matrix to tridiagonal form. */
+
+    inde = 1;
+    indtau = inde + *n;
+    indwrk = indtau + *n;
+    llwork = *lwork - indwrk + 1;
+    indwk2 = indwrk + *n * *n;
+    llwrk2 = *lwork - indwk2 + 1;
+
+    dsytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], &
+	    work[indwrk], &llwork, &iinfo);
+    lopt = (integer) (((*n) << (1)) + work[indwrk]);
+
+/*
+       For eigenvalues only, call DSTERF.  For eigenvectors, first call
+       DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+       tridiagonal matrix, then call DORMTR to multiply it by the
+       Householder transformations stored in A.
+*/
+
+    if (! wantz) {
+	dsterf_(n, &w[1], &work[inde], info);
+    } else {
+	dstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], &
+		llwrk2, &iwork[1], liwork, info);
+	dormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
+		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
+	dlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
+/*
+   Computing MAX
+   Computing 2nd power
+*/
+	i__3 = *n;
+	i__1 = lopt, i__2 = *n * 6 + 1 + ((i__3 * i__3) << (1));
+	lopt = max(i__1,i__2);
+    }
+
+/*     If matrix was scaled, then rescale eigenvalues appropriately. */
+
+    if (iscale == 1) {
+	d__1 = 1. / sigma;
+	dscal_(n, &d__1, &w[1], &c__1);
+    }
+
+    work[1] = (doublereal) lopt;
+    iwork[1] = liopt;
+
+    return 0;
+
+/*     End of DSYEVD */
+
+} /* dsyevd_ */
+
+/* Subroutine */ int dsytd2_(char *uplo, integer *n, doublereal *a, integer *
+	lda, doublereal *d__, doublereal *e, doublereal *tau, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    static doublereal taui;
+    extern /* Subroutine */ int dsyr2_(char *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    static doublereal alpha;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *,
+	    doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, integer *), dlarfg_(integer *, doublereal *,
+	     doublereal *, integer *, doublereal *), xerbla_(char *, integer *
+	    );
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    DSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal
+    form T by an orthogonal similarity transformation: Q' * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is stored:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the orthogonal
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the orthogonal matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) DOUBLE PRECISION array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DSYTD2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Reduce the upper triangle of A */
+
+	for (i__ = *n - 1; i__ >= 1; --i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(1:i-1,i+1)
+*/
+
+	    dlarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1
+		    + 1], &c__1, &taui);
+	    e[i__] = a[i__ + (i__ + 1) * a_dim1];
+
+	    if (taui != 0.) {
+
+/*              Apply H(i) from both sides to A(1:i,1:i) */
+
+		a[i__ + (i__ + 1) * a_dim1] = 1.;
+
+/*              Compute  x := tau * A * v  storing x in TAU(1:i) */
+
+		dsymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) *
+			a_dim1 + 1], &c__1, &c_b2879, &tau[1], &c__1);
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		alpha = taui * -.5 * ddot_(&i__, &tau[1], &c__1, &a[(i__ + 1)
+			* a_dim1 + 1], &c__1);
+		daxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
+			1], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		dsyr2_(uplo, &i__, &c_b3001, &a[(i__ + 1) * a_dim1 + 1], &
+			c__1, &tau[1], &c__1, &a[a_offset], lda);
+
+		a[i__ + (i__ + 1) * a_dim1] = e[i__];
+	    }
+	    d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1];
+	    tau[i__] = taui;
+/* L10: */
+	}
+	d__[1] = a[a_dim1 + 1];
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(i+2:n,i)
+*/
+
+	    i__2 = *n - i__;
+/* Computing MIN */
+	    i__3 = i__ + 2;
+	    dlarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ *
+		     a_dim1], &c__1, &taui);
+	    e[i__] = a[i__ + 1 + i__ * a_dim1];
+
+	    if (taui != 0.) {
+
+/*              Apply H(i) from both sides to A(i+1:n,i+1:n) */
+
+		a[i__ + 1 + i__ * a_dim1] = 1.;
+
+/*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */
+
+		i__2 = *n - i__;
+		dsymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b2879, &
+			tau[i__], &c__1);
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		i__2 = *n - i__;
+		alpha = taui * -.5 * ddot_(&i__2, &tau[i__], &c__1, &a[i__ +
+			1 + i__ * a_dim1], &c__1);
+		i__2 = *n - i__;
+		daxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+			i__], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		i__2 = *n - i__;
+		dsyr2_(uplo, &i__2, &c_b3001, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) *
+			a_dim1], lda);
+
+		a[i__ + 1 + i__ * a_dim1] = e[i__];
+	    }
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    tau[i__] = taui;
+/* L20: */
+	}
+	d__[*n] = a[*n + *n * a_dim1];
+    }
+
+    return 0;
+
+/*     End of DSYTD2 */
+
+} /* dsytd2_ */
+
+/* Subroutine */ int dsytrd_(char *uplo, integer *n, doublereal *a, integer *
+	lda, doublereal *d__, doublereal *e, doublereal *tau, doublereal *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, nb, kk, nx, iws;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    static logical upper;
+    extern /* Subroutine */ int dsytd2_(char *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dsyr2k_(char *, char *, integer *, integer *, doublereal
+	    *, doublereal *, integer *, doublereal *, integer *, doublereal *,
+	     doublereal *, integer *), dlatrd_(char *,
+	    integer *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *), xerbla_(char *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DSYTRD reduces a real symmetric matrix A to real symmetric
+    tridiagonal form T by an orthogonal similarity transformation:
+    Q**T * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the orthogonal
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the orthogonal matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) DOUBLE PRECISION array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) DOUBLE PRECISION array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= 1.
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+
+/*        Determine the block size. */
+
+	nb = ilaenv_(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
+		 (ftnlen)1);
+	lwkopt = *n * nb;
+	work[1] = (doublereal) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DSYTRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1] = 1.;
+	return 0;
+    }
+
+    nx = *n;
+    iws = 1;
+    if (nb > 1 && nb < *n) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, &
+		c_n1, (ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *n) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code by setting NX = N.
+
+   Computing MAX
+*/
+		i__1 = *lwork / ldwork;
+		nb = max(i__1,1);
+		nbmin = ilaenv_(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		if (nb < nbmin) {
+		    nx = *n;
+		}
+	    }
+	} else {
+	    nx = *n;
+	}
+    } else {
+	nb = 1;
+    }
+
+    if (upper) {
+
+/*
+          Reduce the upper triangle of A.
+          Columns 1:kk are handled by the unblocked method.
+*/
+
+	kk = *n - (*n - nx + nb - 1) / nb * nb;
+	i__1 = kk + 1;
+	i__2 = -nb;
+	for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+		i__2) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = i__ + nb - 1;
+	    dlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
+		    work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(1:i-1,1:i-1), using an
+             update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = i__ - 1;
+	    dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b3001, &a[i__ *
+		    a_dim1 + 1], lda, &work[1], &ldwork, &c_b2865, &a[
+		    a_offset], lda);
+
+/*
+             Copy superdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j - 1 + j * a_dim1] = e[j - 1];
+		d__[j] = a[j + j * a_dim1];
+/* L10: */
+	    }
+/* L20: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	dsytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__2 = *n - nx;
+	i__1 = nb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = *n - i__ + 1;
+	    dlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], &
+		    tau[i__], &work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(i+ib:n,i+ib:n), using
+             an update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = *n - i__ - nb + 1;
+	    dsyr2k_(uplo, "No transpose", &i__3, &nb, &c_b3001, &a[i__ + nb +
+		    i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b2865, &a[
+		    i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*
+             Copy subdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + 1 + j * a_dim1] = e[j];
+		d__[j] = a[j + j * a_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	i__1 = *n - i__ + 1;
+	dsytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__],
+		&tau[i__], &iinfo);
+    }
+
+    work[1] = (doublereal) lwkopt;
+    return 0;
+
+/*     End of DSYTRD */
+
+} /* dsytrd_ */
+
+/* Subroutine */ int dtrevc_(char *side, char *howmny, logical *select,
+	integer *n, doublereal *t, integer *ldt, doublereal *vl, integer *
+	ldvl, doublereal *vr, integer *ldvr, integer *mm, integer *m,
+	doublereal *work, integer *info)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3;
+    doublereal d__1, d__2, d__3, d__4;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k;
+    static doublereal x[4]	/* was [2][2] */;
+    static integer j1, j2, n2, ii, ki, ip, is;
+    static doublereal wi, wr, rec, ulp, beta, emax;
+    static logical pair;
+    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *,
+	    integer *);
+    static logical allv;
+    static integer ierr;
+    static doublereal unfl, ovfl, smin;
+    static logical over;
+    static doublereal vmax;
+    static integer jnxt;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    static doublereal scale;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *);
+    static doublereal remax;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static logical leftv, bothv;
+    extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *,
+	    integer *, doublereal *, integer *);
+    static doublereal vcrit;
+    static logical somev;
+    static doublereal xnorm;
+    extern /* Subroutine */ int dlaln2_(logical *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     doublereal *, doublereal *, integer *, doublereal *, doublereal *
+	    , doublereal *, integer *, doublereal *, doublereal *, integer *),
+	     dlabad_(doublereal *, doublereal *);
+
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal bignum;
+    static logical rightv;
+    static doublereal smlnum;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    DTREVC computes some or all of the right and/or left eigenvectors of
+    a real upper quasi-triangular matrix T.
+
+    The right eigenvector x and the left eigenvector y of T corresponding
+    to an eigenvalue w are defined by:
+
+                 T*x = w*x,     y'*T = w*y'
+
+    where y' denotes the conjugate transpose of the vector y.
+
+    If all eigenvectors are requested, the routine may either return the
+    matrices X and/or Y of right or left eigenvectors of T, or the
+    products Q*X and/or Q*Y, where Q is an input orthogonal
+    matrix. If T was obtained from the real-Schur factorization of an
+    original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+    right or left eigenvectors of A.
+
+    T must be in Schur canonical form (as returned by DHSEQR), that is,
+    block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+    2-by-2 diagonal block has its diagonal elements equal and its
+    off-diagonal elements of opposite sign.  Corresponding to each 2-by-2
+    diagonal block is a complex conjugate pair of eigenvalues and
+    eigenvectors; only one eigenvector of the pair is computed, namely
+    the one corresponding to the eigenvalue with positive imaginary part.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  compute right eigenvectors only;
+            = 'L':  compute left eigenvectors only;
+            = 'B':  compute both right and left eigenvectors.
+
+    HOWMNY  (input) CHARACTER*1
+            = 'A':  compute all right and/or left eigenvectors;
+            = 'B':  compute all right and/or left eigenvectors,
+                    and backtransform them using the input matrices
+                    supplied in VR and/or VL;
+            = 'S':  compute selected right and/or left eigenvectors,
+                    specified by the logical array SELECT.
+
+    SELECT  (input/output) LOGICAL array, dimension (N)
+            If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+            computed.
+            If HOWMNY = 'A' or 'B', SELECT is not referenced.
+            To select the real eigenvector corresponding to a real
+            eigenvalue w(j), SELECT(j) must be set to .TRUE..  To select
+            the complex eigenvector corresponding to a complex conjugate
+            pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be
+            set to .TRUE.; then on exit SELECT(j) is .TRUE. and
+            SELECT(j+1) is .FALSE..
+
+    N       (input) INTEGER
+            The order of the matrix T. N >= 0.
+
+    T       (input) DOUBLE PRECISION array, dimension (LDT,N)
+            The upper quasi-triangular matrix T in Schur canonical form.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= max(1,N).
+
+    VL      (input/output) DOUBLE PRECISION array, dimension (LDVL,MM)
+            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+            contain an N-by-N matrix Q (usually the orthogonal matrix Q
+            of Schur vectors returned by DHSEQR).
+            On exit, if SIDE = 'L' or 'B', VL contains:
+            if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+                             VL has the same quasi-lower triangular form
+                             as T'. If T(i,i) is a real eigenvalue, then
+                             the i-th column VL(i) of VL  is its
+                             corresponding eigenvector. If T(i:i+1,i:i+1)
+                             is a 2-by-2 block whose eigenvalues are
+                             complex-conjugate eigenvalues of T, then
+                             VL(i)+sqrt(-1)*VL(i+1) is the complex
+                             eigenvector corresponding to the eigenvalue
+                             with positive real part.
+            if HOWMNY = 'B', the matrix Q*Y;
+            if HOWMNY = 'S', the left eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VL, in the same order as their
+                             eigenvalues.
+            A complex eigenvector corresponding to a complex eigenvalue
+            is stored in two consecutive columns, the first holding the
+            real part, and the second the imaginary part.
+            If SIDE = 'R', VL is not referenced.
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= max(1,N) if
+            SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+
+    VR      (input/output) DOUBLE PRECISION array, dimension (LDVR,MM)
+            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+            contain an N-by-N matrix Q (usually the orthogonal matrix Q
+            of Schur vectors returned by DHSEQR).
+            On exit, if SIDE = 'R' or 'B', VR contains:
+            if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+                             VR has the same quasi-upper triangular form
+                             as T. If T(i,i) is a real eigenvalue, then
+                             the i-th column VR(i) of VR  is its
+                             corresponding eigenvector. If T(i:i+1,i:i+1)
+                             is a 2-by-2 block whose eigenvalues are
+                             complex-conjugate eigenvalues of T, then
+                             VR(i)+sqrt(-1)*VR(i+1) is the complex
+                             eigenvector corresponding to the eigenvalue
+                             with positive real part.
+            if HOWMNY = 'B', the matrix Q*X;
+            if HOWMNY = 'S', the right eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VR, in the same order as their
+                             eigenvalues.
+            A complex eigenvector corresponding to a complex eigenvalue
+            is stored in two consecutive columns, the first holding the
+            real part and the second the imaginary part.
+            If SIDE = 'L', VR is not referenced.
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= max(1,N) if
+            SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+
+    MM      (input) INTEGER
+            The number of columns in the arrays VL and/or VR. MM >= M.
+
+    M       (output) INTEGER
+            The number of columns in the arrays VL and/or VR actually
+            used to store the eigenvectors.
+            If HOWMNY = 'A' or 'B', M is set to N.
+            Each selected real eigenvector occupies one column and each
+            selected complex eigenvector occupies two columns.
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (3*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The algorithm used in this program is basically backward (forward)
+    substitution, with scaling to make the the code robust against
+    possible overflow.
+
+    Each eigenvector is normalized so that the element of largest
+    magnitude has magnitude 1; here the magnitude of a complex number
+    (x,y) is taken to be |x| + |y|.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --select;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    bothv = lsame_(side, "B");
+    rightv = (lsame_(side, "R")) || (bothv);
+    leftv = (lsame_(side, "L")) || (bothv);
+
+    allv = lsame_(howmny, "A");
+    over = lsame_(howmny, "B");
+    somev = lsame_(howmny, "S");
+
+    *info = 0;
+    if (! rightv && ! leftv) {
+	*info = -1;
+    } else if (! allv && ! over && ! somev) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*ldt < max(1,*n)) {
+	*info = -6;
+    } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) {
+	*info = -8;
+    } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) {
+	*info = -10;
+    } else {
+
+/*
+          Set M to the number of columns required to store the selected
+          eigenvectors, standardize the array SELECT if necessary, and
+          test MM.
+*/
+
+	if (somev) {
+	    *m = 0;
+	    pair = FALSE_;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (pair) {
+		    pair = FALSE_;
+		    select[j] = FALSE_;
+		} else {
+		    if (j < *n) {
+			if (t[j + 1 + j * t_dim1] == 0.) {
+			    if (select[j]) {
+				++(*m);
+			    }
+			} else {
+			    pair = TRUE_;
+			    if ((select[j]) || (select[j + 1])) {
+				select[j] = TRUE_;
+				*m += 2;
+			    }
+			}
+		    } else {
+			if (select[*n]) {
+			    ++(*m);
+			}
+		    }
+		}
+/* L10: */
+	    }
+	} else {
+	    *m = *n;
+	}
+
+	if (*mm < *m) {
+	    *info = -11;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DTREVC", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set the constants to control overflow. */
+
+    unfl = SAFEMINIMUM;
+    ovfl = 1. / unfl;
+    dlabad_(&unfl, &ovfl);
+    ulp = PRECISION;
+    smlnum = unfl * (*n / ulp);
+    bignum = (1. - ulp) / smlnum;
+
+/*
+       Compute 1-norm of each column of strictly upper triangular
+       part of T to control overflow in triangular solver.
+*/
+
+    work[1] = 0.;
+    i__1 = *n;
+    for (j = 2; j <= i__1; ++j) {
+	work[j] = 0.;
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[j] += (d__1 = t[i__ + j * t_dim1], abs(d__1));
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*
+       Index IP is used to specify the real or complex eigenvalue:
+         IP = 0, real eigenvalue,
+              1, first of conjugate complex pair: (wr,wi)
+             -1, second of conjugate complex pair: (wr,wi)
+*/
+
+    n2 = (*n) << (1);
+
+    if (rightv) {
+
+/*        Compute right eigenvectors. */
+
+	ip = 0;
+	is = *m;
+	for (ki = *n; ki >= 1; --ki) {
+
+	    if (ip == 1) {
+		goto L130;
+	    }
+	    if (ki == 1) {
+		goto L40;
+	    }
+	    if (t[ki + (ki - 1) * t_dim1] == 0.) {
+		goto L40;
+	    }
+	    ip = -1;
+
+L40:
+	    if (somev) {
+		if (ip == 0) {
+		    if (! select[ki]) {
+			goto L130;
+		    }
+		} else {
+		    if (! select[ki - 1]) {
+			goto L130;
+		    }
+		}
+	    }
+
+/*           Compute the KI-th eigenvalue (WR,WI). */
+
+	    wr = t[ki + ki * t_dim1];
+	    wi = 0.;
+	    if (ip != 0) {
+		wi = sqrt((d__1 = t[ki + (ki - 1) * t_dim1], abs(d__1))) *
+			sqrt((d__2 = t[ki - 1 + ki * t_dim1], abs(d__2)));
+	    }
+/* Computing MAX */
+	    d__1 = ulp * (abs(wr) + abs(wi));
+	    smin = max(d__1,smlnum);
+
+	    if (ip == 0) {
+
+/*              Real right eigenvector */
+
+		work[ki + *n] = 1.;
+
+/*              Form right-hand side */
+
+		i__1 = ki - 1;
+		for (k = 1; k <= i__1; ++k) {
+		    work[k + *n] = -t[k + ki * t_dim1];
+/* L50: */
+		}
+
+/*
+                Solve the upper quasi-triangular system:
+                   (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
+*/
+
+		jnxt = ki - 1;
+		for (j = ki - 1; j >= 1; --j) {
+		    if (j > jnxt) {
+			goto L60;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j - 1;
+		    if (j > 1) {
+			if (t[j + (j - 1) * t_dim1] != 0.) {
+			    j1 = j - 1;
+			    jnxt = j - 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*                    1-by-1 diagonal block */
+
+			dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2865, &t[j
+				+ j * t_dim1], ldt, &c_b2865, &c_b2865, &work[
+				j + *n], n, &wr, &c_b2879, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*
+                      Scale X(1,1) to avoid overflow when updating
+                      the right-hand side.
+*/
+
+			if (xnorm > 1.) {
+			    if (work[j] > bignum / xnorm) {
+				x[0] /= xnorm;
+				scale /= xnorm;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    dscal_(&ki, &scale, &work[*n + 1], &c__1);
+			}
+			work[j + *n] = x[0];
+
+/*                    Update right-hand side */
+
+			i__1 = j - 1;
+			d__1 = -x[0];
+			daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+
+		    } else {
+
+/*                    2-by-2 diagonal block */
+
+			dlaln2_(&c_false, &c__2, &c__1, &smin, &c_b2865, &t[j
+				- 1 + (j - 1) * t_dim1], ldt, &c_b2865, &
+				c_b2865, &work[j - 1 + *n], n, &wr, &c_b2879,
+				x, &c__2, &scale, &xnorm, &ierr);
+
+/*
+                      Scale X(1,1) and X(2,1) to avoid overflow when
+                      updating the right-hand side.
+*/
+
+			if (xnorm > 1.) {
+/* Computing MAX */
+			    d__1 = work[j - 1], d__2 = work[j];
+			    beta = max(d__1,d__2);
+			    if (beta > bignum / xnorm) {
+				x[0] /= xnorm;
+				x[1] /= xnorm;
+				scale /= xnorm;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    dscal_(&ki, &scale, &work[*n + 1], &c__1);
+			}
+			work[j - 1 + *n] = x[0];
+			work[j + *n] = x[1];
+
+/*                    Update right-hand side */
+
+			i__1 = j - 2;
+			d__1 = -x[0];
+			daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1,
+				&work[*n + 1], &c__1);
+			i__1 = j - 2;
+			d__1 = -x[1];
+			daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+		    }
+L60:
+		    ;
+		}
+
+/*              Copy the vector x or Q*x to VR and normalize. */
+
+		if (! over) {
+		    dcopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], &
+			    c__1);
+
+		    ii = idamax_(&ki, &vr[is * vr_dim1 + 1], &c__1);
+		    remax = 1. / (d__1 = vr[ii + is * vr_dim1], abs(d__1));
+		    dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);
+
+		    i__1 = *n;
+		    for (k = ki + 1; k <= i__1; ++k) {
+			vr[k + is * vr_dim1] = 0.;
+/* L70: */
+		    }
+		} else {
+		    if (ki > 1) {
+			i__1 = ki - 1;
+			dgemv_("N", n, &i__1, &c_b2865, &vr[vr_offset], ldvr,
+				&work[*n + 1], &c__1, &work[ki + *n], &vr[ki *
+				 vr_dim1 + 1], &c__1);
+		    }
+
+		    ii = idamax_(n, &vr[ki * vr_dim1 + 1], &c__1);
+		    remax = 1. / (d__1 = vr[ii + ki * vr_dim1], abs(d__1));
+		    dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
+		}
+
+	    } else {
+
+/*
+                Complex right eigenvector.
+
+                Initial solve
+                  [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
+                  [ (T(KI,KI-1)   T(KI,KI)   )               ]
+*/
+
+		if ((d__1 = t[ki - 1 + ki * t_dim1], abs(d__1)) >= (d__2 = t[
+			ki + (ki - 1) * t_dim1], abs(d__2))) {
+		    work[ki - 1 + *n] = 1.;
+		    work[ki + n2] = wi / t[ki - 1 + ki * t_dim1];
+		} else {
+		    work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1];
+		    work[ki + n2] = 1.;
+		}
+		work[ki + *n] = 0.;
+		work[ki - 1 + n2] = 0.;
+
+/*              Form right-hand side */
+
+		i__1 = ki - 2;
+		for (k = 1; k <= i__1; ++k) {
+		    work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) *
+			    t_dim1];
+		    work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1];
+/* L80: */
+		}
+
+/*
+                Solve upper quasi-triangular system:
+                (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
+*/
+
+		jnxt = ki - 2;
+		for (j = ki - 2; j >= 1; --j) {
+		    if (j > jnxt) {
+			goto L90;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j - 1;
+		    if (j > 1) {
+			if (t[j + (j - 1) * t_dim1] != 0.) {
+			    j1 = j - 1;
+			    jnxt = j - 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*                    1-by-1 diagonal block */
+
+			dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2865, &t[j
+				+ j * t_dim1], ldt, &c_b2865, &c_b2865, &work[
+				j + *n], n, &wr, &wi, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*
+                      Scale X(1,1) and X(1,2) to avoid overflow when
+                      updating the right-hand side.
+*/
+
+			if (xnorm > 1.) {
+			    if (work[j] > bignum / xnorm) {
+				x[0] /= xnorm;
+				x[2] /= xnorm;
+				scale /= xnorm;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    dscal_(&ki, &scale, &work[*n + 1], &c__1);
+			    dscal_(&ki, &scale, &work[n2 + 1], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + n2] = x[2];
+
+/*                    Update the right-hand side */
+
+			i__1 = j - 1;
+			d__1 = -x[0];
+			daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+			i__1 = j - 1;
+			d__1 = -x[2];
+			daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[
+				n2 + 1], &c__1);
+
+		    } else {
+
+/*                    2-by-2 diagonal block */
+
+			dlaln2_(&c_false, &c__2, &c__2, &smin, &c_b2865, &t[j
+				- 1 + (j - 1) * t_dim1], ldt, &c_b2865, &
+				c_b2865, &work[j - 1 + *n], n, &wr, &wi, x, &
+				c__2, &scale, &xnorm, &ierr);
+
+/*
+                      Scale X to avoid overflow when updating
+                      the right-hand side.
+*/
+
+			if (xnorm > 1.) {
+/* Computing MAX */
+			    d__1 = work[j - 1], d__2 = work[j];
+			    beta = max(d__1,d__2);
+			    if (beta > bignum / xnorm) {
+				rec = 1. / xnorm;
+				x[0] *= rec;
+				x[2] *= rec;
+				x[1] *= rec;
+				x[3] *= rec;
+				scale *= rec;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    dscal_(&ki, &scale, &work[*n + 1], &c__1);
+			    dscal_(&ki, &scale, &work[n2 + 1], &c__1);
+			}
+			work[j - 1 + *n] = x[0];
+			work[j + *n] = x[1];
+			work[j - 1 + n2] = x[2];
+			work[j + n2] = x[3];
+
+/*                    Update the right-hand side */
+
+			i__1 = j - 2;
+			d__1 = -x[0];
+			daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1,
+				&work[*n + 1], &c__1);
+			i__1 = j - 2;
+			d__1 = -x[1];
+			daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+			i__1 = j - 2;
+			d__1 = -x[2];
+			daxpy_(&i__1, &d__1, &t[(j - 1) * t_dim1 + 1], &c__1,
+				&work[n2 + 1], &c__1);
+			i__1 = j - 2;
+			d__1 = -x[3];
+			daxpy_(&i__1, &d__1, &t[j * t_dim1 + 1], &c__1, &work[
+				n2 + 1], &c__1);
+		    }
+L90:
+		    ;
+		}
+
+/*              Copy the vector x or Q*x to VR and normalize. */
+
+		if (! over) {
+		    dcopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1
+			    + 1], &c__1);
+		    dcopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], &
+			    c__1);
+
+		    emax = 0.;
+		    i__1 = ki;
+		    for (k = 1; k <= i__1; ++k) {
+/* Computing MAX */
+			d__3 = emax, d__4 = (d__1 = vr[k + (is - 1) * vr_dim1]
+				, abs(d__1)) + (d__2 = vr[k + is * vr_dim1],
+				abs(d__2));
+			emax = max(d__3,d__4);
+/* L100: */
+		    }
+
+		    remax = 1. / emax;
+		    dscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1);
+		    dscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);
+
+		    i__1 = *n;
+		    for (k = ki + 1; k <= i__1; ++k) {
+			vr[k + (is - 1) * vr_dim1] = 0.;
+			vr[k + is * vr_dim1] = 0.;
+/* L110: */
+		    }
+
+		} else {
+
+		    if (ki > 2) {
+			i__1 = ki - 2;
+			dgemv_("N", n, &i__1, &c_b2865, &vr[vr_offset], ldvr,
+				&work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[
+				(ki - 1) * vr_dim1 + 1], &c__1);
+			i__1 = ki - 2;
+			dgemv_("N", n, &i__1, &c_b2865, &vr[vr_offset], ldvr,
+				&work[n2 + 1], &c__1, &work[ki + n2], &vr[ki *
+				 vr_dim1 + 1], &c__1);
+		    } else {
+			dscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1
+				+ 1], &c__1);
+			dscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], &
+				c__1);
+		    }
+
+		    emax = 0.;
+		    i__1 = *n;
+		    for (k = 1; k <= i__1; ++k) {
+/* Computing MAX */
+			d__3 = emax, d__4 = (d__1 = vr[k + (ki - 1) * vr_dim1]
+				, abs(d__1)) + (d__2 = vr[k + ki * vr_dim1],
+				abs(d__2));
+			emax = max(d__3,d__4);
+/* L120: */
+		    }
+		    remax = 1. / emax;
+		    dscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1);
+		    dscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
+		}
+	    }
+
+	    --is;
+	    if (ip != 0) {
+		--is;
+	    }
+L130:
+	    if (ip == 1) {
+		ip = 0;
+	    }
+	    if (ip == -1) {
+		ip = 1;
+	    }
+/* L140: */
+	}
+    }
+
+    if (leftv) {
+
+/*        Compute left eigenvectors. */
+
+	ip = 0;
+	is = 1;
+	i__1 = *n;
+	for (ki = 1; ki <= i__1; ++ki) {
+
+	    if (ip == -1) {
+		goto L250;
+	    }
+	    if (ki == *n) {
+		goto L150;
+	    }
+	    if (t[ki + 1 + ki * t_dim1] == 0.) {
+		goto L150;
+	    }
+	    ip = 1;
+
+L150:
+	    if (somev) {
+		if (! select[ki]) {
+		    goto L250;
+		}
+	    }
+
+/*           Compute the KI-th eigenvalue (WR,WI). */
+
+	    wr = t[ki + ki * t_dim1];
+	    wi = 0.;
+	    if (ip != 0) {
+		wi = sqrt((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1))) *
+			sqrt((d__2 = t[ki + 1 + ki * t_dim1], abs(d__2)));
+	    }
+/* Computing MAX */
+	    d__1 = ulp * (abs(wr) + abs(wi));
+	    smin = max(d__1,smlnum);
+
+	    if (ip == 0) {
+
+/*              Real left eigenvector. */
+
+		work[ki + *n] = 1.;
+
+/*              Form right-hand side */
+
+		i__2 = *n;
+		for (k = ki + 1; k <= i__2; ++k) {
+		    work[k + *n] = -t[ki + k * t_dim1];
+/* L160: */
+		}
+
+/*
+                Solve the quasi-triangular system:
+                   (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
+*/
+
+		vmax = 1.;
+		vcrit = bignum;
+
+		jnxt = ki + 1;
+		i__2 = *n;
+		for (j = ki + 1; j <= i__2; ++j) {
+		    if (j < jnxt) {
+			goto L170;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j + 1;
+		    if (j < *n) {
+			if (t[j + 1 + j * t_dim1] != 0.) {
+			    j2 = j + 1;
+			    jnxt = j + 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*
+                      1-by-1 diagonal block
+
+                      Scale if necessary to avoid overflow when forming
+                      the right-hand side.
+*/
+
+			if (work[j] > vcrit) {
+			    rec = 1. / vmax;
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    vmax = 1.;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 1;
+			work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1],
+				&c__1, &work[ki + 1 + *n], &c__1);
+
+/*                    Solve (T(J,J)-WR)'*X = WORK */
+
+			dlaln2_(&c_false, &c__1, &c__1, &smin, &c_b2865, &t[j
+				+ j * t_dim1], ldt, &c_b2865, &c_b2865, &work[
+				j + *n], n, &wr, &c_b2879, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			}
+			work[j + *n] = x[0];
+/* Computing MAX */
+			d__2 = (d__1 = work[j + *n], abs(d__1));
+			vmax = max(d__2,vmax);
+			vcrit = bignum / vmax;
+
+		    } else {
+
+/*
+                      2-by-2 diagonal block
+
+                      Scale if necessary to avoid overflow when forming
+                      the right-hand side.
+
+   Computing MAX
+*/
+			d__1 = work[j], d__2 = work[j + 1];
+			beta = max(d__1,d__2);
+			if (beta > vcrit) {
+			    rec = 1. / vmax;
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    vmax = 1.;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 1;
+			work[j + *n] -= ddot_(&i__3, &t[ki + 1 + j * t_dim1],
+				&c__1, &work[ki + 1 + *n], &c__1);
+
+			i__3 = j - ki - 1;
+			work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 1 + (j + 1) *
+				 t_dim1], &c__1, &work[ki + 1 + *n], &c__1);
+
+/*
+                      Solve
+                        [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
+                        [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
+*/
+
+			dlaln2_(&c_true, &c__2, &c__1, &smin, &c_b2865, &t[j
+				+ j * t_dim1], ldt, &c_b2865, &c_b2865, &work[
+				j + *n], n, &wr, &c_b2879, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + 1 + *n] = x[1];
+
+/* Computing MAX */
+			d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2
+				= work[j + 1 + *n], abs(d__2)), d__3 = max(
+				d__3,d__4);
+			vmax = max(d__3,vmax);
+			vcrit = bignum / vmax;
+
+		    }
+L170:
+		    ;
+		}
+
+/*              Copy the vector x or Q*x to VL and normalize. */
+
+		if (! over) {
+		    i__2 = *n - ki + 1;
+		    dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is *
+			    vl_dim1], &c__1);
+
+		    i__2 = *n - ki + 1;
+		    ii = idamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki -
+			    1;
+		    remax = 1. / (d__1 = vl[ii + is * vl_dim1], abs(d__1));
+		    i__2 = *n - ki + 1;
+		    dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
+
+		    i__2 = ki - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			vl[k + is * vl_dim1] = 0.;
+/* L180: */
+		    }
+
+		} else {
+
+		    if (ki < *n) {
+			i__2 = *n - ki;
+			dgemv_("N", n, &i__2, &c_b2865, &vl[(ki + 1) *
+				vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1,
+				 &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1);
+		    }
+
+		    ii = idamax_(n, &vl[ki * vl_dim1 + 1], &c__1);
+		    remax = 1. / (d__1 = vl[ii + ki * vl_dim1], abs(d__1));
+		    dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
+
+		}
+
+	    } else {
+
+/*
+                Complex left eigenvector.
+
+                 Initial solve:
+                   ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
+                   ((T(KI+1,KI) T(KI+1,KI+1))                )
+*/
+
+		if ((d__1 = t[ki + (ki + 1) * t_dim1], abs(d__1)) >= (d__2 =
+			t[ki + 1 + ki * t_dim1], abs(d__2))) {
+		    work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1];
+		    work[ki + 1 + n2] = 1.;
+		} else {
+		    work[ki + *n] = 1.;
+		    work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1];
+		}
+		work[ki + 1 + *n] = 0.;
+		work[ki + n2] = 0.;
+
+/*              Form right-hand side */
+
+		i__2 = *n;
+		for (k = ki + 2; k <= i__2; ++k) {
+		    work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1];
+		    work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1]
+			    ;
+/* L190: */
+		}
+
+/*
+                Solve complex quasi-triangular system:
+                ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
+*/
+
+		vmax = 1.;
+		vcrit = bignum;
+
+		jnxt = ki + 2;
+		i__2 = *n;
+		for (j = ki + 2; j <= i__2; ++j) {
+		    if (j < jnxt) {
+			goto L200;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j + 1;
+		    if (j < *n) {
+			if (t[j + 1 + j * t_dim1] != 0.) {
+			    j2 = j + 1;
+			    jnxt = j + 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*
+                      1-by-1 diagonal block
+
+                      Scale if necessary to avoid overflow when
+                      forming the right-hand side elements.
+*/
+
+			if (work[j] > vcrit) {
+			    rec = 1. / vmax;
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &rec, &work[ki + n2], &c__1);
+			    vmax = 1.;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 2;
+			work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + *n], &c__1);
+			i__3 = j - ki - 2;
+			work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + n2], &c__1);
+
+/*                    Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */
+
+			d__1 = -wi;
+			dlaln2_(&c_false, &c__1, &c__2, &smin, &c_b2865, &t[j
+				+ j * t_dim1], ldt, &c_b2865, &c_b2865, &work[
+				j + *n], n, &wr, &d__1, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &scale, &work[ki + n2], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + n2] = x[2];
+/* Computing MAX */
+			d__3 = (d__1 = work[j + *n], abs(d__1)), d__4 = (d__2
+				= work[j + n2], abs(d__2)), d__3 = max(d__3,
+				d__4);
+			vmax = max(d__3,vmax);
+			vcrit = bignum / vmax;
+
+		    } else {
+
+/*
+                      2-by-2 diagonal block
+
+                      Scale if necessary to avoid overflow when forming
+                      the right-hand side elements.
+
+   Computing MAX
+*/
+			d__1 = work[j], d__2 = work[j + 1];
+			beta = max(d__1,d__2);
+			if (beta > vcrit) {
+			    rec = 1. / vmax;
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &rec, &work[ki + n2], &c__1);
+			    vmax = 1.;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 2;
+			work[j + *n] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + *n], &c__1);
+
+			i__3 = j - ki - 2;
+			work[j + n2] -= ddot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + n2], &c__1);
+
+			i__3 = j - ki - 2;
+			work[j + 1 + *n] -= ddot_(&i__3, &t[ki + 2 + (j + 1) *
+				 t_dim1], &c__1, &work[ki + 2 + *n], &c__1);
+
+			i__3 = j - ki - 2;
+			work[j + 1 + n2] -= ddot_(&i__3, &t[ki + 2 + (j + 1) *
+				 t_dim1], &c__1, &work[ki + 2 + n2], &c__1);
+
+/*
+                      Solve 2-by-2 complex linear equation
+                        ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
+                        ([T(j+1,j) T(j+1,j+1)]             )
+*/
+
+			d__1 = -wi;
+			dlaln2_(&c_true, &c__2, &c__2, &smin, &c_b2865, &t[j
+				+ j * t_dim1], ldt, &c_b2865, &c_b2865, &work[
+				j + *n], n, &wr, &d__1, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.) {
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    dscal_(&i__3, &scale, &work[ki + n2], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + n2] = x[2];
+			work[j + 1 + *n] = x[1];
+			work[j + 1 + n2] = x[3];
+/* Computing MAX */
+			d__1 = abs(x[0]), d__2 = abs(x[2]), d__1 = max(d__1,
+				d__2), d__2 = abs(x[1]), d__1 = max(d__1,d__2)
+				, d__2 = abs(x[3]), d__1 = max(d__1,d__2);
+			vmax = max(d__1,vmax);
+			vcrit = bignum / vmax;
+
+		    }
+L200:
+		    ;
+		}
+
+/*
+                Copy the vector x or Q*x to VL and normalize.
+
+   L210:
+*/
+		if (! over) {
+		    i__2 = *n - ki + 1;
+		    dcopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is *
+			    vl_dim1], &c__1);
+		    i__2 = *n - ki + 1;
+		    dcopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) *
+			    vl_dim1], &c__1);
+
+		    emax = 0.;
+		    i__2 = *n;
+		    for (k = ki; k <= i__2; ++k) {
+/* Computing MAX */
+			d__3 = emax, d__4 = (d__1 = vl[k + is * vl_dim1], abs(
+				d__1)) + (d__2 = vl[k + (is + 1) * vl_dim1],
+				abs(d__2));
+			emax = max(d__3,d__4);
+/* L220: */
+		    }
+		    remax = 1. / emax;
+		    i__2 = *n - ki + 1;
+		    dscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
+		    i__2 = *n - ki + 1;
+		    dscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1)
+			    ;
+
+		    i__2 = ki - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			vl[k + is * vl_dim1] = 0.;
+			vl[k + (is + 1) * vl_dim1] = 0.;
+/* L230: */
+		    }
+		} else {
+		    if (ki < *n - 1) {
+			i__2 = *n - ki - 1;
+			dgemv_("N", n, &i__2, &c_b2865, &vl[(ki + 2) *
+				vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1,
+				 &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1);
+			i__2 = *n - ki - 1;
+			dgemv_("N", n, &i__2, &c_b2865, &vl[(ki + 2) *
+				vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1,
+				 &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 +
+				1], &c__1);
+		    } else {
+			dscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], &
+				c__1);
+			dscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1
+				+ 1], &c__1);
+		    }
+
+		    emax = 0.;
+		    i__2 = *n;
+		    for (k = 1; k <= i__2; ++k) {
+/* Computing MAX */
+			d__3 = emax, d__4 = (d__1 = vl[k + ki * vl_dim1], abs(
+				d__1)) + (d__2 = vl[k + (ki + 1) * vl_dim1],
+				abs(d__2));
+			emax = max(d__3,d__4);
+/* L240: */
+		    }
+		    remax = 1. / emax;
+		    dscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
+		    dscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1);
+
+		}
+
+	    }
+
+	    ++is;
+	    if (ip != 0) {
+		++is;
+	    }
+L250:
+	    if (ip == -1) {
+		ip = 0;
+	    }
+	    if (ip == 1) {
+		ip = -1;
+	    }
+
+/* L260: */
+	}
+
+    }
+
+    return 0;
+
+/*     End of DTREVC */
+
+} /* dtrevc_ */
+
+/* Subroutine */ int dtrti2_(char *uplo, char *diag, integer *n, doublereal *
+	a, integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer j;
+    static doublereal ajj;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int dtrmv_(char *, char *, char *, integer *,
+	    doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    DTRTI2 computes the inverse of a real upper or lower triangular
+    matrix.
+
+    This is the Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the matrix A is upper or lower triangular.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    DIAG    (input) CHARACTER*1
+            Specifies whether or not the matrix A is unit triangular.
+            = 'N':  Non-unit triangular
+            = 'U':  Unit triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading n by n upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DTRTI2", &i__1);
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute inverse of upper triangular matrix. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (nounit) {
+		a[j + j * a_dim1] = 1. / a[j + j * a_dim1];
+		ajj = -a[j + j * a_dim1];
+	    } else {
+		ajj = -1.;
+	    }
+
+/*           Compute elements 1:j-1 of j-th column. */
+
+	    i__2 = j - 1;
+	    dtrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, &
+		    a[j * a_dim1 + 1], &c__1);
+	    i__2 = j - 1;
+	    dscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1);
+/* L10: */
+	}
+    } else {
+
+/*        Compute inverse of lower triangular matrix. */
+
+	for (j = *n; j >= 1; --j) {
+	    if (nounit) {
+		a[j + j * a_dim1] = 1. / a[j + j * a_dim1];
+		ajj = -a[j + j * a_dim1];
+	    } else {
+		ajj = -1.;
+	    }
+	    if (j < *n) {
+
+/*              Compute elements j+1:n of j-th column. */
+
+		i__1 = *n - j;
+		dtrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j +
+			1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1);
+		i__1 = *n - j;
+		dscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of DTRTI2 */
+
+} /* dtrti2_ */
+
+/* Subroutine */ int dtrtri_(char *uplo, char *diag, integer *n, doublereal *
+	a, integer *lda, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer j, jb, nb, nn;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *,
+	    integer *, integer *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *), dtrsm_(
+	    char *, char *, char *, char *, integer *, integer *, doublereal *
+	    , doublereal *, integer *, doublereal *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int dtrti2_(char *, char *, integer *, doublereal
+	    *, integer *, integer *), xerbla_(char *, integer
+	    *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    DTRTRI computes the inverse of a real upper or lower triangular
+    matrix A.
+
+    This is the Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  A is upper triangular;
+            = 'L':  A is lower triangular.
+
+    DIAG    (input) CHARACTER*1
+            = 'N':  A is non-unit triangular;
+            = 'U':  A is unit triangular.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+                 matrix is singular and its inverse can not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("DTRTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Check for singularity if non-unit. */
+
+    if (nounit) {
+	i__1 = *n;
+	for (*info = 1; *info <= i__1; ++(*info)) {
+	    if (a[*info + *info * a_dim1] == 0.) {
+		return 0;
+	    }
+/* L10: */
+	}
+	*info = 0;
+    }
+
+/*
+       Determine the block size for this environment.
+
+   Writing concatenation
+*/
+    i__2[0] = 1, a__1[0] = uplo;
+    i__2[1] = 1, a__1[1] = diag;
+    s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+    nb = ilaenv_(&c__1, "DTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	dtrti2_(uplo, diag, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute inverse of upper triangular matrix */
+
+	    i__1 = *n;
+	    i__3 = nb;
+	    for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) {
+/* Computing MIN */
+		i__4 = nb, i__5 = *n - j + 1;
+		jb = min(i__4,i__5);
+
+/*              Compute rows 1:j-1 of current block column */
+
+		i__4 = j - 1;
+		dtrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
+			c_b2865, &a[a_offset], lda, &a[j * a_dim1 + 1], lda);
+		i__4 = j - 1;
+		dtrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
+			c_b3001, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1],
+			lda);
+
+/*              Compute inverse of current diagonal block */
+
+		dtrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L20: */
+	    }
+	} else {
+
+/*           Compute inverse of lower triangular matrix */
+
+	    nn = (*n - 1) / nb * nb + 1;
+	    i__3 = -nb;
+	    for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) {
+/* Computing MIN */
+		i__1 = nb, i__4 = *n - j + 1;
+		jb = min(i__1,i__4);
+		if (j + jb <= *n) {
+
+/*                 Compute rows j+jb:n of current block column */
+
+		    i__1 = *n - j - jb + 1;
+		    dtrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb,
+			    &c_b2865, &a[j + jb + (j + jb) * a_dim1], lda, &a[
+			    j + jb + j * a_dim1], lda);
+		    i__1 = *n - j - jb + 1;
+		    dtrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
+			     &c_b3001, &a[j + j * a_dim1], lda, &a[j + jb + j
+			    * a_dim1], lda);
+		}
+
+/*              Compute inverse of current diagonal block */
+
+		dtrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L30: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of DTRTRI */
+
+} /* dtrtri_ */
+
+integer ieeeck_(integer *ispec, real *zero, real *one)
+{
+    /* System generated locals */
+    integer ret_val;
+
+    /* Local variables */
+    static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro,
+	    newzro;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1998
+
+
+    Purpose
+    =======
+
+    IEEECK is called from the ILAENV to verify that Infinity and
+    possibly NaN arithmetic is safe (i.e. will not trap).
+
+    Arguments
+    =========
+
+    ISPEC   (input) INTEGER
+            Specifies whether to test just for inifinity arithmetic
+            or whether to test for infinity and NaN arithmetic.
+            = 0: Verify infinity arithmetic only.
+            = 1: Verify infinity and NaN arithmetic.
+
+    ZERO    (input) REAL
+            Must contain the value 0.0
+            This is passed to prevent the compiler from optimizing
+            away this code.
+
+    ONE     (input) REAL
+            Must contain the value 1.0
+            This is passed to prevent the compiler from optimizing
+            away this code.
+
+    RETURN VALUE:  INTEGER
+            = 0:  Arithmetic failed to produce the correct answers
+            = 1:  Arithmetic produced the correct answers
+*/
+
+    ret_val = 1;
+
+    posinf = *one / *zero;
+    if (posinf <= *one) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    neginf = -(*one) / *zero;
+    if (neginf >= *zero) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    negzro = *one / (neginf + *one);
+    if (negzro != *zero) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    neginf = *one / negzro;
+    if (neginf >= *zero) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    newzro = negzro + *zero;
+    if (newzro != *zero) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    posinf = *one / newzro;
+    if (posinf <= *one) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    neginf *= posinf;
+    if (neginf >= *zero) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    posinf *= posinf;
+    if (posinf <= *one) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+
+/*     Return if we were only asked to check infinity arithmetic */
+
+    if (*ispec == 0) {
+	return ret_val;
+    }
+
+    nan1 = posinf + neginf;
+
+    nan2 = posinf / neginf;
+
+    nan3 = posinf / posinf;
+
+    nan4 = posinf * *zero;
+
+    nan5 = neginf * negzro;
+
+    nan6 = nan5 * 0.f;
+
+    if (nan1 == nan1) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    if (nan2 == nan2) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    if (nan3 == nan3) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    if (nan4 == nan4) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    if (nan5 == nan5) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    if (nan6 == nan6) {
+	ret_val = 0;
+	return ret_val;
+    }
+
+    return ret_val;
+} /* ieeeck_ */
+
+integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1,
+	integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen
+	opts_len)
+{
+    /* System generated locals */
+    integer ret_val;
+
+    /* Builtin functions */
+    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
+    integer s_cmp(char *, char *, ftnlen, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static char c1[1], c2[2], c3[3], c4[2];
+    static integer ic, nb, iz, nx;
+    static logical cname, sname;
+    static integer nbmin;
+    extern integer ieeeck_(integer *, real *, real *);
+    static char subnam[6];
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ILAENV is called from the LAPACK routines to choose problem-dependent
+    parameters for the local environment.  See ISPEC for a description of
+    the parameters.
+
+    This version provides a set of parameters which should give good,
+    but not optimal, performance on many of the currently available
+    computers.  Users are encouraged to modify this subroutine to set
+    the tuning parameters for their particular machine using the option
+    and problem size information in the arguments.
+
+    This routine will not function correctly if it is converted to all
+    lower case.  Converting it to all upper case is allowed.
+
+    Arguments
+    =========
+
+    ISPEC   (input) INTEGER
+            Specifies the parameter to be returned as the value of
+            ILAENV.
+            = 1: the optimal blocksize; if this value is 1, an unblocked
+                 algorithm will give the best performance.
+            = 2: the minimum block size for which the block routine
+                 should be used; if the usable block size is less than
+                 this value, an unblocked routine should be used.
+            = 3: the crossover point (in a block routine, for N less
+                 than this value, an unblocked routine should be used)
+            = 4: the number of shifts, used in the nonsymmetric
+                 eigenvalue routines
+            = 5: the minimum column dimension for blocking to be used;
+                 rectangular blocks must have dimension at least k by m,
+                 where k is given by ILAENV(2,...) and m by ILAENV(5,...)
+            = 6: the crossover point for the SVD (when reducing an m by n
+                 matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds
+                 this value, a QR factorization is used first to reduce
+                 the matrix to a triangular form.)
+            = 7: the number of processors
+            = 8: the crossover point for the multishift QR and QZ methods
+                 for nonsymmetric eigenvalue problems.
+            = 9: maximum size of the subproblems at the bottom of the
+                 computation tree in the divide-and-conquer algorithm
+                 (used by xGELSD and xGESDD)
+            =10: ieee NaN arithmetic can be trusted not to trap
+            =11: infinity arithmetic can be trusted not to trap
+
+    NAME    (input) CHARACTER*(*)
+            The name of the calling subroutine, in either upper case or
+            lower case.
+
+    OPTS    (input) CHARACTER*(*)
+            The character options to the subroutine NAME, concatenated
+            into a single character string.  For example, UPLO = 'U',
+            TRANS = 'T', and DIAG = 'N' for a triangular routine would
+            be specified as OPTS = 'UTN'.
+
+    N1      (input) INTEGER
+    N2      (input) INTEGER
+    N3      (input) INTEGER
+    N4      (input) INTEGER
+            Problem dimensions for the subroutine NAME; these may not all
+            be required.
+
+   (ILAENV) (output) INTEGER
+            >= 0: the value of the parameter specified by ISPEC
+            < 0:  if ILAENV = -k, the k-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The following conventions have been used when calling ILAENV from the
+    LAPACK routines:
+    1)  OPTS is a concatenation of all of the character options to
+        subroutine NAME, in the same order that they appear in the
+        argument list for NAME, even if they are not used in determining
+        the value of the parameter specified by ISPEC.
+    2)  The problem dimensions N1, N2, N3, N4 are specified in the order
+        that they appear in the argument list for NAME.  N1 is used
+        first, N2 second, and so on, and unused problem dimensions are
+        passed a value of -1.
+    3)  The parameter value returned by ILAENV is checked for validity in
+        the calling subroutine.  For example, ILAENV is used to retrieve
+        the optimal blocksize for STRTRI as follows:
+
+        NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 )
+        IF( NB.LE.1 ) NB = MAX( 1, N )
+
+    =====================================================================
+*/
+
+
+    switch (*ispec) {
+	case 1:  goto L100;
+	case 2:  goto L100;
+	case 3:  goto L100;
+	case 4:  goto L400;
+	case 5:  goto L500;
+	case 6:  goto L600;
+	case 7:  goto L700;
+	case 8:  goto L800;
+	case 9:  goto L900;
+	case 10:  goto L1000;
+	case 11:  goto L1100;
+    }
+
+/*     Invalid value for ISPEC */
+
+    ret_val = -1;
+    return ret_val;
+
+L100:
+
+/*     Convert NAME to upper case if the first character is lower case. */
+
+    ret_val = 1;
+    s_copy(subnam, name__, (ftnlen)6, name_len);
+    ic = *(unsigned char *)subnam;
+    iz = 'Z';
+    if ((iz == 90) || (iz == 122)) {
+
+/*        ASCII character set */
+
+	if (ic >= 97 && ic <= 122) {
+	    *(unsigned char *)subnam = (char) (ic - 32);
+	    for (i__ = 2; i__ <= 6; ++i__) {
+		ic = *(unsigned char *)&subnam[i__ - 1];
+		if (ic >= 97 && ic <= 122) {
+		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
+		}
+/* L10: */
+	    }
+	}
+
+    } else if ((iz == 233) || (iz == 169)) {
+
+/*        EBCDIC character set */
+
+	if (((ic >= 129 && ic <= 137) || (ic >= 145 && ic <= 153)) || (ic >=
+		162 && ic <= 169)) {
+	    *(unsigned char *)subnam = (char) (ic + 64);
+	    for (i__ = 2; i__ <= 6; ++i__) {
+		ic = *(unsigned char *)&subnam[i__ - 1];
+		if (((ic >= 129 && ic <= 137) || (ic >= 145 && ic <= 153)) ||
+			(ic >= 162 && ic <= 169)) {
+		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64);
+		}
+/* L20: */
+	    }
+	}
+
+    } else if ((iz == 218) || (iz == 250)) {
+
+/*        Prime machines:  ASCII+128 */
+
+	if (ic >= 225 && ic <= 250) {
+	    *(unsigned char *)subnam = (char) (ic - 32);
+	    for (i__ = 2; i__ <= 6; ++i__) {
+		ic = *(unsigned char *)&subnam[i__ - 1];
+		if (ic >= 225 && ic <= 250) {
+		    *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32);
+		}
+/* L30: */
+	    }
+	}
+    }
+
+    *(unsigned char *)c1 = *(unsigned char *)subnam;
+    sname = (*(unsigned char *)c1 == 'S') || (*(unsigned char *)c1 == 'D');
+    cname = (*(unsigned char *)c1 == 'C') || (*(unsigned char *)c1 == 'Z');
+    if (! ((cname) || (sname))) {
+	return ret_val;
+    }
+    s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2);
+    s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3);
+    s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2);
+
+    switch (*ispec) {
+	case 1:  goto L110;
+	case 2:  goto L200;
+	case 3:  goto L300;
+    }
+
+L110:
+
+/*
+       ISPEC = 1:  block size
+
+       In these examples, separate code is provided for setting NB for
+       real and complex.  We assume that NB will take the same value in
+       single or double precision.
+*/
+
+    nb = 1;
+
+    if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 64;
+	    } else {
+		nb = 64;
+	    }
+	} else if ((((s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0) || (s_cmp(
+		c3, "RQF", (ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "LQF", (
+		ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "QLF", (ftnlen)3, (
+		ftnlen)3) == 0)) {
+	    if (sname) {
+		nb = 32;
+	    } else {
+		nb = 32;
+	    }
+	} else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 32;
+	    } else {
+		nb = 32;
+	    }
+	} else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 32;
+	    } else {
+		nb = 32;
+	    }
+	} else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 64;
+	    } else {
+		nb = 64;
+	    }
+	}
+    } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 64;
+	    } else {
+		nb = 64;
+	    }
+	}
+    } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 64;
+	    } else {
+		nb = 64;
+	    }
+	} else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    nb = 32;
+	} else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) {
+	    nb = 64;
+	}
+    } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    nb = 64;
+	} else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    nb = 32;
+	} else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) {
+	    nb = 64;
+	}
+    } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
+	if (*(unsigned char *)c3 == 'G') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nb = 32;
+	    }
+	} else if (*(unsigned char *)c3 == 'M') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nb = 32;
+	    }
+	}
+    } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
+	if (*(unsigned char *)c3 == 'G') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nb = 32;
+	    }
+	} else if (*(unsigned char *)c3 == 'M') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nb = 32;
+	    }
+	}
+    } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		if (*n4 <= 64) {
+		    nb = 1;
+		} else {
+		    nb = 32;
+		}
+	    } else {
+		if (*n4 <= 64) {
+		    nb = 1;
+		} else {
+		    nb = 32;
+		}
+	    }
+	}
+    } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		if (*n2 <= 64) {
+		    nb = 1;
+		} else {
+		    nb = 32;
+		}
+	    } else {
+		if (*n2 <= 64) {
+		    nb = 1;
+		} else {
+		    nb = 32;
+		}
+	    }
+	}
+    } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 64;
+	    } else {
+		nb = 64;
+	    }
+	}
+    } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nb = 64;
+	    } else {
+		nb = 64;
+	    }
+	}
+    } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) {
+	    nb = 1;
+	}
+    }
+    ret_val = nb;
+    return ret_val;
+
+L200:
+
+/*     ISPEC = 2:  minimum block size */
+
+    nbmin = 2;
+    if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
+	if ((((s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0) || (s_cmp(c3,
+		"RQF", (ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "LQF", (
+		ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "QLF", (ftnlen)3, (
+		ftnlen)3) == 0)) {
+	    if (sname) {
+		nbmin = 2;
+	    } else {
+		nbmin = 2;
+	    }
+	} else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nbmin = 2;
+	    } else {
+		nbmin = 2;
+	    }
+	} else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nbmin = 2;
+	    } else {
+		nbmin = 2;
+	    }
+	} else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nbmin = 2;
+	    } else {
+		nbmin = 2;
+	    }
+	}
+    } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nbmin = 8;
+	    } else {
+		nbmin = 8;
+	    }
+	} else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    nbmin = 2;
+	}
+    } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    nbmin = 2;
+	}
+    } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
+	if (*(unsigned char *)c3 == 'G') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nbmin = 2;
+	    }
+	} else if (*(unsigned char *)c3 == 'M') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nbmin = 2;
+	    }
+	}
+    } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
+	if (*(unsigned char *)c3 == 'G') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nbmin = 2;
+	    }
+	} else if (*(unsigned char *)c3 == 'M') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nbmin = 2;
+	    }
+	}
+    }
+    ret_val = nbmin;
+    return ret_val;
+
+L300:
+
+/*     ISPEC = 3:  crossover point */
+
+    nx = 0;
+    if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) {
+	if ((((s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0) || (s_cmp(c3,
+		"RQF", (ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "LQF", (
+		ftnlen)3, (ftnlen)3) == 0)) || (s_cmp(c3, "QLF", (ftnlen)3, (
+		ftnlen)3) == 0)) {
+	    if (sname) {
+		nx = 128;
+	    } else {
+		nx = 128;
+	    }
+	} else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nx = 128;
+	    } else {
+		nx = 128;
+	    }
+	} else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    if (sname) {
+		nx = 128;
+	    } else {
+		nx = 128;
+	    }
+	}
+    } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) {
+	if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    nx = 32;
+	}
+    } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) {
+	if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) {
+	    nx = 32;
+	}
+    } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) {
+	if (*(unsigned char *)c3 == 'G') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nx = 128;
+	    }
+	}
+    } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) {
+	if (*(unsigned char *)c3 == 'G') {
+	    if (((((((s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0) || (s_cmp(
+		    c4, "RQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4,
+		    "LQ", (ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "QL", (
+		    ftnlen)2, (ftnlen)2) == 0)) || (s_cmp(c4, "HR", (ftnlen)2,
+		     (ftnlen)2) == 0)) || (s_cmp(c4, "TR", (ftnlen)2, (ftnlen)
+		    2) == 0)) || (s_cmp(c4, "BR", (ftnlen)2, (ftnlen)2) == 0))
+		     {
+		nx = 128;
+	    }
+	}
+    }
+    ret_val = nx;
+    return ret_val;
+
+L400:
+
+/*     ISPEC = 4:  number of shifts (used by xHSEQR) */
+
+    ret_val = 6;
+    return ret_val;
+
+L500:
+
+/*     ISPEC = 5:  minimum column dimension (not used) */
+
+    ret_val = 2;
+    return ret_val;
+
+L600:
+
+/*     ISPEC = 6:  crossover point for SVD (used by xGELSS and xGESVD) */
+
+    ret_val = (integer) ((real) min(*n1,*n2) * 1.6f);
+    return ret_val;
+
+L700:
+
+/*     ISPEC = 7:  number of processors (not used) */
+
+    ret_val = 1;
+    return ret_val;
+
+L800:
+
+/*     ISPEC = 8:  crossover point for multishift (used by xHSEQR) */
+
+    ret_val = 50;
+    return ret_val;
+
+L900:
+
+/*
+       ISPEC = 9:  maximum size of the subproblems at the bottom of the
+                   computation tree in the divide-and-conquer algorithm
+                   (used by xGELSD and xGESDD)
+*/
+
+    ret_val = 25;
+    return ret_val;
+
+L1000:
+
+/*
+       ISPEC = 10: ieee NaN arithmetic can be trusted not to trap
+
+       ILAENV = 0
+*/
+    ret_val = 1;
+    if (ret_val == 1) {
+	ret_val = ieeeck_(&c__0, &c_b320, &c_b1011);
+    }
+    return ret_val;
+
+L1100:
+
+/*
+       ISPEC = 11: infinity arithmetic can be trusted not to trap
+
+       ILAENV = 0
+*/
+    ret_val = 1;
+    if (ret_val == 1) {
+	ret_val = ieeeck_(&c__1, &c_b320, &c_b1011);
+    }
+    return ret_val;
+
+/*     End of ILAENV */
+
+} /* ilaenv_ */
+
+/* Subroutine */ int sbdsdc_(char *uplo, char *compq, integer *n, real *d__,
+	real *e, real *u, integer *ldu, real *vt, integer *ldvt, real *q,
+	integer *iq, real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+    real r__1;
+
+    /* Builtin functions */
+    double r_sign(real *, real *), log(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k;
+    static real p, r__;
+    static integer z__, ic, ii, kk;
+    static real cs;
+    static integer is, iu;
+    static real sn;
+    static integer nm1;
+    static real eps;
+    static integer ivt, difl, difr, ierr, perm, mlvl, sqre;
+    extern logical lsame_(char *, char *);
+    static integer poles;
+    extern /* Subroutine */ int slasr_(char *, char *, char *, integer *,
+	    integer *, real *, real *, real *, integer *);
+    static integer iuplo, nsize, start;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), sswap_(integer *, real *, integer *, real *, integer *
+	    ), slasd0_(integer *, integer *, real *, real *, real *, integer *
+	    , real *, integer *, integer *, integer *, real *, integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int slasda_(integer *, integer *, integer *,
+	    integer *, real *, real *, real *, integer *, real *, integer *,
+	    real *, real *, real *, real *, integer *, integer *, integer *,
+	    integer *, real *, real *, real *, real *, integer *, integer *),
+	    xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *);
+    static integer givcol;
+    extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, real *, real *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, integer *);
+    static integer icompq;
+    extern /* Subroutine */ int slaset_(char *, integer *, integer *, real *,
+	    real *, real *, integer *), slartg_(real *, real *, real *
+	    , real *, real *);
+    static real orgnrm;
+    static integer givnum;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    static integer givptr, qstart, smlsiz, wstart, smlszp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 1, 1999
+
+
+    Purpose
+    =======
+
+    SBDSDC computes the singular value decomposition (SVD) of a real
+    N-by-N (upper or lower) bidiagonal matrix B:  B = U * S * VT,
+    using a divide and conquer method, where S is a diagonal matrix
+    with non-negative diagonal elements (the singular values of B), and
+    U and VT are orthogonal matrices of left and right singular vectors,
+    respectively. SBDSDC can be used to compute all singular values,
+    and optionally, singular vectors or singular vectors in compact form.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.  See SLASD3 for details.
+
+    The code currently call SLASDQ if singular values only are desired.
+    However, it can be slightly modified to compute singular values
+    using the divide and conquer method.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  B is upper bidiagonal.
+            = 'L':  B is lower bidiagonal.
+
+    COMPQ   (input) CHARACTER*1
+            Specifies whether singular vectors are to be computed
+            as follows:
+            = 'N':  Compute singular values only;
+            = 'P':  Compute singular values and compute singular
+                    vectors in compact form;
+            = 'I':  Compute singular values and singular vectors.
+
+    N       (input) INTEGER
+            The order of the matrix B.  N >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the n diagonal elements of the bidiagonal matrix B.
+            On exit, if INFO=0, the singular values of B.
+
+    E       (input/output) REAL array, dimension (N)
+            On entry, the elements of E contain the offdiagonal
+            elements of the bidiagonal matrix whose SVD is desired.
+            On exit, E has been destroyed.
+
+    U       (output) REAL array, dimension (LDU,N)
+            If  COMPQ = 'I', then:
+               On exit, if INFO = 0, U contains the left singular vectors
+               of the bidiagonal matrix.
+            For other values of COMPQ, U is not referenced.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= 1.
+            If singular vectors are desired, then LDU >= max( 1, N ).
+
+    VT      (output) REAL array, dimension (LDVT,N)
+            If  COMPQ = 'I', then:
+               On exit, if INFO = 0, VT' contains the right singular
+               vectors of the bidiagonal matrix.
+            For other values of COMPQ, VT is not referenced.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.  LDVT >= 1.
+            If singular vectors are desired, then LDVT >= max( 1, N ).
+
+    Q       (output) REAL array, dimension (LDQ)
+            If  COMPQ = 'P', then:
+               On exit, if INFO = 0, Q and IQ contain the left
+               and right singular vectors in a compact form,
+               requiring O(N log N) space instead of 2*N**2.
+               In particular, Q contains all the REAL data in
+               LDQ >= N*(11 + 2*SMLSIZ + 8*INT(LOG_2(N/(SMLSIZ+1))))
+               words of memory, where SMLSIZ is returned by ILAENV and
+               is equal to the maximum size of the subproblems at the
+               bottom of the computation tree (usually about 25).
+            For other values of COMPQ, Q is not referenced.
+
+    IQ      (output) INTEGER array, dimension (LDIQ)
+            If  COMPQ = 'P', then:
+               On exit, if INFO = 0, Q and IQ contain the left
+               and right singular vectors in a compact form,
+               requiring O(N log N) space instead of 2*N**2.
+               In particular, IQ contains all INTEGER data in
+               LDIQ >= N*(3 + 3*INT(LOG_2(N/(SMLSIZ+1))))
+               words of memory, where SMLSIZ is returned by ILAENV and
+               is equal to the maximum size of the subproblems at the
+               bottom of the computation tree (usually about 25).
+            For other values of COMPQ, IQ is not referenced.
+
+    WORK    (workspace) REAL array, dimension (LWORK)
+            If COMPQ = 'N' then LWORK >= (4 * N).
+            If COMPQ = 'P' then LWORK >= (6 * N).
+            If COMPQ = 'I' then LWORK >= (3 * N**2 + 4 * N).
+
+    IWORK   (workspace) INTEGER array, dimension (8*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an singular value.
+                  The update process of divide and conquer failed.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --q;
+    --iq;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    iuplo = 0;
+    if (lsame_(uplo, "U")) {
+	iuplo = 1;
+    }
+    if (lsame_(uplo, "L")) {
+	iuplo = 2;
+    }
+    if (lsame_(compq, "N")) {
+	icompq = 0;
+    } else if (lsame_(compq, "P")) {
+	icompq = 1;
+    } else if (lsame_(compq, "I")) {
+	icompq = 2;
+    } else {
+	icompq = -1;
+    }
+    if (iuplo == 0) {
+	*info = -1;
+    } else if (icompq < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ldu < 1) || (icompq == 2 && *ldu < *n)) {
+	*info = -7;
+    } else if ((*ldvt < 1) || (icompq == 2 && *ldvt < *n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SBDSDC", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    smlsiz = ilaenv_(&c__9, "SBDSDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+    if (*n == 1) {
+	if (icompq == 1) {
+	    q[1] = r_sign(&c_b1011, &d__[1]);
+	    q[smlsiz * *n + 1] = 1.f;
+	} else if (icompq == 2) {
+	    u[u_dim1 + 1] = r_sign(&c_b1011, &d__[1]);
+	    vt[vt_dim1 + 1] = 1.f;
+	}
+	d__[1] = dabs(d__[1]);
+	return 0;
+    }
+    nm1 = *n - 1;
+
+/*
+       If matrix lower bidiagonal, rotate to be upper bidiagonal
+       by applying Givens rotations on the left
+*/
+
+    wstart = 1;
+    qstart = 3;
+    if (icompq == 1) {
+	scopy_(n, &d__[1], &c__1, &q[1], &c__1);
+	i__1 = *n - 1;
+	scopy_(&i__1, &e[1], &c__1, &q[*n + 1], &c__1);
+    }
+    if (iuplo == 2) {
+	qstart = 5;
+	wstart = ((*n) << (1)) - 1;
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (icompq == 1) {
+		q[i__ + ((*n) << (1))] = cs;
+		q[i__ + *n * 3] = sn;
+	    } else if (icompq == 2) {
+		work[i__] = cs;
+		work[nm1 + i__] = -sn;
+	    }
+/* L10: */
+	}
+    }
+
+/*     If ICOMPQ = 0, use SLASDQ to compute the singular values. */
+
+    if (icompq == 0) {
+	slasdq_("U", &c__0, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
+		vt_offset], ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
+		wstart], info);
+	goto L40;
+    }
+
+/*
+       If N is smaller than the minimum divide size SMLSIZ, then solve
+       the problem with another solver.
+*/
+
+    if (*n <= smlsiz) {
+	if (icompq == 2) {
+	    slaset_("A", n, n, &c_b320, &c_b1011, &u[u_offset], ldu);
+	    slaset_("A", n, n, &c_b320, &c_b1011, &vt[vt_offset], ldvt);
+	    slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
+		    , ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[
+		    wstart], info);
+	} else if (icompq == 1) {
+	    iu = 1;
+	    ivt = iu + *n;
+	    slaset_("A", n, n, &c_b320, &c_b1011, &q[iu + (qstart - 1) * *n],
+		    n);
+	    slaset_("A", n, n, &c_b320, &c_b1011, &q[ivt + (qstart - 1) * *n],
+		     n);
+	    slasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &q[ivt + (
+		    qstart - 1) * *n], n, &q[iu + (qstart - 1) * *n], n, &q[
+		    iu + (qstart - 1) * *n], n, &work[wstart], info);
+	}
+	goto L40;
+    }
+
+    if (icompq == 2) {
+	slaset_("A", n, n, &c_b320, &c_b1011, &u[u_offset], ldu);
+	slaset_("A", n, n, &c_b320, &c_b1011, &vt[vt_offset], ldvt)
+		;
+    }
+
+/*     Scale. */
+
+    orgnrm = slanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.f) {
+	return 0;
+    }
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, &c__1, &d__[1], n, &ierr);
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &nm1, &c__1, &e[1], &nm1, &
+	    ierr);
+
+    eps = slamch_("Epsilon");
+
+    mlvl = (integer) (log((real) (*n) / (real) (smlsiz + 1)) / log(2.f)) + 1;
+    smlszp = smlsiz + 1;
+
+    if (icompq == 1) {
+	iu = 1;
+	ivt = smlsiz + 1;
+	difl = ivt + smlszp;
+	difr = difl + mlvl;
+	z__ = difr + ((mlvl) << (1));
+	ic = z__ + mlvl;
+	is = ic + 1;
+	poles = is + 1;
+	givnum = poles + ((mlvl) << (1));
+
+	k = 1;
+	givptr = 2;
+	perm = 3;
+	givcol = perm + mlvl;
+    }
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = d__[i__], dabs(r__1)) < eps) {
+	    d__[i__] = r_sign(&eps, &d__[i__]);
+	}
+/* L20: */
+    }
+
+    start = 1;
+    sqre = 0;
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (((r__1 = e[i__], dabs(r__1)) < eps) || (i__ == nm1)) {
+
+/*
+          Subproblem found. First determine its size and then
+          apply divide and conquer on it.
+*/
+
+	    if (i__ < nm1) {
+
+/*        A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - start + 1;
+	    } else if ((r__1 = e[i__], dabs(r__1)) >= eps) {
+
+/*        A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - start + 1;
+	    } else {
+
+/*
+          A subproblem with E(NM1) small. This implies an
+          1-by-1 subproblem at D(N). Solve this 1-by-1 problem
+          first.
+*/
+
+		nsize = i__ - start + 1;
+		if (icompq == 2) {
+		    u[*n + *n * u_dim1] = r_sign(&c_b1011, &d__[*n]);
+		    vt[*n + *n * vt_dim1] = 1.f;
+		} else if (icompq == 1) {
+		    q[*n + (qstart - 1) * *n] = r_sign(&c_b1011, &d__[*n]);
+		    q[*n + (smlsiz + qstart - 1) * *n] = 1.f;
+		}
+		d__[*n] = (r__1 = d__[*n], dabs(r__1));
+	    }
+	    if (icompq == 2) {
+		slasd0_(&nsize, &sqre, &d__[start], &e[start], &u[start +
+			start * u_dim1], ldu, &vt[start + start * vt_dim1],
+			ldvt, &smlsiz, &iwork[1], &work[wstart], info);
+	    } else {
+		slasda_(&icompq, &smlsiz, &nsize, &sqre, &d__[start], &e[
+			start], &q[start + (iu + qstart - 2) * *n], n, &q[
+			start + (ivt + qstart - 2) * *n], &iq[start + k * *n],
+			 &q[start + (difl + qstart - 2) * *n], &q[start + (
+			difr + qstart - 2) * *n], &q[start + (z__ + qstart -
+			2) * *n], &q[start + (poles + qstart - 2) * *n], &iq[
+			start + givptr * *n], &iq[start + givcol * *n], n, &
+			iq[start + perm * *n], &q[start + (givnum + qstart -
+			2) * *n], &q[start + (ic + qstart - 2) * *n], &q[
+			start + (is + qstart - 2) * *n], &work[wstart], &
+			iwork[1], info);
+		if (*info != 0) {
+		    return 0;
+		}
+	    }
+	    start = i__ + 1;
+	}
+/* L30: */
+    }
+
+/*     Unscale */
+
+    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, &ierr);
+L40:
+
+/*     Use Selection Sort to minimize swaps of singular vectors */
+
+    i__1 = *n;
+    for (ii = 2; ii <= i__1; ++ii) {
+	i__ = ii - 1;
+	kk = i__;
+	p = d__[i__];
+	i__2 = *n;
+	for (j = ii; j <= i__2; ++j) {
+	    if (d__[j] > p) {
+		kk = j;
+		p = d__[j];
+	    }
+/* L50: */
+	}
+	if (kk != i__) {
+	    d__[kk] = d__[i__];
+	    d__[i__] = p;
+	    if (icompq == 1) {
+		iq[i__] = kk;
+	    } else if (icompq == 2) {
+		sswap_(n, &u[i__ * u_dim1 + 1], &c__1, &u[kk * u_dim1 + 1], &
+			c__1);
+		sswap_(n, &vt[i__ + vt_dim1], ldvt, &vt[kk + vt_dim1], ldvt);
+	    }
+	} else if (icompq == 1) {
+	    iq[i__] = i__;
+	}
+/* L60: */
+    }
+
+/*     If ICOMPQ = 1, use IQ(N,1) as the indicator for UPLO */
+
+    if (icompq == 1) {
+	if (iuplo == 1) {
+	    iq[*n] = 1;
+	} else {
+	    iq[*n] = 0;
+	}
+    }
+
+/*
+       If B is lower bidiagonal, update U by those Givens rotations
+       which rotated B to be upper bidiagonal
+*/
+
+    if (iuplo == 2 && icompq == 2) {
+	slasr_("L", "V", "B", n, n, &work[1], &work[*n], &u[u_offset], ldu);
+    }
+
+    return 0;
+
+/*     End of SBDSDC */
+
+} /* sbdsdc_ */
+
+/* Subroutine */ int sbdsqr_(char *uplo, integer *n, integer *ncvt, integer *
+	nru, integer *ncc, real *d__, real *e, real *vt, integer *ldvt, real *
+	u, integer *ldu, real *c__, integer *ldc, real *work, integer *info)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2;
+    real r__1, r__2, r__3, r__4;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double pow_dd(doublereal *, doublereal *), sqrt(doublereal), r_sign(real *
+	    , real *);
+
+    /* Local variables */
+    static real f, g, h__;
+    static integer i__, j, m;
+    static real r__, cs;
+    static integer ll;
+    static real sn, mu;
+    static integer nm1, nm12, nm13, lll;
+    static real eps, sll, tol, abse;
+    static integer idir;
+    static real abss;
+    static integer oldm;
+    static real cosl;
+    static integer isub, iter;
+    static real unfl, sinl, cosr, smin, smax, sinr;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *), slas2_(real *, real *, real *, real *,
+	     real *);
+    extern logical lsame_(char *, char *);
+    static real oldcs;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static integer oldll;
+    static real shift, sigmn, oldsn;
+    static integer maxit;
+    static real sminl;
+    extern /* Subroutine */ int slasr_(char *, char *, char *, integer *,
+	    integer *, real *, real *, real *, integer *);
+    static real sigmx;
+    static logical lower;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
+	    integer *), slasq1_(integer *, real *, real *, real *, integer *),
+	     slasv2_(real *, real *, real *, real *, real *, real *, real *,
+	    real *, real *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real sminoa;
+    extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
+	    );
+    static real thresh;
+    static logical rotate;
+    static real sminlo, tolmul;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SBDSQR computes the singular value decomposition (SVD) of a real
+    N-by-N (upper or lower) bidiagonal matrix B:  B = Q * S * P' (P'
+    denotes the transpose of P), where S is a diagonal matrix with
+    non-negative diagonal elements (the singular values of B), and Q
+    and P are orthogonal matrices.
+
+    The routine computes S, and optionally computes U * Q, P' * VT,
+    or Q' * C, for given real input matrices U, VT, and C.
+
+    See "Computing  Small Singular Values of Bidiagonal Matrices With
+    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+    LAPACK Working Note #3 (or SIAM J. Sci. Statist. Comput. vol. 11,
+    no. 5, pp. 873-912, Sept 1990) and
+    "Accurate singular values and differential qd algorithms," by
+    B. Parlett and V. Fernando, Technical Report CPAM-554, Mathematics
+    Department, University of California at Berkeley, July 1992
+    for a detailed description of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  B is upper bidiagonal;
+            = 'L':  B is lower bidiagonal.
+
+    N       (input) INTEGER
+            The order of the matrix B.  N >= 0.
+
+    NCVT    (input) INTEGER
+            The number of columns of the matrix VT. NCVT >= 0.
+
+    NRU     (input) INTEGER
+            The number of rows of the matrix U. NRU >= 0.
+
+    NCC     (input) INTEGER
+            The number of columns of the matrix C. NCC >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the n diagonal elements of the bidiagonal matrix B.
+            On exit, if INFO=0, the singular values of B in decreasing
+            order.
+
+    E       (input/output) REAL array, dimension (N)
+            On entry, the elements of E contain the
+            offdiagonal elements of the bidiagonal matrix whose SVD
+            is desired. On normal exit (INFO = 0), E is destroyed.
+            If the algorithm does not converge (INFO > 0), D and E
+            will contain the diagonal and superdiagonal elements of a
+            bidiagonal matrix orthogonally equivalent to the one given
+            as input. E(N) is used for workspace.
+
+    VT      (input/output) REAL array, dimension (LDVT, NCVT)
+            On entry, an N-by-NCVT matrix VT.
+            On exit, VT is overwritten by P' * VT.
+            VT is not referenced if NCVT = 0.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.
+            LDVT >= max(1,N) if NCVT > 0; LDVT >= 1 if NCVT = 0.
+
+    U       (input/output) REAL array, dimension (LDU, N)
+            On entry, an NRU-by-N matrix U.
+            On exit, U is overwritten by U * Q.
+            U is not referenced if NRU = 0.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= max(1,NRU).
+
+    C       (input/output) REAL array, dimension (LDC, NCC)
+            On entry, an N-by-NCC matrix C.
+            On exit, C is overwritten by Q' * C.
+            C is not referenced if NCC = 0.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C.
+            LDC >= max(1,N) if NCC > 0; LDC >=1 if NCC = 0.
+
+    WORK    (workspace) REAL array, dimension (4*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  If INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm did not converge; D and E contain the
+                  elements of a bidiagonal matrix which is orthogonally
+                  similar to the input matrix B;  if INFO = i, i
+                  elements of E have not converged to zero.
+
+    Internal Parameters
+    ===================
+
+    TOLMUL  REAL, default = max(10,min(100,EPS**(-1/8)))
+            TOLMUL controls the convergence criterion of the QR loop.
+            If it is positive, TOLMUL*EPS is the desired relative
+               precision in the computed singular values.
+            If it is negative, abs(TOLMUL*EPS*sigma_max) is the
+               desired absolute accuracy in the computed singular
+               values (corresponds to relative accuracy
+               abs(TOLMUL*EPS) in the largest singular value.
+            abs(TOLMUL) should be between 1 and 1/EPS, and preferably
+               between 10 (for fast convergence) and .1/EPS
+               (for there to be some accuracy in the results).
+            Default is to lose at either one eighth or 2 of the
+               available decimal digits in each computed singular value
+               (whichever is smaller).
+
+    MAXITR  INTEGER, default = 6
+            MAXITR controls the maximum number of passes of the
+            algorithm through its inner loop. The algorithms stops
+            (and so fails to converge) if the number of passes
+            through the inner loop exceeds MAXITR*N**2.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lower = lsame_(uplo, "L");
+    if (! lsame_(uplo, "U") && ! lower) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*ncvt < 0) {
+	*info = -3;
+    } else if (*nru < 0) {
+	*info = -4;
+    } else if (*ncc < 0) {
+	*info = -5;
+    } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n)))
+	    {
+	*info = -9;
+    } else if (*ldu < max(1,*nru)) {
+	*info = -11;
+    } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SBDSQR", &i__1);
+	return 0;
+    }
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	goto L160;
+    }
+
+/*     ROTATE is true if any singular vectors desired, false otherwise */
+
+    rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0);
+
+/*     If no singular vectors desired, use qd algorithm */
+
+    if (! rotate) {
+	slasq1_(n, &d__[1], &e[1], &work[1], info);
+	return 0;
+    }
+
+    nm1 = *n - 1;
+    nm12 = nm1 + nm1;
+    nm13 = nm12 + nm1;
+    idir = 0;
+
+/*     Get machine constants */
+
+    eps = slamch_("Epsilon");
+    unfl = slamch_("Safe minimum");
+
+/*
+       If matrix lower bidiagonal, rotate to be upper bidiagonal
+       by applying Givens rotations on the left
+*/
+
+    if (lower) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    work[i__] = cs;
+	    work[nm1 + i__] = sn;
+/* L10: */
+	}
+
+/*        Update singular vectors if desired */
+
+	if (*nru > 0) {
+	    slasr_("R", "V", "F", nru, n, &work[1], &work[*n], &u[u_offset],
+		    ldu);
+	}
+	if (*ncc > 0) {
+	    slasr_("L", "V", "F", n, ncc, &work[1], &work[*n], &c__[c_offset],
+		     ldc);
+	}
+    }
+
+/*
+       Compute singular values to relative accuracy TOL
+       (By setting TOL to be negative, algorithm will compute
+       singular values to absolute accuracy ABS(TOL)*norm(input matrix))
+
+   Computing MAX
+   Computing MIN
+*/
+    d__1 = (doublereal) eps;
+    r__3 = 100.f, r__4 = pow_dd(&d__1, &c_b2944);
+    r__1 = 10.f, r__2 = dmin(r__3,r__4);
+    tolmul = dmax(r__1,r__2);
+    tol = tolmul * eps;
+
+/*     Compute approximate maximum, minimum singular values */
+
+    smax = 0.f;
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	r__2 = smax, r__3 = (r__1 = d__[i__], dabs(r__1));
+	smax = dmax(r__2,r__3);
+/* L20: */
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	r__2 = smax, r__3 = (r__1 = e[i__], dabs(r__1));
+	smax = dmax(r__2,r__3);
+/* L30: */
+    }
+    sminl = 0.f;
+    if (tol >= 0.f) {
+
+/*        Relative accuracy desired */
+
+	sminoa = dabs(d__[1]);
+	if (sminoa == 0.f) {
+	    goto L50;
+	}
+	mu = sminoa;
+	i__1 = *n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    mu = (r__2 = d__[i__], dabs(r__2)) * (mu / (mu + (r__1 = e[i__ -
+		    1], dabs(r__1))));
+	    sminoa = dmin(sminoa,mu);
+	    if (sminoa == 0.f) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+	sminoa /= sqrt((real) (*n));
+/* Computing MAX */
+	r__1 = tol * sminoa, r__2 = *n * 6 * *n * unfl;
+	thresh = dmax(r__1,r__2);
+    } else {
+
+/*
+          Absolute accuracy desired
+
+   Computing MAX
+*/
+	r__1 = dabs(tol) * smax, r__2 = *n * 6 * *n * unfl;
+	thresh = dmax(r__1,r__2);
+    }
+
+/*
+       Prepare for main iteration loop for the singular values
+       (MAXIT is the maximum number of passes through the inner
+       loop permitted before nonconvergence signalled.)
+*/
+
+    maxit = *n * 6 * *n;
+    iter = 0;
+    oldll = -1;
+    oldm = -1;
+
+/*     M points to last element of unconverged part of matrix */
+
+    m = *n;
+
+/*     Begin main iteration loop */
+
+L60:
+
+/*     Check for convergence or exceeding iteration count */
+
+    if (m <= 1) {
+	goto L160;
+    }
+    if (iter > maxit) {
+	goto L200;
+    }
+
+/*     Find diagonal block of matrix to work on */
+
+    if (tol < 0.f && (r__1 = d__[m], dabs(r__1)) <= thresh) {
+	d__[m] = 0.f;
+    }
+    smax = (r__1 = d__[m], dabs(r__1));
+    smin = smax;
+    i__1 = m - 1;
+    for (lll = 1; lll <= i__1; ++lll) {
+	ll = m - lll;
+	abss = (r__1 = d__[ll], dabs(r__1));
+	abse = (r__1 = e[ll], dabs(r__1));
+	if (tol < 0.f && abss <= thresh) {
+	    d__[ll] = 0.f;
+	}
+	if (abse <= thresh) {
+	    goto L80;
+	}
+	smin = dmin(smin,abss);
+/* Computing MAX */
+	r__1 = max(smax,abss);
+	smax = dmax(r__1,abse);
+/* L70: */
+    }
+    ll = 0;
+    goto L90;
+L80:
+    e[ll] = 0.f;
+
+/*     Matrix splits since E(LL) = 0 */
+
+    if (ll == m - 1) {
+
+/*        Convergence of bottom singular value, return to top of loop */
+
+	--m;
+	goto L60;
+    }
+L90:
+    ++ll;
+
+/*     E(LL) through E(M-1) are nonzero, E(LL-1) is zero */
+
+    if (ll == m - 1) {
+
+/*        2 by 2 block, handle separately */
+
+	slasv2_(&d__[m - 1], &e[m - 1], &d__[m], &sigmn, &sigmx, &sinr, &cosr,
+		 &sinl, &cosl);
+	d__[m - 1] = sigmx;
+	e[m - 1] = 0.f;
+	d__[m] = sigmn;
+
+/*        Compute singular vectors, if desired */
+
+	if (*ncvt > 0) {
+	    srot_(ncvt, &vt[m - 1 + vt_dim1], ldvt, &vt[m + vt_dim1], ldvt, &
+		    cosr, &sinr);
+	}
+	if (*nru > 0) {
+	    srot_(nru, &u[(m - 1) * u_dim1 + 1], &c__1, &u[m * u_dim1 + 1], &
+		    c__1, &cosl, &sinl);
+	}
+	if (*ncc > 0) {
+	    srot_(ncc, &c__[m - 1 + c_dim1], ldc, &c__[m + c_dim1], ldc, &
+		    cosl, &sinl);
+	}
+	m += -2;
+	goto L60;
+    }
+
+/*
+       If working on new submatrix, choose shift direction
+       (from larger end diagonal element towards smaller)
+*/
+
+    if ((ll > oldm) || (m < oldll)) {
+	if ((r__1 = d__[ll], dabs(r__1)) >= (r__2 = d__[m], dabs(r__2))) {
+
+/*           Chase bulge from top (big end) to bottom (small end) */
+
+	    idir = 1;
+	} else {
+
+/*           Chase bulge from bottom (big end) to top (small end) */
+
+	    idir = 2;
+	}
+    }
+
+/*     Apply convergence tests */
+
+    if (idir == 1) {
+
+/*
+          Run convergence test in forward direction
+          First apply standard test to bottom of matrix
+*/
+
+	if (((r__2 = e[m - 1], dabs(r__2)) <= dabs(tol) * (r__1 = d__[m],
+		dabs(r__1))) || (tol < 0.f && (r__3 = e[m - 1], dabs(r__3)) <=
+		 thresh)) {
+	    e[m - 1] = 0.f;
+	    goto L60;
+	}
+
+	if (tol >= 0.f) {
+
+/*
+             If relative accuracy desired,
+             apply convergence criterion forward
+*/
+
+	    mu = (r__1 = d__[ll], dabs(r__1));
+	    sminl = mu;
+	    i__1 = m - 1;
+	    for (lll = ll; lll <= i__1; ++lll) {
+		if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
+		    e[lll] = 0.f;
+		    goto L60;
+		}
+		sminlo = sminl;
+		mu = (r__2 = d__[lll + 1], dabs(r__2)) * (mu / (mu + (r__1 =
+			e[lll], dabs(r__1))));
+		sminl = dmin(sminl,mu);
+/* L100: */
+	    }
+	}
+
+    } else {
+
+/*
+          Run convergence test in backward direction
+          First apply standard test to top of matrix
+*/
+
+	if (((r__2 = e[ll], dabs(r__2)) <= dabs(tol) * (r__1 = d__[ll], dabs(
+		r__1))) || (tol < 0.f && (r__3 = e[ll], dabs(r__3)) <= thresh)
+		) {
+	    e[ll] = 0.f;
+	    goto L60;
+	}
+
+	if (tol >= 0.f) {
+
+/*
+             If relative accuracy desired,
+             apply convergence criterion backward
+*/
+
+	    mu = (r__1 = d__[m], dabs(r__1));
+	    sminl = mu;
+	    i__1 = ll;
+	    for (lll = m - 1; lll >= i__1; --lll) {
+		if ((r__1 = e[lll], dabs(r__1)) <= tol * mu) {
+		    e[lll] = 0.f;
+		    goto L60;
+		}
+		sminlo = sminl;
+		mu = (r__2 = d__[lll], dabs(r__2)) * (mu / (mu + (r__1 = e[
+			lll], dabs(r__1))));
+		sminl = dmin(sminl,mu);
+/* L110: */
+	    }
+	}
+    }
+    oldll = ll;
+    oldm = m;
+
+/*
+       Compute shift.  First, test if shifting would ruin relative
+       accuracy, and if so set the shift to zero.
+
+   Computing MAX
+*/
+    r__1 = eps, r__2 = tol * .01f;
+    if (tol >= 0.f && *n * tol * (sminl / smax) <= dmax(r__1,r__2)) {
+
+/*        Use a zero shift to avoid loss of relative accuracy */
+
+	shift = 0.f;
+    } else {
+
+/*        Compute the shift from 2-by-2 block at end of matrix */
+
+	if (idir == 1) {
+	    sll = (r__1 = d__[ll], dabs(r__1));
+	    slas2_(&d__[m - 1], &e[m - 1], &d__[m], &shift, &r__);
+	} else {
+	    sll = (r__1 = d__[m], dabs(r__1));
+	    slas2_(&d__[ll], &e[ll], &d__[ll + 1], &shift, &r__);
+	}
+
+/*        Test if shift negligible, and if so set to zero */
+
+	if (sll > 0.f) {
+/* Computing 2nd power */
+	    r__1 = shift / sll;
+	    if (r__1 * r__1 < eps) {
+		shift = 0.f;
+	    }
+	}
+    }
+
+/*     Increment iteration count */
+
+    iter = iter + m - ll;
+
+/*     If SHIFT = 0, do simplified QR iteration */
+
+    if (shift == 0.f) {
+	if (idir == 1) {
+
+/*
+             Chase bulge from top to bottom
+             Save cosines and sines for later singular vector updates
+*/
+
+	    cs = 1.f;
+	    oldcs = 1.f;
+	    i__1 = m - 1;
+	    for (i__ = ll; i__ <= i__1; ++i__) {
+		r__1 = d__[i__] * cs;
+		slartg_(&r__1, &e[i__], &cs, &sn, &r__);
+		if (i__ > ll) {
+		    e[i__ - 1] = oldsn * r__;
+		}
+		r__1 = oldcs * r__;
+		r__2 = d__[i__ + 1] * sn;
+		slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
+		work[i__ - ll + 1] = cs;
+		work[i__ - ll + 1 + nm1] = sn;
+		work[i__ - ll + 1 + nm12] = oldcs;
+		work[i__ - ll + 1 + nm13] = oldsn;
+/* L120: */
+	    }
+	    h__ = d__[m] * cs;
+	    d__[m] = h__ * oldcs;
+	    e[m - 1] = h__ * oldsn;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
+			ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
+			+ 1], &u[ll * u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
+			+ 1], &c__[ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
+		e[m - 1] = 0.f;
+	    }
+
+	} else {
+
+/*
+             Chase bulge from bottom to top
+             Save cosines and sines for later singular vector updates
+*/
+
+	    cs = 1.f;
+	    oldcs = 1.f;
+	    i__1 = ll + 1;
+	    for (i__ = m; i__ >= i__1; --i__) {
+		r__1 = d__[i__] * cs;
+		slartg_(&r__1, &e[i__ - 1], &cs, &sn, &r__);
+		if (i__ < m) {
+		    e[i__] = oldsn * r__;
+		}
+		r__1 = oldcs * r__;
+		r__2 = d__[i__ - 1] * sn;
+		slartg_(&r__1, &r__2, &oldcs, &oldsn, &d__[i__]);
+		work[i__ - ll] = cs;
+		work[i__ - ll + nm1] = -sn;
+		work[i__ - ll + nm12] = oldcs;
+		work[i__ - ll + nm13] = -oldsn;
+/* L130: */
+	    }
+	    h__ = d__[ll] * cs;
+	    d__[ll] = h__ * oldcs;
+	    e[ll] = h__ * oldsn;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
+			nm13 + 1], &vt[ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
+			 u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
+			ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
+		e[ll] = 0.f;
+	    }
+	}
+    } else {
+
+/*        Use nonzero shift */
+
+	if (idir == 1) {
+
+/*
+             Chase bulge from top to bottom
+             Save cosines and sines for later singular vector updates
+*/
+
+	    f = ((r__1 = d__[ll], dabs(r__1)) - shift) * (r_sign(&c_b1011, &
+		    d__[ll]) + shift / d__[ll]);
+	    g = e[ll];
+	    i__1 = m - 1;
+	    for (i__ = ll; i__ <= i__1; ++i__) {
+		slartg_(&f, &g, &cosr, &sinr, &r__);
+		if (i__ > ll) {
+		    e[i__ - 1] = r__;
+		}
+		f = cosr * d__[i__] + sinr * e[i__];
+		e[i__] = cosr * e[i__] - sinr * d__[i__];
+		g = sinr * d__[i__ + 1];
+		d__[i__ + 1] = cosr * d__[i__ + 1];
+		slartg_(&f, &g, &cosl, &sinl, &r__);
+		d__[i__] = r__;
+		f = cosl * e[i__] + sinl * d__[i__ + 1];
+		d__[i__ + 1] = cosl * d__[i__ + 1] - sinl * e[i__];
+		if (i__ < m - 1) {
+		    g = sinl * e[i__ + 1];
+		    e[i__ + 1] = cosl * e[i__ + 1];
+		}
+		work[i__ - ll + 1] = cosr;
+		work[i__ - ll + 1 + nm1] = sinr;
+		work[i__ - ll + 1 + nm12] = cosl;
+		work[i__ - ll + 1 + nm13] = sinl;
+/* L140: */
+	    }
+	    e[m - 1] = f;
+
+/*           Update singular vectors */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "F", &i__1, ncvt, &work[1], &work[*n], &vt[
+			ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		slasr_("R", "V", "F", nru, &i__1, &work[nm12 + 1], &work[nm13
+			+ 1], &u[ll * u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "F", &i__1, ncc, &work[nm12 + 1], &work[nm13
+			+ 1], &c__[ll + c_dim1], ldc);
+	    }
+
+/*           Test convergence */
+
+	    if ((r__1 = e[m - 1], dabs(r__1)) <= thresh) {
+		e[m - 1] = 0.f;
+	    }
+
+	} else {
+
+/*
+             Chase bulge from bottom to top
+             Save cosines and sines for later singular vector updates
+*/
+
+	    f = ((r__1 = d__[m], dabs(r__1)) - shift) * (r_sign(&c_b1011, &
+		    d__[m]) + shift / d__[m]);
+	    g = e[m - 1];
+	    i__1 = ll + 1;
+	    for (i__ = m; i__ >= i__1; --i__) {
+		slartg_(&f, &g, &cosr, &sinr, &r__);
+		if (i__ < m) {
+		    e[i__] = r__;
+		}
+		f = cosr * d__[i__] + sinr * e[i__ - 1];
+		e[i__ - 1] = cosr * e[i__ - 1] - sinr * d__[i__];
+		g = sinr * d__[i__ - 1];
+		d__[i__ - 1] = cosr * d__[i__ - 1];
+		slartg_(&f, &g, &cosl, &sinl, &r__);
+		d__[i__] = r__;
+		f = cosl * e[i__ - 1] + sinl * d__[i__ - 1];
+		d__[i__ - 1] = cosl * d__[i__ - 1] - sinl * e[i__ - 1];
+		if (i__ > ll + 1) {
+		    g = sinl * e[i__ - 2];
+		    e[i__ - 2] = cosl * e[i__ - 2];
+		}
+		work[i__ - ll] = cosr;
+		work[i__ - ll + nm1] = -sinr;
+		work[i__ - ll + nm12] = cosl;
+		work[i__ - ll + nm13] = -sinl;
+/* L150: */
+	    }
+	    e[ll] = f;
+
+/*           Test convergence */
+
+	    if ((r__1 = e[ll], dabs(r__1)) <= thresh) {
+		e[ll] = 0.f;
+	    }
+
+/*           Update singular vectors if desired */
+
+	    if (*ncvt > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "B", &i__1, ncvt, &work[nm12 + 1], &work[
+			nm13 + 1], &vt[ll + vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		i__1 = m - ll + 1;
+		slasr_("R", "V", "B", nru, &i__1, &work[1], &work[*n], &u[ll *
+			 u_dim1 + 1], ldu);
+	    }
+	    if (*ncc > 0) {
+		i__1 = m - ll + 1;
+		slasr_("L", "V", "B", &i__1, ncc, &work[1], &work[*n], &c__[
+			ll + c_dim1], ldc);
+	    }
+	}
+    }
+
+/*     QR iteration finished, go back and check convergence */
+
+    goto L60;
+
+/*     All singular values converged, so make them positive */
+
+L160:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (d__[i__] < 0.f) {
+	    d__[i__] = -d__[i__];
+
+/*           Change sign of singular vectors, if desired */
+
+	    if (*ncvt > 0) {
+		sscal_(ncvt, &c_b1290, &vt[i__ + vt_dim1], ldvt);
+	    }
+	}
+/* L170: */
+    }
+
+/*
+       Sort the singular values into decreasing order (insertion sort on
+       singular values, but only one transposition per singular vector)
+*/
+
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Scan for smallest D(I) */
+
+	isub = 1;
+	smin = d__[1];
+	i__2 = *n + 1 - i__;
+	for (j = 2; j <= i__2; ++j) {
+	    if (d__[j] <= smin) {
+		isub = j;
+		smin = d__[j];
+	    }
+/* L180: */
+	}
+	if (isub != *n + 1 - i__) {
+
+/*           Swap singular values and vectors */
+
+	    d__[isub] = d__[*n + 1 - i__];
+	    d__[*n + 1 - i__] = smin;
+	    if (*ncvt > 0) {
+		sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[*n + 1 - i__ +
+			vt_dim1], ldvt);
+	    }
+	    if (*nru > 0) {
+		sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[(*n + 1 - i__) *
+			u_dim1 + 1], &c__1);
+	    }
+	    if (*ncc > 0) {
+		sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[*n + 1 - i__ +
+			c_dim1], ldc);
+	    }
+	}
+/* L190: */
+    }
+    goto L220;
+
+/*     Maximum number of iterations exceeded, failure to converge */
+
+L200:
+    *info = 0;
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.f) {
+	    ++(*info);
+	}
+/* L210: */
+    }
+L220:
+    return 0;
+
+/*     End of SBDSQR */
+
+} /* sbdsqr_ */
+
+/* Subroutine */ int sgebak_(char *job, char *side, integer *n, integer *ilo,
+	integer *ihi, real *scale, integer *m, real *v, integer *ldv, integer
+	*info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    static integer i__, k;
+    static real s;
+    static integer ii;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static logical leftv;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
+	    integer *), xerbla_(char *, integer *);
+    static logical rightv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SGEBAK forms the right or left eigenvectors of a real general matrix
+    by backward transformation on the computed eigenvectors of the
+    balanced matrix output by SGEBAL.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the type of backward transformation required:
+            = 'N', do nothing, return immediately;
+            = 'P', do backward transformation for permutation only;
+            = 'S', do backward transformation for scaling only;
+            = 'B', do backward transformations for both permutation and
+                   scaling.
+            JOB must be the same as the argument JOB supplied to SGEBAL.
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  V contains right eigenvectors;
+            = 'L':  V contains left eigenvectors.
+
+    N       (input) INTEGER
+            The number of rows of the matrix V.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            The integers ILO and IHI determined by SGEBAL.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    SCALE   (input) REAL array, dimension (N)
+            Details of the permutation and scaling factors, as returned
+            by SGEBAL.
+
+    M       (input) INTEGER
+            The number of columns of the matrix V.  M >= 0.
+
+    V       (input/output) REAL array, dimension (LDV,M)
+            On entry, the matrix of right or left eigenvectors to be
+            transformed, as returned by SHSEIN or STREVC.
+            On exit, V is overwritten by the transformed eigenvectors.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V. LDV >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Decode and Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --scale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+
+    /* Function Body */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*ldv < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBAK", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if ((lsame_(job, "S")) || (lsame_(job, "B"))) {
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = scale[i__];
+		sscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L10: */
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = 1.f / scale[i__];
+		sscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L20: */
+	    }
+	}
+
+    }
+
+/*
+       Backward permutation
+
+       For  I = ILO-1 step -1 until 1,
+                IHI+1 step 1 until N do --
+*/
+
+L30:
+    if ((lsame_(job, "P")) || (lsame_(job, "B"))) {
+	if (rightv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L40;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = scale[i__];
+		if (k == i__) {
+		    goto L40;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L50;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = scale[i__];
+		if (k == i__) {
+		    goto L50;
+		}
+		sswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L50:
+		;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SGEBAK */
+
+} /* sgebak_ */
+
+/* Subroutine */ int sgebal_(char *job, integer *n, real *a, integer *lda,
+	integer *ilo, integer *ihi, real *scale, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Local variables */
+    static real c__, f, g;
+    static integer i__, j, k, l, m;
+    static real r__, s, ca, ra;
+    static integer ica, ira, iexc;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sswap_(integer *, real *, integer *, real *, integer *);
+    static real sfmin1, sfmin2, sfmax1, sfmax2;
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    static logical noconv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SGEBAL balances a general real matrix A.  This involves, first,
+    permuting A by a similarity transformation to isolate eigenvalues
+    in the first 1 to ILO-1 and last IHI+1 to N elements on the
+    diagonal; and second, applying a diagonal similarity transformation
+    to rows and columns ILO to IHI to make the rows and columns as
+    close in norm as possible.  Both steps are optional.
+
+    Balancing may reduce the 1-norm of the matrix, and improve the
+    accuracy of the computed eigenvalues and/or eigenvectors.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the operations to be performed on A:
+            = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+                    for i = 1,...,N;
+            = 'P':  permute only;
+            = 'S':  scale only;
+            = 'B':  both permute and scale.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the input matrix A.
+            On exit,  A is overwritten by the balanced matrix.
+            If JOB = 'N', A is not referenced.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    ILO     (output) INTEGER
+    IHI     (output) INTEGER
+            ILO and IHI are set to integers such that on exit
+            A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+            If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+
+    SCALE   (output) REAL array, dimension (N)
+            Details of the permutations and scaling factors applied to
+            A.  If P(j) is the index of the row and column interchanged
+            with row and column j and D(j) is the scaling factor
+            applied to row and column j, then
+            SCALE(j) = P(j)    for j = 1,...,ILO-1
+                     = D(j)    for j = ILO,...,IHI
+                     = P(j)    for j = IHI+1,...,N.
+            The order in which the interchanges are made is N to IHI+1,
+            then 1 to ILO-1.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The permutations consist of row and column interchanges which put
+    the matrix in the form
+
+               ( T1   X   Y  )
+       P A P = (  0   B   Z  )
+               (  0   0   T2 )
+
+    where T1 and T2 are upper triangular matrices whose eigenvalues lie
+    along the diagonal.  The column indices ILO and IHI mark the starting
+    and ending columns of the submatrix B. Balancing consists of applying
+    a diagonal similarity transformation inv(D) * B * D to make the
+    1-norms of each row of B and its corresponding column nearly equal.
+    The output matrix is
+
+       ( T1     X*D          Y    )
+       (  0  inv(D)*B*D  inv(D)*Z ).
+       (  0      0           T2   )
+
+    Information about the permutations P and the diagonal matrix D is
+    returned in the vector SCALE.
+
+    This subroutine is based on the EISPACK routine BALANC.
+
+    Modified by Tzu-Yi Chen, Computer Science Division, University of
+      California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --scale;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBAL", &i__1);
+	return 0;
+    }
+
+    k = 1;
+    l = *n;
+
+    if (*n == 0) {
+	goto L210;
+    }
+
+    if (lsame_(job, "N")) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scale[i__] = 1.f;
+/* L10: */
+	}
+	goto L210;
+    }
+
+    if (lsame_(job, "S")) {
+	goto L120;
+    }
+
+/*     Permutation to isolate eigenvalues if possible */
+
+    goto L50;
+
+/*     Row and column exchange. */
+
+L20:
+    scale[m] = (real) j;
+    if (j == m) {
+	goto L30;
+    }
+
+    sswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
+    i__1 = *n - k + 1;
+    sswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
+
+L30:
+    switch (iexc) {
+	case 1:  goto L40;
+	case 2:  goto L80;
+    }
+
+/*     Search for rows isolating an eigenvalue and push them down. */
+
+L40:
+    if (l == 1) {
+	goto L210;
+    }
+    --l;
+
+L50:
+    for (j = l; j >= 1; --j) {
+
+	i__1 = l;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (i__ == j) {
+		goto L60;
+	    }
+	    if (a[j + i__ * a_dim1] != 0.f) {
+		goto L70;
+	    }
+L60:
+	    ;
+	}
+
+	m = l;
+	iexc = 1;
+	goto L20;
+L70:
+	;
+    }
+
+    goto L90;
+
+/*     Search for columns isolating an eigenvalue and push them left. */
+
+L80:
+    ++k;
+
+L90:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+
+	i__2 = l;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    if (i__ == j) {
+		goto L100;
+	    }
+	    if (a[i__ + j * a_dim1] != 0.f) {
+		goto L110;
+	    }
+L100:
+	    ;
+	}
+
+	m = k;
+	iexc = 2;
+	goto L20;
+L110:
+	;
+    }
+
+L120:
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	scale[i__] = 1.f;
+/* L130: */
+    }
+
+    if (lsame_(job, "P")) {
+	goto L210;
+    }
+
+/*
+       Balance the submatrix in rows K to L.
+
+       Iterative loop for norm reduction
+*/
+
+    sfmin1 = slamch_("S") / slamch_("P");
+    sfmax1 = 1.f / sfmin1;
+    sfmin2 = sfmin1 * 8.f;
+    sfmax2 = 1.f / sfmin2;
+L140:
+    noconv = FALSE_;
+
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	c__ = 0.f;
+	r__ = 0.f;
+
+	i__2 = l;
+	for (j = k; j <= i__2; ++j) {
+	    if (j == i__) {
+		goto L150;
+	    }
+	    c__ += (r__1 = a[j + i__ * a_dim1], dabs(r__1));
+	    r__ += (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+L150:
+	    ;
+	}
+	ica = isamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
+	ca = (r__1 = a[ica + i__ * a_dim1], dabs(r__1));
+	i__2 = *n - k + 1;
+	ira = isamax_(&i__2, &a[i__ + k * a_dim1], lda);
+	ra = (r__1 = a[i__ + (ira + k - 1) * a_dim1], dabs(r__1));
+
+/*        Guard against zero C or R due to underflow. */
+
+	if ((c__ == 0.f) || (r__ == 0.f)) {
+	    goto L200;
+	}
+	g = r__ / 8.f;
+	f = 1.f;
+	s = c__ + r__;
+L160:
+/* Computing MAX */
+	r__1 = max(f,c__);
+/* Computing MIN */
+	r__2 = min(r__,g);
+	if (((c__ >= g) || (dmax(r__1,ca) >= sfmax2)) || (dmin(r__2,ra) <=
+		sfmin2)) {
+	    goto L170;
+	}
+	f *= 8.f;
+	c__ *= 8.f;
+	ca *= 8.f;
+	r__ /= 8.f;
+	g /= 8.f;
+	ra /= 8.f;
+	goto L160;
+
+L170:
+	g = c__ / 8.f;
+L180:
+/* Computing MIN */
+	r__1 = min(f,c__), r__1 = min(r__1,g);
+	if (((g < r__) || (dmax(r__,ra) >= sfmax2)) || (dmin(r__1,ca) <=
+		sfmin2)) {
+	    goto L190;
+	}
+	f /= 8.f;
+	c__ /= 8.f;
+	g /= 8.f;
+	ca /= 8.f;
+	r__ *= 8.f;
+	ra *= 8.f;
+	goto L180;
+
+/*        Now balance. */
+
+L190:
+	if (c__ + r__ >= s * .95f) {
+	    goto L200;
+	}
+	if (f < 1.f && scale[i__] < 1.f) {
+	    if (f * scale[i__] <= sfmin1) {
+		goto L200;
+	    }
+	}
+	if (f > 1.f && scale[i__] > 1.f) {
+	    if (scale[i__] >= sfmax1 / f) {
+		goto L200;
+	    }
+	}
+	g = 1.f / f;
+	scale[i__] *= f;
+	noconv = TRUE_;
+
+	i__2 = *n - k + 1;
+	sscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
+	sscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
+
+L200:
+	;
+    }
+
+    if (noconv) {
+	goto L140;
+    }
+
+L210:
+    *ilo = k;
+    *ihi = l;
+
+    return 0;
+
+/*     End of SGEBAL */
+
+} /* sgebal_ */
+
+/* Subroutine */ int sgebd2_(integer *m, integer *n, real *a, integer *lda,
+	real *d__, real *e, real *tauq, real *taup, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *), slarfg_(integer *, real *, real *,
+	    integer *, real *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SGEBD2 reduces a real general m by n matrix A to upper or lower
+    bidiagonal form B by an orthogonal transformation: Q' * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the orthogonal matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the orthogonal matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) REAL array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) REAL array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix Q. See Further Details.
+
+    TAUP    (output) REAL array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix P. See Further Details.
+
+    WORK    (workspace) REAL array, dimension (max(M,N))
+
+    INFO    (output) INTEGER
+            = 0: successful exit.
+            < 0: if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBD2", &i__1);
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ *
+		    a_dim1], &c__1, &tauq[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tauq[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = d__[i__];
+
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector G(i) to annihilate
+                A(i,i+2:n)
+*/
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(
+			i__3,*n) * a_dim1], lda, &taup[i__]);
+		e[i__] = a[i__ + (i__ + 1) * a_dim1];
+		a[i__ + (i__ + 1) * a_dim1] = 1.f;
+
+/*              Apply G(i) to A(i+1:m,i+1:n) from the right */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		slarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1],
+			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &work[1]);
+		a[i__ + (i__ + 1) * a_dim1] = e[i__];
+	    } else {
+		taup[i__] = 0.f;
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) *
+		    a_dim1], lda, &taup[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+
+/*           Apply G(i) to A(i+1:m,i:n) from the right */
+
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+/* Computing MIN */
+	    i__4 = i__ + 1;
+	    slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[
+		    i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = d__[i__];
+
+	    if (i__ < *m) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:m,i)
+*/
+
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) +
+			i__ * a_dim1], &c__1, &tauq[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*              Apply H(i) to A(i+1:m,i+1:n) from the left */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &tauq[i__], &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &work[1]);
+		a[i__ + 1 + i__ * a_dim1] = e[i__];
+	    } else {
+		tauq[i__] = 0.f;
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of SGEBD2 */
+
+} /* sgebd2_ */
+
+/* Subroutine */ int sgebrd_(integer *m, integer *n, real *a, integer *lda,
+	real *d__, real *e, real *tauq, real *taup, real *work, integer *
+	lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, nb, nx;
+    static real ws;
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer minmn;
+    extern /* Subroutine */ int sgebd2_(integer *, integer *, real *, integer
+	    *, real *, real *, real *, real *, real *, integer *), slabrd_(
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , real *, real *, real *, integer *, real *, integer *), xerbla_(
+	    char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwrkx, ldwrky, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SGEBRD reduces a general real M-by-N matrix A to upper or lower
+    bidiagonal form B by an orthogonal transformation: Q**T * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the M-by-N general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the orthogonal matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the orthogonal matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) REAL array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) REAL array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix Q. See Further Details.
+
+    TAUP    (output) REAL array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix P. See Further Details.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,M,N).
+            For optimum performance LWORK >= (M+N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in A(i+1:m,i);
+    u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in A(i,i+2:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors;
+    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = ilaenv_(&c__1, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = max(i__1,i__2);
+    lwkopt = (*m + *n) * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = max(1,*m);
+	if (*lwork < max(i__1,*n) && ! lquery) {
+	    *info = -10;
+	}
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("SGEBRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    minmn = min(*m,*n);
+    if (minmn == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    ws = (real) max(*m,*n);
+    ldwrkx = *m;
+    ldwrky = *n;
+
+    if (nb > 1 && nb < minmn) {
+
+/*
+          Set the crossover point NX.
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+
+/*        Determine when to switch from blocked to unblocked code. */
+
+	if (nx < minmn) {
+	    ws = (real) ((*m + *n) * nb);
+	    if ((real) (*lwork) < ws) {
+
+/*
+                Not enough work space for the optimal NB, consider using
+                a smaller block size.
+*/
+
+		nbmin = ilaenv_(&c__2, "SGEBRD", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		if (*lwork >= (*m + *n) * nbmin) {
+		    nb = *lwork / (*m + *n);
+		} else {
+		    nb = 1;
+		    nx = minmn;
+		}
+	    }
+	}
+    } else {
+	nx = minmn;
+    }
+
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+
+/*
+          Reduce rows and columns i:i+nb-1 to bidiagonal form and return
+          the matrices X and Y which are needed to update the unreduced
+          part of the matrix
+*/
+
+	i__3 = *m - i__ + 1;
+	i__4 = *n - i__ + 1;
+	slabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
+		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx
+		* nb + 1], &ldwrky);
+
+/*
+          Update the trailing submatrix A(i+nb:m,i+nb:n), using an update
+          of the form  A := A - V*Y' - X*U'
+*/
+
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	sgemm_("No transpose", "Transpose", &i__3, &i__4, &nb, &c_b1290, &a[
+		i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb + nb + 1], &
+		ldwrky, &c_b1011, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	sgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &c_b1290, &
+		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
+		c_b1011, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*        Copy diagonal and off-diagonal elements of B back into A */
+
+	if (*m >= *n) {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + (j + 1) * a_dim1] = e[j];
+/* L10: */
+	    }
+	} else {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + j * a_dim1] = d__[j];
+		a[j + 1 + j * a_dim1] = e[j];
+/* L20: */
+	    }
+	}
+/* L30: */
+    }
+
+/*     Use unblocked code to reduce the remainder of the matrix */
+
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    sgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
+	    tauq[i__], &taup[i__], &work[1], &iinfo);
+    work[1] = ws;
+    return 0;
+
+/*     End of SGEBRD */
+
+} /* sgebrd_ */
+
+/* Subroutine */ int sgeev_(char *jobvl, char *jobvr, integer *n, real *a,
+	integer *lda, real *wr, real *wi, real *vl, integer *ldvl, real *vr,
+	integer *ldvr, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3, i__4;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, k;
+    static real r__, cs, sn;
+    static integer ihi;
+    static real scl;
+    static integer ilo;
+    static real dum[1], eps;
+    static integer ibal;
+    static char side[1];
+    static integer maxb;
+    static real anrm;
+    static integer ierr, itau, iwrk, nout;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *);
+    extern doublereal snrm2_(integer *, real *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    extern doublereal slapy2_(real *, real *);
+    extern /* Subroutine */ int slabad_(real *, real *);
+    static logical scalea;
+    static real cscale;
+    extern /* Subroutine */ int sgebak_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, integer *, integer *), sgebal_(char *, integer *, real *, integer *,
+	    integer *, integer *, real *, integer *);
+    extern doublereal slamch_(char *), slange_(char *, integer *,
+	    integer *, real *, integer *, real *);
+    extern /* Subroutine */ int sgehrd_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *, integer *), xerbla_(char
+	    *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical select[1];
+    static real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
+	    integer *, real *, integer *), slartg_(real *, real *,
+	    real *, real *, real *), sorghr_(integer *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *, integer *), shseqr_(
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, real *, real *, integer *, real *, integer *, integer *), strevc_(char *, char *, logical *, integer *,
+	    real *, integer *, real *, integer *, real *, integer *, integer *
+	    , integer *, real *, integer *);
+    static integer minwrk, maxwrk;
+    static logical wantvl;
+    static real smlnum;
+    static integer hswork;
+    static logical lquery, wantvr;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 8, 1999
+
+
+    Purpose
+    =======
+
+    SGEEV computes for an N-by-N real nonsymmetric matrix A, the
+    eigenvalues and, optionally, the left and/or right eigenvectors.
+
+    The right eigenvector v(j) of A satisfies
+                     A * v(j) = lambda(j) * v(j)
+    where lambda(j) is its eigenvalue.
+    The left eigenvector u(j) of A satisfies
+                  u(j)**H * A = lambda(j) * u(j)**H
+    where u(j)**H denotes the conjugate transpose of u(j).
+
+    The computed eigenvectors are normalized to have Euclidean norm
+    equal to 1 and largest component real.
+
+    Arguments
+    =========
+
+    JOBVL   (input) CHARACTER*1
+            = 'N': left eigenvectors of A are not computed;
+            = 'V': left eigenvectors of A are computed.
+
+    JOBVR   (input) CHARACTER*1
+            = 'N': right eigenvectors of A are not computed;
+            = 'V': right eigenvectors of A are computed.
+
+    N       (input) INTEGER
+            The order of the matrix A. N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the N-by-N matrix A.
+            On exit, A has been overwritten.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    WR      (output) REAL array, dimension (N)
+    WI      (output) REAL array, dimension (N)
+            WR and WI contain the real and imaginary parts,
+            respectively, of the computed eigenvalues.  Complex
+            conjugate pairs of eigenvalues appear consecutively
+            with the eigenvalue having the positive imaginary part
+            first.
+
+    VL      (output) REAL array, dimension (LDVL,N)
+            If JOBVL = 'V', the left eigenvectors u(j) are stored one
+            after another in the columns of VL, in the same order
+            as their eigenvalues.
+            If JOBVL = 'N', VL is not referenced.
+            If the j-th eigenvalue is real, then u(j) = VL(:,j),
+            the j-th column of VL.
+            If the j-th and (j+1)-st eigenvalues form a complex
+            conjugate pair, then u(j) = VL(:,j) + i*VL(:,j+1) and
+            u(j+1) = VL(:,j) - i*VL(:,j+1).
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= 1; if
+            JOBVL = 'V', LDVL >= N.
+
+    VR      (output) REAL array, dimension (LDVR,N)
+            If JOBVR = 'V', the right eigenvectors v(j) are stored one
+            after another in the columns of VR, in the same order
+            as their eigenvalues.
+            If JOBVR = 'N', VR is not referenced.
+            If the j-th eigenvalue is real, then v(j) = VR(:,j),
+            the j-th column of VR.
+            If the j-th and (j+1)-st eigenvalues form a complex
+            conjugate pair, then v(j) = VR(:,j) + i*VR(:,j+1) and
+            v(j+1) = VR(:,j) - i*VR(:,j+1).
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= 1; if
+            JOBVR = 'V', LDVR >= N.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,3*N), and
+            if JOBVL = 'V' or JOBVR = 'V', LWORK >= 4*N.  For good
+            performance, LWORK must generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = i, the QR algorithm failed to compute all the
+                  eigenvalues, and no eigenvectors have been computed;
+                  elements i+1:N of WR and WI contain eigenvalues which
+                  have converged.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --wr;
+    --wi;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -1;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) {
+	*info = -9;
+    } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) {
+	*info = -11;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         NB refers to the optimal block size for the immediately
+         following subroutine, as returned by ILAENV.
+         HSWORK refers to the workspace preferred by SHSEQR, as
+         calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+         the worst case.)
+*/
+
+    minwrk = 1;
+    if (*info == 0 && ((*lwork >= 1) || (lquery))) {
+	maxwrk = ((*n) << (1)) + *n * ilaenv_(&c__1, "SGEHRD", " ", n, &c__1,
+		n, &c__0, (ftnlen)6, (ftnlen)1);
+	if (! wantvl && ! wantvr) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = *n * 3;
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "SHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "EN", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n +
+		    hswork;
+	    maxwrk = max(i__1,i__2);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = (*n) << (2);
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1,
+		    "SORGHR", " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "SHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "SHSEQR", "SV", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + 1, i__1 = max(i__1,i__2), i__2 = *n +
+		    hswork;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = (*n) << (2);
+	    maxwrk = max(i__1,i__2);
+	}
+	work[1] = (real) maxwrk;
+    }
+    if (*lwork < minwrk && ! lquery) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEEV ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0.f && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	slascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*
+       Balance the matrix
+       (Workspace: need N)
+*/
+
+    ibal = 1;
+    sgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &work[ibal], &ierr);
+
+/*
+       Reduce to upper Hessenberg form
+       (Workspace: need 3*N, prefer 2*N+N*NB)
+*/
+
+    itau = ibal + *n;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    sgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvl) {
+
+/*
+          Want left eigenvectors
+          Copy Householder vectors to VL
+*/
+
+	*(unsigned char *)side = 'L';
+	slacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*
+          Generate orthogonal matrix in VL
+          (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VL
+          (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vl[vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*
+             Want left and right eigenvectors
+             Copy Schur vectors to VR
+*/
+
+	    *(unsigned char *)side = 'B';
+	    slacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*
+          Want right eigenvectors
+          Copy Householder vectors to VR
+*/
+
+	*(unsigned char *)side = 'R';
+	slacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*
+          Generate orthogonal matrix in VR
+          (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	sorghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VR
+          (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*
+          Compute eigenvalues only
+          (Workspace: need N+1, prefer N+HSWORK (see comments) )
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	shseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &wr[1], &wi[1], &
+		vr[vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO > 0 from SHSEQR, then quit */
+
+    if (*info > 0) {
+	goto L50;
+    }
+
+    if ((wantvl) || (wantvr)) {
+
+/*
+          Compute left and/or right eigenvectors
+          (Workspace: need 4*N)
+*/
+
+	strevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl,
+		 &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &ierr);
+    }
+
+    if (wantvl) {
+
+/*
+          Undo balancing of left eigenvectors
+          (Workspace: need N)
+*/
+
+	sgebak_("B", "L", n, &ilo, &ihi, &work[ibal], n, &vl[vl_offset], ldvl,
+		 &ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.f) {
+		scl = 1.f / snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.f) {
+		r__1 = snrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+		r__2 = snrm2_(n, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		scl = 1.f / slapy2_(&r__1, &r__2);
+		sscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    r__1 = vl[k + i__ * vl_dim1];
+/* Computing 2nd power */
+		    r__2 = vl[k + (i__ + 1) * vl_dim1];
+		    work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2;
+/* L10: */
+		}
+		k = isamax_(n, &work[iwrk], &c__1);
+		slartg_(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1],
+			&cs, &sn, &r__);
+		srot_(n, &vl[i__ * vl_dim1 + 1], &c__1, &vl[(i__ + 1) *
+			vl_dim1 + 1], &c__1, &cs, &sn);
+		vl[k + (i__ + 1) * vl_dim1] = 0.f;
+	    }
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*
+          Undo balancing of right eigenvectors
+          (Workspace: need N)
+*/
+
+	sgebak_("B", "R", n, &ilo, &ihi, &work[ibal], n, &vr[vr_offset], ldvr,
+		 &ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (wi[i__] == 0.f) {
+		scl = 1.f / snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    } else if (wi[i__] > 0.f) {
+		r__1 = snrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+		r__2 = snrm2_(n, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		scl = 1.f / slapy2_(&r__1, &r__2);
+		sscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+		sscal_(n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &c__1);
+		i__2 = *n;
+		for (k = 1; k <= i__2; ++k) {
+/* Computing 2nd power */
+		    r__1 = vr[k + i__ * vr_dim1];
+/* Computing 2nd power */
+		    r__2 = vr[k + (i__ + 1) * vr_dim1];
+		    work[iwrk + k - 1] = r__1 * r__1 + r__2 * r__2;
+/* L30: */
+		}
+		k = isamax_(n, &work[iwrk], &c__1);
+		slartg_(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1],
+			&cs, &sn, &r__);
+		srot_(n, &vr[i__ * vr_dim1 + 1], &c__1, &vr[(i__ + 1) *
+			vr_dim1 + 1], &c__1, &cs, &sn);
+		vr[k + (i__ + 1) * vr_dim1] = 0.f;
+	    }
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = max(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[*info +
+		1], &i__2, &ierr);
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = max(i__3,1);
+	slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[*info +
+		1], &i__2, &ierr);
+	if (*info > 0) {
+	    i__1 = ilo - 1;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wr[1],
+		    n, &ierr);
+	    i__1 = ilo - 1;
+	    slascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &wi[1],
+		    n, &ierr);
+	}
+    }
+
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGEEV */
+
+} /* sgeev_ */
+
+/* Subroutine */ int sgehd2_(integer *n, integer *ilo, integer *ihi, real *a,
+	integer *lda, real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__;
+    static real aii;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *), slarfg_(integer *, real *, real *,
+	    integer *, real *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SGEHD2 reduces a real general matrix A to upper Hessenberg form H by
+    an orthogonal similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to SGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= max(1,N).
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the n by n general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the orthogonal matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) REAL array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) REAL array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEHD2", &i__1);
+	return 0;
+    }
+
+    i__1 = *ihi - 1;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+
+/*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */
+
+	i__2 = *ihi - i__;
+/* Computing MIN */
+	i__3 = i__ + 2;
+	slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ *
+		a_dim1], &c__1, &tau[i__]);
+	aii = a[i__ + 1 + i__ * a_dim1];
+	a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*        Apply H(i) to A(1:ihi,i+1:ihi) from the right */
+
+	i__2 = *ihi - i__;
+	slarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]);
+
+/*        Apply H(i) to A(i+1:ihi,i+1:n) from the left */
+
+	i__2 = *ihi - i__;
+	i__3 = *n - i__;
+	slarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]);
+
+	a[i__ + 1 + i__ * a_dim1] = aii;
+/* L10: */
+    }
+
+    return 0;
+
+/*     End of SGEHD2 */
+
+} /* sgehd2_ */
+
+/* Subroutine */ int sgehrd_(integer *n, integer *ilo, integer *ihi, real *a,
+	integer *lda, real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__;
+    static real t[4160]	/* was [65][64] */;
+    static integer ib;
+    static real ei;
+    static integer nb, nh, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *), sgehd2_(integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *), slarfb_(
+	    char *, char *, char *, char *, integer *, integer *, integer *,
+	    real *, integer *, real *, integer *, real *, integer *, real *,
+	    integer *), slahrd_(integer *,
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SGEHRD reduces a real general matrix A to upper Hessenberg form H by
+    an orthogonal similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to SGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the N-by-N general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the orthogonal matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) REAL array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+            zero.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MIN */
+    i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = min(i__1,i__2);
+    lwkopt = *n * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEHRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	tau[i__] = 0.f;
+/* L10: */
+    }
+    i__1 = *n - 1;
+    for (i__ = max(1,*ihi); i__ <= i__1; ++i__) {
+	tau[i__] = 0.f;
+/* L20: */
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+/*
+       Determine the block size.
+
+   Computing MIN
+*/
+    i__1 = 64, i__2 = ilaenv_(&c__1, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = min(i__1,i__2);
+    nbmin = 2;
+    iws = 1;
+    if (nb > 1 && nb < nh) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "SGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    iws = *n * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code.
+
+   Computing MAX
+*/
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEHRD", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+		if (*lwork >= *n * nbmin) {
+		    nb = *lwork / *n;
+		} else {
+		    nb = 1;
+		}
+	    }
+	}
+    }
+    ldwork = *n;
+
+    if ((nb < nbmin) || (nb >= nh)) {
+
+/*        Use unblocked code below */
+
+	i__ = *ilo;
+
+    } else {
+
+/*        Use blocked code */
+
+	i__1 = *ihi - 1 - nx;
+	i__2 = nb;
+	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - i__;
+	    ib = min(i__3,i__4);
+
+/*
+             Reduce columns i:i+ib-1 to Hessenberg form, returning the
+             matrices V and T of the block reflector H = I - V*T*V'
+             which performs the reduction, and also the matrix Y = A*V*T
+*/
+
+	    slahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
+		    c__65, &work[1], &ldwork);
+
+/*
+             Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+             right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+             to 1.
+*/
+
+	    ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.f;
+	    i__3 = *ihi - i__ - ib + 1;
+	    sgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b1290, &
+		    work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
+		    c_b1011, &a[(i__ + ib) * a_dim1 + 1], lda);
+	    a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;
+
+/*
+             Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+             left
+*/
+
+	    i__3 = *ihi - i__;
+	    i__4 = *n - i__ - ib + 1;
+	    slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+		    i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[
+		    i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork);
+/* L30: */
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+
+    sgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    work[1] = (real) iws;
+
+    return 0;
+
+/*     End of SGEHRD */
+
+} /* sgehrd_ */
+
+/* Subroutine */ int sgelq2_(integer *m, integer *n, real *a, integer *lda,
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, k;
+    static real aii;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *), slarfg_(integer *, real *, real *,
+	    integer *, real *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SGELQ2 computes an LQ factorization of a real m by n matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m by min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) REAL array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) REAL array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k) . . . H(2) H(1), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQ2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
+
+	i__2 = *n - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) * a_dim1]
+		, lda, &tau[i__]);
+	if (i__ < *m) {
+
+/*           Apply H(i) to A(i+1:m,i:n) from the right */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+	    slarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
+		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGELQ2 */
+
+} /* sgelq2_ */
+
+/* Subroutine */ int sgelqf_(integer *m, integer *n, real *a, integer *lda,
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int sgelq2_(integer *, integer *, real *, integer
+	    *, real *, real *, integer *), slarfb_(char *, char *, char *,
+	    char *, integer *, integer *, integer *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SGELQF computes an LQ factorization of a real M-by-N matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) REAL array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k) . . . H(2) H(1), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:n) is stored on exit in A(i,i+1:n),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *m * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELQF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGELQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGELQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the LQ factorization of the current block
+             A(i:i+ib-1,i:n)
+*/
+
+	    i__3 = *n - i__ + 1;
+	    sgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *n - i__ + 1;
+		slarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i+ib:m,i:n) from the right */
+
+		i__3 = *m - i__ - ib + 1;
+		i__4 = *n - i__ + 1;
+		slarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3,
+			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib +
+			1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	sgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGELQF */
+
+} /* sgelqf_ */
+
+/* Subroutine */ int sgelsd_(integer *m, integer *n, integer *nrhs, real *a,
+	integer *lda, real *b, integer *ldb, real *s, real *rcond, integer *
+	rank, real *work, integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Builtin functions */
+    double log(doublereal);
+
+    /* Local variables */
+    static integer ie, il, mm;
+    static real eps, anrm, bnrm;
+    static integer itau, nlvl, iascl, ibscl;
+    static real sfmin;
+    static integer minmn, maxmn, itaup, itauq, mnthr, nwork;
+    extern /* Subroutine */ int slabad_(real *, real *), sgebrd_(integer *,
+	    integer *, real *, integer *, real *, real *, real *, real *,
+	    real *, integer *, integer *);
+    extern doublereal slamch_(char *), slange_(char *, integer *,
+	    integer *, real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer
+	    *, real *, real *, integer *, integer *), slalsd_(char *, integer
+	    *, integer *, integer *, real *, real *, real *, integer *, real *
+	    , integer *, real *, integer *, integer *), slascl_(char *
+	    , integer *, integer *, real *, real *, integer *, integer *,
+	    real *, integer *, integer *);
+    static integer wlalsd;
+    extern /* Subroutine */ int sgeqrf_(integer *, integer *, real *, integer
+	    *, real *, real *, integer *, integer *), slacpy_(char *, integer
+	    *, integer *, real *, integer *, real *, integer *),
+	    slaset_(char *, integer *, integer *, real *, real *, real *,
+	    integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *,
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *, integer *);
+    static integer minwrk, maxwrk;
+    static real smlnum;
+    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *);
+    static logical lquery;
+    static integer smlsiz;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SGELSD computes the minimum-norm solution to a real linear least
+    squares problem:
+        minimize 2-norm(| b - A*x |)
+    using the singular value decomposition (SVD) of A. A is an M-by-N
+    matrix which may be rank-deficient.
+
+    Several right hand side vectors b and solution vectors x can be
+    handled in a single call; they are stored as the columns of the
+    M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+    matrix X.
+
+    The problem is solved in three steps:
+    (1) Reduce the coefficient matrix A to bidiagonal form with
+        Householder transformations, reducing the original problem
+        into a "bidiagonal least squares problem" (BLS)
+    (2) Solve the BLS using a divide and conquer approach.
+    (3) Apply back all the Householder tranformations to solve
+        the original least squares problem.
+
+    The effective rank of A is determined by treating as zero those
+    singular values which are less than RCOND times the largest singular
+    value.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of A. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of A. N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrices B and X. NRHS >= 0.
+
+    A       (input) REAL array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, A has been destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (input/output) REAL array, dimension (LDB,NRHS)
+            On entry, the M-by-NRHS right hand side matrix B.
+            On exit, B is overwritten by the N-by-NRHS solution
+            matrix X.  If m >= n and RANK = n, the residual
+            sum-of-squares for the solution in the i-th column is given
+            by the sum of squares of elements n+1:m in that column.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B. LDB >= max(1,max(M,N)).
+
+    S       (output) REAL array, dimension (min(M,N))
+            The singular values of A in decreasing order.
+            The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+
+    RCOND   (input) REAL
+            RCOND is used to determine the effective rank of A.
+            Singular values S(i) <= RCOND*S(1) are treated as zero.
+            If RCOND < 0, machine precision is used instead.
+
+    RANK    (output) INTEGER
+            The effective rank of A, i.e., the number of singular values
+            which are greater than RCOND*S(1).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK must be at least 1.
+            The exact minimum amount of workspace needed depends on M,
+            N and NRHS. As long as LWORK is at least
+                12*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2,
+            if M is greater than or equal to N or
+                12*M + 2*M*SMLSIZ + 8*M*NLVL + M*NRHS + (SMLSIZ+1)**2,
+            if M is less than N, the code will execute correctly.
+            SMLSIZ is returned by ILAENV and is equal to the maximum
+            size of the subproblems at the bottom of the computation
+            tree (usually about 25), and
+               NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+            For good performance, LWORK should generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+
+    IWORK   (workspace) INTEGER array, dimension (LIWORK)
+            LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+            where MINMN = MIN( M,N ).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  the algorithm for computing the SVD failed to converge;
+                  if INFO = i, i off-diagonal elements of an intermediate
+                  bidiagonal form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input arguments.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    maxmn = max(*m,*n);
+    mnthr = ilaenv_(&c__6, "SGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*ldb < max(1,maxmn)) {
+	*info = -7;
+    }
+
+    smlsiz = ilaenv_(&c__9, "SGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Compute workspace.
+       (Note: Comments in the code beginning "Workspace:" describe the
+       minimal amount of workspace needed at that point in the code,
+       as well as the preferred amount for good performance.
+       NB refers to the optimal block size for the immediately
+       following subroutine, as returned by ILAENV.)
+*/
+
+    minwrk = 1;
+    minmn = max(1,minmn);
+/* Computing MAX */
+    i__1 = (integer) (log((real) minmn / (real) (smlsiz + 1)) / log(2.f)) + 1;
+    nlvl = max(i__1,0);
+
+    if (*info == 0) {
+	maxwrk = 0;
+	mm = *m;
+	if (*m >= *n && *m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m,
+		    n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + *nrhs * ilaenv_(&c__1, "SORMQR", "LT",
+		    m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
+	    maxwrk = max(i__1,i__2);
+	}
+	if (*m >= *n) {
+
+/*
+             Path 1 - overdetermined or exactly determined.
+
+   Computing MAX
+*/
+	    i__1 = maxwrk, i__2 = *n * 3 + (mm + *n) * ilaenv_(&c__1, "SGEBRD"
+		    , " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + *nrhs * ilaenv_(&c__1, "SORMBR",
+		    "QLT", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + (*n - 1) * ilaenv_(&c__1, "SORMBR",
+		     "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = max(i__1,i__2);
+/* Computing 2nd power */
+	    i__1 = smlsiz + 1;
+	    wlalsd = *n * 9 + ((*n) << (1)) * smlsiz + ((*n) << (3)) * nlvl +
+		    *n * *nrhs + i__1 * i__1;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * 3 + wlalsd;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = *n * 3 + mm, i__2 = *n * 3 + *nrhs, i__1 = max(i__1,i__2),
+		    i__2 = *n * 3 + wlalsd;
+	    minwrk = max(i__1,i__2);
+	}
+	if (*n > *m) {
+/* Computing 2nd power */
+	    i__1 = smlsiz + 1;
+	    wlalsd = *m * 9 + ((*m) << (1)) * smlsiz + ((*m) << (3)) * nlvl +
+		    *m * *nrhs + i__1 * i__1;
+	    if (*n >= mnthr) {
+
+/*
+                Path 2a - underdetermined, with many more columns
+                than rows.
+*/
+
+		maxwrk = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &c_n1,
+			&c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1))
+			* ilaenv_(&c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs *
+			ilaenv_(&c__1, "SORMBR", "QLT", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) *
+			ilaenv_(&c__1, "SORMBR", "PLN", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+		if (*nrhs > 1) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+		    maxwrk = max(i__1,i__2);
+		} else {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1));
+		    maxwrk = max(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m + *nrhs * ilaenv_(&c__1, "SORMLQ",
+			"LT", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)2);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + wlalsd;
+		maxwrk = max(i__1,i__2);
+	    } else {
+
+/*              Path 2 - remaining underdetermined cases. */
+
+		maxwrk = *m * 3 + (*n + *m) * ilaenv_(&c__1, "SGEBRD", " ", m,
+			 n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + *nrhs * ilaenv_(&c__1, "SORMBR"
+			, "QLT", m, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR",
+			"PLN", n, nrhs, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * 3 + wlalsd;
+		maxwrk = max(i__1,i__2);
+	    }
+/* Computing MAX */
+	    i__1 = *m * 3 + *nrhs, i__2 = *m * 3 + *m, i__1 = max(i__1,i__2),
+		    i__2 = *m * 3 + wlalsd;
+	    minwrk = max(i__1,i__2);
+	}
+	minwrk = min(minwrk,maxwrk);
+	work[1] = (real) maxwrk;
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGELSD", &i__1);
+	return 0;
+    } else if (lquery) {
+	goto L10;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*m == 0) || (*n == 0)) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters. */
+
+    eps = slamch_("P");
+    sfmin = slamch_("S");
+    smlnum = sfmin / eps;
+    bignum = 1.f / smlnum;
+    slabad_(&smlnum, &bignum);
+
+/*     Scale A if max entry outside range [SMLNUM,BIGNUM]. */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, &work[1]);
+    iascl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 2;
+    } else if (anrm == 0.f) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = max(*m,*n);
+	slaset_("F", &i__1, nrhs, &c_b320, &c_b320, &b[b_offset], ldb);
+	slaset_("F", &minmn, &c__1, &c_b320, &c_b320, &s[1], &c__1)
+		;
+	*rank = 0;
+	goto L10;
+    }
+
+/*     Scale B if max entry outside range [SMLNUM,BIGNUM]. */
+
+    bnrm = slange_("M", m, nrhs, &b[b_offset], ldb, &work[1]);
+    ibscl = 0;
+    if (bnrm > 0.f && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	slascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     If M < N make sure certain entries of B are zero. */
+
+    if (*m < *n) {
+	i__1 = *n - *m;
+	slaset_("F", &i__1, nrhs, &c_b320, &c_b320, &b[*m + 1 + b_dim1], ldb);
+    }
+
+/*     Overdetermined case. */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined. */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+	    itau = 1;
+	    nwork = itau + *n;
+
+/*
+             Compute A=Q*R.
+             (Workspace: need 2*N, prefer N+N*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+
+/*
+             Multiply B by transpose(Q).
+             (Workspace: need N+NRHS, prefer N+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sormqr_("L", "T", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+/*           Zero out below R. */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		slaset_("L", &i__1, &i__2, &c_b320, &c_b320, &a[a_dim1 + 2],
+			lda);
+	    }
+	}
+
+	ie = 1;
+	itauq = ie + *n;
+	itaup = itauq + *n;
+	nwork = itaup + *n;
+
+/*
+          Bidiagonalize R in A.
+          (Workspace: need 3*N+MM, prefer 3*N+(MM+N)*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	sgebrd_(&mm, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of R.
+          (Workspace: need 3*N+NRHS, prefer 3*N+NRHS*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	sormbr_("Q", "L", "T", &mm, nrhs, n, &a[a_offset], lda, &work[itauq],
+		&b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	slalsd_("U", &smlsiz, n, nrhs, &s[1], &work[ie], &b[b_offset], ldb,
+		rcond, rank, &work[nwork], &iwork[1], info);
+	if (*info != 0) {
+	    goto L10;
+	}
+
+/*        Multiply B by right bidiagonalizing vectors of R. */
+
+	i__1 = *lwork - nwork + 1;
+	sormbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
+		b[b_offset], ldb, &work[nwork], &i__1, info);
+
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 =
+		max(i__1,*nrhs), i__2 = *n - *m * 3;
+	if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2))
+		 {
+
+/*
+          Path 2a - underdetermined, with many more columns than rows
+          and sufficient workspace for an efficient algorithm.
+*/
+
+	    ldwork = *m;
+/*
+   Computing MAX
+   Computing MAX
+*/
+	    i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 =
+		     max(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * *
+		    lda + *m + *m * *nrhs;
+	    if (*lwork >= max(i__1,i__2)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    nwork = *m + 1;
+
+/*
+          Compute A=L*Q.
+          (Workspace: need 2*M, prefer M+M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+	    il = nwork;
+
+/*        Copy L to WORK(IL), zeroing out above its diagonal. */
+
+	    slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__1 = *m - 1;
+	    i__2 = *m - 1;
+	    slaset_("U", &i__1, &i__2, &c_b320, &c_b320, &work[il + ldwork], &
+		    ldwork);
+	    ie = il + ldwork * *m;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+          Bidiagonalize L in WORK(IL).
+          (Workspace: need M*M+5*M, prefer M*M+4*M+2*M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sgebrd_(m, m, &work[il], &ldwork, &s[1], &work[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of L.
+          (Workspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sormbr_("Q", "L", "T", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    slalsd_("U", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of L. */
+
+	    i__1 = *lwork - nwork + 1;
+	    sormbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
+		    itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Zero out below first M rows of B. */
+
+	    i__1 = *n - *m;
+	    slaset_("F", &i__1, nrhs, &c_b320, &c_b320, &b[*m + 1 + b_dim1],
+		    ldb);
+	    nwork = itau + *m;
+
+/*
+          Multiply transpose(Q) by B.
+          (Workspace: need M+NRHS, prefer M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sormlq_("L", "T", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases. */
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+          Bidiagonalize A.
+          (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors.
+          (Workspace: need 3*M+NRHS, prefer 3*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    sormbr_("Q", "L", "T", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    slalsd_("L", &smlsiz, m, nrhs, &s[1], &work[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &iwork[1], info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of A. */
+
+	    i__1 = *lwork - nwork + 1;
+	    sormbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+	}
+    }
+
+/*     Undo scaling. */
+
+    if (iascl == 1) {
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	slascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	slascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L10:
+    work[1] = (real) maxwrk;
+    return 0;
+
+/*     End of SGELSD */
+
+} /* sgelsd_ */
+
+/* Subroutine */ int sgeqr2_(integer *m, integer *n, real *a, integer *lda,
+	real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, k;
+    static real aii;
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *), slarfg_(integer *, real *, real *,
+	    integer *, real *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SGEQR2 computes a QR factorization of a real m by n matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(m,n) by n upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) REAL array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) REAL array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQR2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1]
+		, &c__1, &tau[i__]);
+	if (i__ < *n) {
+
+/*           Apply H(i) to A(i:m,i+1:n) from the left */
+
+	    aii = a[i__ + i__ * a_dim1];
+	    a[i__ + i__ * a_dim1] = 1.f;
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    slarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    a[i__ + i__ * a_dim1] = aii;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGEQR2 */
+
+} /* sgeqr2_ */
+
+/* Subroutine */ int sgeqrf_(integer *m, integer *n, real *a, integer *lda,
+	real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int sgeqr2_(integer *, integer *, real *, integer
+	    *, real *, real *, integer *), slarfb_(char *, char *, char *,
+	    char *, integer *, integer *, integer *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SGEQRF computes a QR factorization of a real M-by-N matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the orthogonal matrix Q as a
+            product of min(m,n) elementary reflectors (see Further
+            Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) REAL array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *n * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGEQRF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the QR factorization of the current block
+             A(i:m,i:i+ib-1)
+*/
+
+	    i__3 = *m - i__ + 1;
+	    sgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *m - i__ + 1;
+		slarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		slarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
+			i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &work[ib
+			+ 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	sgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SGEQRF */
+
+} /* sgeqrf_ */
+
+/* Subroutine */ int sgesdd_(char *jobz, integer *m, integer *n, real *a,
+	integer *lda, real *s, real *u, integer *ldu, real *vt, integer *ldvt,
+	 real *work, integer *lwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2, i__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, ie, il, ir, iu, blk;
+    static real dum[1], eps;
+    static integer ivt, iscl;
+    static real anrm;
+    static integer idum[1], ierr, itau;
+    extern logical lsame_(char *, char *);
+    static integer chunk;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer minmn, wrkbl, itaup, itauq, mnthr;
+    static logical wntqa;
+    static integer nwork;
+    static logical wntqn, wntqo, wntqs;
+    static integer bdspac;
+    extern /* Subroutine */ int sbdsdc_(char *, char *, integer *, real *,
+	    real *, real *, integer *, real *, integer *, real *, integer *,
+	    real *, integer *, integer *), sgebrd_(integer *,
+	    integer *, real *, integer *, real *, real *, real *, real *,
+	    real *, integer *, integer *);
+    extern doublereal slamch_(char *), slange_(char *, integer *,
+	    integer *, real *, integer *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static real bignum;
+    extern /* Subroutine */ int sgelqf_(integer *, integer *, real *, integer
+	    *, real *, real *, integer *, integer *), slascl_(char *, integer
+	    *, integer *, real *, real *, integer *, integer *, real *,
+	    integer *, integer *), sgeqrf_(integer *, integer *, real
+	    *, integer *, real *, real *, integer *, integer *), slacpy_(char
+	    *, integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *,
+	    real *, integer *), sorgbr_(char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, integer *
+	    );
+    static integer ldwrkl;
+    extern /* Subroutine */ int sormbr_(char *, char *, char *, integer *,
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , real *, integer *, integer *);
+    static integer ldwrkr, minwrk, ldwrku, maxwrk;
+    extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *, integer *);
+    static integer ldwkvt;
+    static real smlnum;
+    static logical wntqas;
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *, integer *);
+    static logical lquery;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SGESDD computes the singular value decomposition (SVD) of a real
+    M-by-N matrix A, optionally computing the left and right singular
+    vectors.  If singular vectors are desired, it uses a
+    divide-and-conquer algorithm.
+
+    The SVD is written
+
+         A = U * SIGMA * transpose(V)
+
+    where SIGMA is an M-by-N matrix which is zero except for its
+    min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+    V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+    are the singular values of A; they are real and non-negative, and
+    are returned in descending order.  The first min(m,n) columns of
+    U and V are the left and right singular vectors of A.
+
+    Note that the routine returns VT = V**T, not V.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            Specifies options for computing all or part of the matrix U:
+            = 'A':  all M columns of U and all N rows of V**T are
+                    returned in the arrays U and VT;
+            = 'S':  the first min(M,N) columns of U and the first
+                    min(M,N) rows of V**T are returned in the arrays U
+                    and VT;
+            = 'O':  If M >= N, the first N columns of U are overwritten
+                    on the array A and all rows of V**T are returned in
+                    the array VT;
+                    otherwise, all columns of U are returned in the
+                    array U and the first M rows of V**T are overwritten
+                    in the array VT;
+            = 'N':  no columns of U or rows of V**T are computed.
+
+    M       (input) INTEGER
+            The number of rows of the input matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the input matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit,
+            if JOBZ = 'O',  A is overwritten with the first N columns
+                            of U (the left singular vectors, stored
+                            columnwise) if M >= N;
+                            A is overwritten with the first M rows
+                            of V**T (the right singular vectors, stored
+                            rowwise) otherwise.
+            if JOBZ .ne. 'O', the contents of A are destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    S       (output) REAL array, dimension (min(M,N))
+            The singular values of A, sorted so that S(i) >= S(i+1).
+
+    U       (output) REAL array, dimension (LDU,UCOL)
+            UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+            UCOL = min(M,N) if JOBZ = 'S'.
+            If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+            orthogonal matrix U;
+            if JOBZ = 'S', U contains the first min(M,N) columns of U
+            (the left singular vectors, stored columnwise);
+            if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= 1; if
+            JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+
+    VT      (output) REAL array, dimension (LDVT,N)
+            If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+            N-by-N orthogonal matrix V**T;
+            if JOBZ = 'S', VT contains the first min(M,N) rows of
+            V**T (the right singular vectors, stored rowwise);
+            if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.  LDVT >= 1; if
+            JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+            if JOBZ = 'S', LDVT >= min(M,N).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= 1.
+            If JOBZ = 'N',
+              LWORK >= 3*min(M,N) + max(max(M,N),6*min(M,N)).
+            If JOBZ = 'O',
+              LWORK >= 3*min(M,N)*min(M,N) +
+                       max(max(M,N),5*min(M,N)*min(M,N)+4*min(M,N)).
+            If JOBZ = 'S' or 'A'
+              LWORK >= 3*min(M,N)*min(M,N) +
+                       max(max(M,N),4*min(M,N)*min(M,N)+4*min(M,N)).
+            For good performance, LWORK should generally be larger.
+            If LWORK < 0 but other input arguments are legal, WORK(1)
+            returns the optimal LWORK.
+
+    IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  SBDSDC did not converge, updating process failed.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    mnthr = (integer) (minmn * 11.f / 6.f);
+    wntqa = lsame_(jobz, "A");
+    wntqs = lsame_(jobz, "S");
+    wntqas = (wntqa) || (wntqs);
+    wntqo = lsame_(jobz, "O");
+    wntqn = lsame_(jobz, "N");
+    minwrk = 1;
+    maxwrk = 1;
+    lquery = *lwork == -1;
+
+    if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && *
+	    ldu < *m)) {
+	*info = -8;
+    } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt <
+	    minmn)) || (wntqo && *m >= *n && *ldvt < *n)) {
+	*info = -10;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         NB refers to the optimal block size for the immediately
+         following subroutine, as returned by ILAENV.)
+*/
+
+    if (*info == 0 && *m > 0 && *n > 0) {
+	if (*m >= *n) {
+
+/*           Compute space needed for SBDSDC */
+
+	    if (wntqn) {
+		bdspac = *n * 7;
+	    } else {
+		bdspac = *n * 3 * *n + ((*n) << (2));
+	    }
+	    if (*m >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1 (M much larger than N, JOBZ='N') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = bdspac + *n;
+		} else if (wntqo) {
+
+/*                 Path 2 (M much larger than N, JOBZ='O') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + ((*n) << (1)) * *n;
+		    minwrk = bdspac + ((*n) << (1)) * *n + *n * 3;
+		} else if (wntqs) {
+
+/*                 Path 3 (M much larger than N, JOBZ='S') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "SORGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+		    minwrk = bdspac + *n * *n + *n * 3;
+		} else if (wntqa) {
+
+/*                 Path 4 (M much larger than N, JOBZ='A') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "SGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "SORGQR",
+			    " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + ((*n) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", n, n, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *n * *n;
+		    minwrk = bdspac + *n * *n + *n * 3;
+		}
+	    } else {
+
+/*              Path 5 (M at least N, but not much larger) */
+
+		wrkbl = *n * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m,
+			n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		if (wntqn) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *n * 3 + max(*m,bdspac);
+		} else if (wntqo) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *m, i__2 = *n * *n + bdspac;
+		    minwrk = *n * 3 + max(i__1,i__2);
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *n * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *n * 3 + max(*m,bdspac);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n * 3 + *n * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = bdspac + *n * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *n * 3 + max(*m,bdspac);
+		}
+	    }
+	} else {
+
+/*           Compute space needed for SBDSDC */
+
+	    if (wntqn) {
+		bdspac = *m * 7;
+	    } else {
+		bdspac = *m * 3 * *m + ((*m) << (2));
+	    }
+	    if (*n >= mnthr) {
+		if (wntqn) {
+
+/*                 Path 1t (N much larger than M, JOBZ='N') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = bdspac + *m;
+		} else if (wntqo) {
+
+/*                 Path 2t (N much larger than M, JOBZ='O') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + ((*m) << (1)) * *m;
+		    minwrk = bdspac + ((*m) << (1)) * *m + *m * 3;
+		} else if (wntqs) {
+
+/*                 Path 3t (N much larger than M, JOBZ='S') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "SORGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+		    minwrk = bdspac + *m * *m + *m * 3;
+		} else if (wntqa) {
+
+/*                 Path 4t (N much larger than M, JOBZ='A') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "SGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "SORGLQ",
+			    " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + ((*m) << (1)) * ilaenv_(&
+			    c__1, "SGEBRD", " ", m, m, &c_n1, &c_n1, (ftnlen)
+			    6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", m, m, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *m;
+		    minwrk = bdspac + *m * *m + *m * 3;
+		}
+	    } else {
+
+/*              Path 5t (N greater than M, but not much larger) */
+
+		wrkbl = *m * 3 + (*m + *n) * ilaenv_(&c__1, "SGEBRD", " ", m,
+			n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		if (wntqn) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3 + max(*n,bdspac);
+		} else if (wntqo) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl + *m * *n;
+/* Computing MAX */
+		    i__1 = *n, i__2 = *m * *m + bdspac;
+		    minwrk = *m * 3 + max(i__1,i__2);
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", m, n, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3 + max(*n,bdspac);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "QLN", m, m, n, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m * 3 + *m * ilaenv_(&c__1, "SORMBR"
+			    , "PRT", n, n, m, &c_n1, (ftnlen)6, (ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = bdspac + *m * 3;
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3 + max(*n,bdspac);
+		}
+	    }
+	}
+	work[1] = (real) maxwrk;
+    }
+
+    if (*lwork < minwrk && ! lquery) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESDD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	if (*lwork >= 1) {
+	    work[1] = 1.f;
+	}
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = slamch_("P");
+    smlnum = sqrt(slamch_("S")) / eps;
+    bignum = 1.f / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = slange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0.f && anrm < smlnum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	slascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*
+          A has at least as many rows as columns. If A has sufficiently
+          more rows than columns, first reduce using the QR
+          decomposition (if sufficient workspace available)
+*/
+
+	if (*m >= mnthr) {
+
+	    if (wntqn) {
+
+/*
+                Path 1 (M much larger than N, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (Workspace: need 2*N, prefer N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out below R */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		slaset_("L", &i__1, &i__2, &c_b320, &c_b320, &a[a_dim1 + 2],
+			lda);
+		ie = 1;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (Workspace: need 4*N, prefer 3*N+2*N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *n;
+
+/*
+                Perform bidiagonal SVD, computing singular values only
+                (Workspace: need N+BDSPAC)
+*/
+
+		sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2 (M much larger than N, JOBZ = 'O')
+                N left singular vectors to be overwritten on A and
+                N right singular vectors to be computed in VT
+*/
+
+		ir = 1;
+
+/*              WORK(IR) is LDWRKR by N */
+
+		if (*lwork >= *lda * *n + *n * *n + *n * 3 + bdspac) {
+		    ldwrkr = *lda;
+		} else {
+		    ldwrkr = (*lwork - *n * *n - *n * 3 - bdspac) / *n;
+		}
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		slaset_("L", &i__1, &i__2, &c_b320, &c_b320, &work[ir + 1], &
+			ldwrkr);
+
+/*
+                Generate Q in A
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in VT, copying result to WORK(IR)
+                (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*              WORK(IU) is N by N */
+
+		iu = nwork;
+		nwork = iu + *n * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in WORK(IU) and computing right
+                singular vectors of bidiagonal matrix in VT
+                (Workspace: need N+N*N+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite WORK(IU) by left singular vectors of R
+                and VT by right singular vectors of R
+                (Workspace: need 2*N*N+3*N, prefer 2*N*N+2*N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &work[iu], n, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IU), storing result in WORK(IR) and copying to A
+                (Workspace: need 2*N*N, prefer N*N+M*N)
+*/
+
+		i__1 = *m;
+		i__2 = ldwrkr;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = min(i__3,ldwrkr);
+		    sgemm_("N", "N", &chunk, n, n, &c_b1011, &a[i__ + a_dim1],
+			     lda, &work[iu], n, &c_b320, &work[ir], &ldwrkr);
+		    slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ +
+			    a_dim1], lda);
+/* L10: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Path 3 (M much larger than N, JOBZ='S')
+                N left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		ir = 1;
+
+/*              WORK(IR) is N by N */
+
+		ldwrkr = *n;
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		slacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		slaset_("L", &i__2, &i__1, &c_b320, &c_b320, &work[ir + 1], &
+			ldwrkr);
+
+/*
+                Generate Q in A
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sorgqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in WORK(IR)
+                (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagoal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need N+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of R and VT
+                by right singular vectors of R
+                (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IR), storing result in U
+                (Workspace: need N*N)
+*/
+
+		slacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr);
+		sgemm_("N", "N", m, n, n, &c_b1011, &a[a_offset], lda, &work[
+			ir], &ldwrkr, &c_b320, &u[u_offset], ldu);
+
+	    } else if (wntqa) {
+
+/*
+                Path 4 (M much larger than N, JOBZ='A')
+                M left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		itau = iu + ldwrku * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R, copying result to U
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		slacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Generate Q in U
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+		i__2 = *lwork - nwork + 1;
+		sorgqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+
+/*              Produce R in A, zeroing out other entries */
+
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		slaset_("L", &i__2, &i__1, &c_b320, &c_b320, &a[a_dim1 + 2],
+			lda);
+		ie = itau;
+		itauq = ie + *n;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(n, n, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in WORK(IU) and computing right
+                singular vectors of bidiagonal matrix in VT
+                (Workspace: need N+N*N+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], n, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite WORK(IU) by left singular vectors of R and VT
+                by right singular vectors of R
+                (Workspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			ierr);
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in U by left singular vectors of R in
+                WORK(IU), storing result in A
+                (Workspace: need N*N)
+*/
+
+		sgemm_("N", "N", m, n, n, &c_b1011, &u[u_offset], ldu, &work[
+			iu], &ldwrku, &c_b320, &a[a_offset], lda);
+
+/*              Copy left singular vectors of A from A to U */
+
+		slacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+	    }
+
+	} else {
+
+/*
+             M .LT. MNTHR
+
+             Path 5 (M at least N, but not much larger)
+             Reduce to bidiagonal form without QR decomposition
+*/
+
+	    ie = 1;
+	    itauq = ie + *n;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*
+             Bidiagonalize A
+             (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Perform bidiagonal SVD, only computing singular values
+                (Workspace: need N+BDSPAC)
+*/
+
+		sbdsdc_("U", "N", n, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+		iu = nwork;
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		    nwork = iu + ldwrku * *n;
+		    slaset_("F", m, n, &c_b320, &c_b320, &work[iu], &ldwrku);
+		} else {
+
+/*                 WORK( IU ) is N by N */
+
+		    ldwrku = *n;
+		    nwork = iu + ldwrku * *n;
+
+/*                 WORK(IR) is LDWRKR by N */
+
+		    ir = nwork;
+		    ldwrkr = (*lwork - *n * *n - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in WORK(IU) and computing right
+                singular vectors of bidiagonal matrix in VT
+                (Workspace: need N+N*N+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &work[iu], &ldwrku, &
+			vt[vt_offset], ldvt, dum, idum, &work[nwork], &iwork[
+			1], info);
+
+/*
+                Overwrite VT by right singular vectors of A
+                (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+		if (*lwork >= *m * *n + *n * 3 + bdspac) {
+
+/*
+                   Overwrite WORK(IU) by left singular vectors of A
+                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			    itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			    ierr);
+
+/*                 Copy left singular vectors of A from WORK(IU) to A */
+
+		    slacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate Q in A
+                   (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    sorgbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[nwork], &i__2, &ierr);
+
+/*
+                   Multiply Q in A by left singular vectors of
+                   bidiagonal matrix in WORK(IU), storing result in
+                   WORK(IR) and copying to A
+                   (Workspace: need 2*N*N, prefer N*N+M*N)
+*/
+
+		    i__2 = *m;
+		    i__1 = ldwrkr;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *m - i__ + 1;
+			chunk = min(i__3,ldwrkr);
+			sgemm_("N", "N", &chunk, n, n, &c_b1011, &a[i__ +
+				a_dim1], lda, &work[iu], &ldwrku, &c_b320, &
+				work[ir], &ldwrkr);
+			slacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ +
+				a_dim1], lda);
+/* L20: */
+		    }
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need N+BDSPAC)
+*/
+
+		slaset_("F", m, n, &c_b320, &c_b320, &u[u_offset], ldu);
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need 3*N, prefer 2*N+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need N+BDSPAC)
+*/
+
+		slaset_("F", m, m, &c_b320, &c_b320, &u[u_offset], ldu);
+		sbdsdc_("U", "I", n, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*              Set the right corner of U to identity matrix */
+
+		i__1 = *m - *n;
+		i__2 = *m - *n;
+		slaset_("F", &i__1, &i__2, &c_b320, &c_b1011, &u[*n + 1 + (*n
+			+ 1) * u_dim1], ldu);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need N*N+2*N+M, prefer N*N+2*N+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    } else {
+
+/*
+          A has more columns than rows. If A has sufficiently more
+          columns than rows, first reduce using the LQ decomposition (if
+          sufficient workspace available)
+*/
+
+	if (*n >= mnthr) {
+
+	    if (wntqn) {
+
+/*
+                Path 1t (N much larger than M, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (Workspace: need 2*M, prefer M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out above L */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		slaset_("U", &i__1, &i__2, &c_b320, &c_b320, &a[((a_dim1) << (
+			1)) + 1], lda);
+		ie = 1;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (Workspace: need 4*M, prefer 3*M+2*M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nwork = ie + *m;
+
+/*
+                Perform bidiagonal SVD, computing singular values only
+                (Workspace: need M+BDSPAC)
+*/
+
+		sbdsdc_("U", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2t (N much larger than M, JOBZ='O')
+                M right singular vectors to be overwritten on A and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+
+/*              IVT is M by M */
+
+		il = ivt + *m * *m;
+		if (*lwork >= *m * *n + *m * *m + *m * 3 + bdspac) {
+
+/*                 WORK(IL) is M by N */
+
+		    ldwrkl = *m;
+		    chunk = *n;
+		} else {
+		    ldwrkl = *m;
+		    chunk = (*lwork - *m * *m) / *m;
+		}
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy L to WORK(IL), zeroing about above it */
+
+		slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		slaset_("U", &i__1, &i__2, &c_b320, &c_b320, &work[il +
+			ldwrkl], &ldwrkl);
+
+/*
+                Generate Q in A
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IL)
+                (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U, and computing right singular
+                vectors of bidiagonal matrix in WORK(IVT)
+                (Workspace: need M+M*M+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], m, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of L and WORK(IVT)
+                by right singular vectors of L
+                (Workspace: need 2*M*M+3*M, prefer 2*M*M+2*M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &work[ivt], m, &work[nwork], &i__1, &ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IVT) by Q
+                in A, storing result in WORK(IL) and copying to A
+                (Workspace: need 2*M*M, prefer M*M+M*N)
+*/
+
+		i__1 = *n;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = min(i__3,chunk);
+		    sgemm_("N", "N", m, &blk, m, &c_b1011, &work[ivt], m, &a[
+			    i__ * a_dim1 + 1], lda, &c_b320, &work[il], &
+			    ldwrkl);
+		    slacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1
+			    + 1], lda);
+/* L30: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Path 3t (N much larger than M, JOBZ='S')
+                M right singular vectors to be computed in VT and
+                M left singular vectors to be computed in U
+*/
+
+		il = 1;
+
+/*              WORK(IL) is M by M */
+
+		ldwrkl = *m;
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		slacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		slaset_("U", &i__2, &i__1, &c_b320, &c_b320, &work[il +
+			ldwrkl], &ldwrkl);
+
+/*
+                Generate Q in A
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sorglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IU), copying result to U
+                (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(m, m, &work[il], &ldwrkl, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need M+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of L and VT
+                by right singular vectors of L
+                (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IL) by
+                Q in A, storing result in VT
+                (Workspace: need M*M)
+*/
+
+		slacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl);
+		sgemm_("N", "N", m, n, m, &c_b1011, &work[il], &ldwrkl, &a[
+			a_offset], lda, &c_b320, &vt[vt_offset], ldvt);
+
+	    } else if (wntqa) {
+
+/*
+                Path 4t (N much larger than M, JOBZ='A')
+                N right singular vectors to be computed in VT and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+
+		ldwkvt = *m;
+		itau = ivt + ldwkvt * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q, copying result to VT
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		slacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Generate Q in VT
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sorglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
+			nwork], &i__2, &ierr);
+
+/*              Produce L in A, zeroing out other entries */
+
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		slaset_("U", &i__2, &i__1, &c_b320, &c_b320, &a[((a_dim1) << (
+			1)) + 1], lda);
+		ie = itau;
+		itauq = ie + *m;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sgebrd_(m, m, &a[a_offset], lda, &s[1], &work[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in WORK(IVT)
+                (Workspace: need M+M*M+BDSPAC)
+*/
+
+		sbdsdc_("U", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*
+                Overwrite U by left singular vectors of L and WORK(IVT)
+                by right singular vectors of L
+                (Workspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+		i__2 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, m, m, &a[a_offset], lda, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IVT) by
+                Q in VT, storing result in A
+                (Workspace: need M*M)
+*/
+
+		sgemm_("N", "N", m, n, m, &c_b1011, &work[ivt], &ldwkvt, &vt[
+			vt_offset], ldvt, &c_b320, &a[a_offset], lda);
+
+/*              Copy right singular vectors of A from A to VT */
+
+		slacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+	    }
+
+	} else {
+
+/*
+             N .LT. MNTHR
+
+             Path 5t (N greater than M, but not much larger)
+             Reduce to bidiagonal form without LQ decomposition
+*/
+
+	    ie = 1;
+	    itauq = ie + *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+             Bidiagonalize A
+             (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    sgebrd_(m, n, &a[a_offset], lda, &s[1], &work[ie], &work[itauq], &
+		    work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Perform bidiagonal SVD, only computing singular values
+                (Workspace: need M+BDSPAC)
+*/
+
+		sbdsdc_("L", "N", m, &s[1], &work[ie], dum, &c__1, dum, &c__1,
+			 dum, idum, &work[nwork], &iwork[1], info);
+	    } else if (wntqo) {
+		ldwkvt = *m;
+		ivt = nwork;
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    slaset_("F", m, n, &c_b320, &c_b320, &work[ivt], &ldwkvt);
+		    nwork = ivt + ldwkvt * *n;
+		} else {
+
+/*                 WORK( IVT ) is M by M */
+
+		    nwork = ivt + ldwkvt * *m;
+		    il = nwork;
+
+/*                 WORK(IL) is M by CHUNK */
+
+		    chunk = (*lwork - *m * *m - *m * 3) / *m;
+		}
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in WORK(IVT)
+                (Workspace: need M*M+BDSPAC)
+*/
+
+		sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &
+			work[ivt], &ldwkvt, dum, idum, &work[nwork], &iwork[1]
+			, info);
+
+/*
+                Overwrite U by left singular vectors of A
+                (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *m * 3 + bdspac) {
+
+/*
+                   Overwrite WORK(IVT) by left singular vectors of A
+                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			    itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2,
+			    &ierr);
+
+/*                 Copy right singular vectors of A from WORK(IVT) to A */
+
+		    slacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate P**T in A
+                   (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    sorgbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[nwork], &i__2, &ierr);
+
+/*
+                   Multiply Q in A by right singular vectors of
+                   bidiagonal matrix in WORK(IVT), storing result in
+                   WORK(IL) and copying to A
+                   (Workspace: need 2*M*M, prefer M*M+M*N)
+*/
+
+		    i__2 = *n;
+		    i__1 = chunk;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *n - i__ + 1;
+			blk = min(i__3,chunk);
+			sgemm_("N", "N", m, &blk, m, &c_b1011, &work[ivt], &
+				ldwkvt, &a[i__ * a_dim1 + 1], lda, &c_b320, &
+				work[il], m);
+			slacpy_("F", m, &blk, &work[il], m, &a[i__ * a_dim1 +
+				1], lda);
+/* L40: */
+		    }
+		}
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need M+BDSPAC)
+*/
+
+		slaset_("F", m, n, &c_b320, &c_b320, &vt[vt_offset], ldvt);
+		sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need 3*M, prefer 2*M+M*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", m, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else if (wntqa) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in U and computing right singular
+                vectors of bidiagonal matrix in VT
+                (Workspace: need M+BDSPAC)
+*/
+
+		slaset_("F", n, n, &c_b320, &c_b320, &vt[vt_offset], ldvt);
+		sbdsdc_("L", "I", m, &s[1], &work[ie], &u[u_offset], ldu, &vt[
+			vt_offset], ldvt, dum, idum, &work[nwork], &iwork[1],
+			info);
+
+/*              Set the right corner of VT to identity matrix */
+
+		i__1 = *n - *m;
+		i__2 = *n - *m;
+		slaset_("F", &i__1, &i__2, &c_b320, &c_b1011, &vt[*m + 1 + (*
+			m + 1) * vt_dim1], ldvt);
+
+/*
+                Overwrite U by left singular vectors of A and VT
+                by right singular vectors of A
+                (Workspace: need 2*M+N, prefer 2*M+N*NB)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		sormbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+		i__1 = *lwork - nwork + 1;
+		sormbr_("P", "R", "T", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    slascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    slascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1] = (real) maxwrk;
+
+    return 0;
+
+/*     End of SGESDD */
+
+} /* sgesdd_ */
+
+/* Subroutine */ int sgesv_(integer *n, integer *nrhs, real *a, integer *lda,
+	integer *ipiv, real *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int xerbla_(char *, integer *), sgetrf_(
+	    integer *, integer *, real *, integer *, integer *, integer *),
+	    sgetrs_(char *, integer *, integer *, real *, integer *, integer *
+	    , real *, integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    SGESV computes the solution to a real system of linear equations
+       A * X = B,
+    where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+
+    The LU decomposition with partial pivoting and row interchanges is
+    used to factor A as
+       A = P * L * U,
+    where P is a permutation matrix, L is unit lower triangular, and U is
+    upper triangular.  The factored form of A is then used to solve the
+    system of equations A * X = B.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of linear equations, i.e., the order of the
+            matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the N-by-N coefficient matrix A.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (output) INTEGER array, dimension (N)
+            The pivot indices that define the permutation matrix P;
+            row i of the matrix was interchanged with row IPIV(i).
+
+    B       (input/output) REAL array, dimension (LDB,NRHS)
+            On entry, the N-by-NRHS matrix of right hand side matrix B.
+            On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+                  has been completed, but the factor U is exactly
+                  singular, so the solution could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*nrhs < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGESV ", &i__1);
+	return 0;
+    }
+
+/*     Compute the LU factorization of A. */
+
+    sgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
+    if (*info == 0) {
+
+/*        Solve the system A*X = B, overwriting B with X. */
+
+	sgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
+		b_offset], ldb, info);
+    }
+    return 0;
+
+/*     End of SGESV */
+
+} /* sgesv_ */
+
+/* Subroutine */ int sgetf2_(integer *m, integer *n, real *a, integer *lda,
+	integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Local variables */
+    static integer j, jp;
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *,
+	    integer *, real *, integer *, real *, integer *), sscal_(integer *
+	    , real *, real *, integer *), sswap_(integer *, real *, integer *,
+	     real *, integer *), xerbla_(char *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1992
+
+
+    Purpose
+    =======
+
+    SGETF2 computes an LU factorization of a general m-by-n matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the m by n matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+                 has been completed, but the factor U is exactly
+                 singular, and division by zero will occur if it is used
+                 to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = min(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Find pivot and test for singularity. */
+
+	i__2 = *m - j + 1;
+	jp = j - 1 + isamax_(&i__2, &a[j + j * a_dim1], &c__1);
+	ipiv[j] = jp;
+	if (a[jp + j * a_dim1] != 0.f) {
+
+/*           Apply the interchange to columns 1:N. */
+
+	    if (jp != j) {
+		sswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+	    }
+
+/*           Compute elements J+1:M of J-th column. */
+
+	    if (j < *m) {
+		i__2 = *m - j;
+		r__1 = 1.f / a[j + j * a_dim1];
+		sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+
+	} else if (*info == 0) {
+
+	    *info = j;
+	}
+
+	if (j < min(*m,*n)) {
+
+/*           Update trailing submatrix. */
+
+	    i__2 = *m - j;
+	    i__3 = *n - j;
+	    sger_(&i__2, &i__3, &c_b1290, &a[j + 1 + j * a_dim1], &c__1, &a[j
+		    + (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1],
+		    lda);
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of SGETF2 */
+
+} /* sgetf2_ */
+
+/* Subroutine */ int sgetrf_(integer *m, integer *n, real *a, integer *lda,
+	integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer i__, j, jb, nb, iinfo;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *), strsm_(char *, char *, char *,
+	     char *, integer *, integer *, real *, real *, integer *, real *,
+	    integer *), sgetf2_(integer *,
+	    integer *, real *, integer *, integer *, integer *), xerbla_(char
+	    *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer
+	    *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    SGETRF computes an LU factorization of a general M-by-N matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the M-by-N matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+                  has been completed, but the factor U is exactly
+                  singular, and division by zero will occur if it is used
+                  to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "SGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    if ((nb <= 1) || (nb >= min(*m,*n))) {
+
+/*        Use unblocked code. */
+
+	sgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code. */
+
+	i__1 = min(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = min(*m,*n) - j + 1;
+	    jb = min(i__3,nb);
+
+/*
+             Factor diagonal and subdiagonal blocks and test for exact
+             singularity.
+*/
+
+	    i__3 = *m - j + 1;
+	    sgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+
+/*           Adjust INFO and the pivot indices. */
+
+	    if (*info == 0 && iinfo > 0) {
+		*info = iinfo + j - 1;
+	    }
+/* Computing MIN */
+	    i__4 = *m, i__5 = j + jb - 1;
+	    i__3 = min(i__4,i__5);
+	    for (i__ = j; i__ <= i__3; ++i__) {
+		ipiv[i__] = j - 1 + ipiv[i__];
+/* L10: */
+	    }
+
+/*           Apply interchanges to columns 1:J-1. */
+
+	    i__3 = j - 1;
+	    i__4 = j + jb - 1;
+	    slaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+
+	    if (j + jb <= *n) {
+
+/*              Apply interchanges to columns J+JB:N. */
+
+		i__3 = *n - j - jb + 1;
+		i__4 = j + jb - 1;
+		slaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
+			ipiv[1], &c__1);
+
+/*              Compute block row of U. */
+
+		i__3 = *n - j - jb + 1;
+		strsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
+			c_b1011, &a[j + j * a_dim1], lda, &a[j + (j + jb) *
+			a_dim1], lda);
+		if (j + jb <= *m) {
+
+/*                 Update trailing submatrix. */
+
+		    i__3 = *m - j - jb + 1;
+		    i__4 = *n - j - jb + 1;
+		    sgemm_("No transpose", "No transpose", &i__3, &i__4, &jb,
+			    &c_b1290, &a[j + jb + j * a_dim1], lda, &a[j + (j
+			    + jb) * a_dim1], lda, &c_b1011, &a[j + jb + (j +
+			    jb) * a_dim1], lda);
+		}
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of SGETRF */
+
+} /* sgetrf_ */
+
+/* Subroutine */ int sgetrs_(char *trans, integer *n, integer *nrhs, real *a,
+	integer *lda, integer *ipiv, real *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *,
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), xerbla_(char *, integer *);
+    static logical notran;
+    extern /* Subroutine */ int slaswp_(integer *, real *, integer *, integer
+	    *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    SGETRS solves a system of linear equations
+       A * X = B  or  A' * X = B
+    with a general N-by-N matrix A using the LU factorization computed
+    by SGETRF.
+
+    Arguments
+    =========
+
+    TRANS   (input) CHARACTER*1
+            Specifies the form of the system of equations:
+            = 'N':  A * X = B  (No transpose)
+            = 'T':  A'* X = B  (Transpose)
+            = 'C':  A'* X = B  (Conjugate transpose = Transpose)
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The factors L and U from the factorization A = P*L*U
+            as computed by SGETRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (input) INTEGER array, dimension (N)
+            The pivot indices from SGETRF; for 1<=i<=N, row i of the
+            matrix was interchanged with row IPIV(i).
+
+    B       (input/output) REAL array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SGETRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (notran) {
+
+/*
+          Solve A * X = B.
+
+          Apply row interchanges to the right hand sides.
+*/
+
+	slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
+
+/*        Solve L*X = B, overwriting B with X. */
+
+	strsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b1011, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1011,
+		 &a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A' * X = B.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b1011, &
+		a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	strsm_("Left", "Lower", "Transpose", "Unit", n, nrhs, &c_b1011, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Apply row interchanges to the solution vectors. */
+
+	slaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
+    }
+
+    return 0;
+
+/*     End of SGETRS */
+
+} /* sgetrs_ */
+
+/* Subroutine */ int shseqr_(char *job, char *compz, integer *n, integer *ilo,
+	 integer *ihi, real *h__, integer *ldh, real *wr, real *wi, real *z__,
+	 integer *ldz, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    real r__1, r__2;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__, j, k, l;
+    static real s[225]	/* was [15][15] */, v[16];
+    static integer i1, i2, ii, nh, nr, ns, nv;
+    static real vv[16];
+    static integer itn;
+    static real tau;
+    static integer its;
+    static real ulp, tst1;
+    static integer maxb;
+    static real absw;
+    static integer ierr;
+    static real unfl, temp, ovfl;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static integer itemp;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    static logical initz, wantt;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    static logical wantz;
+    extern doublereal slapy2_(real *, real *);
+    extern /* Subroutine */ int slabad_(real *, real *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *,
+	    real *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern doublereal slanhs_(char *, integer *, real *, integer *, real *);
+    extern /* Subroutine */ int slahqr_(logical *, logical *, integer *,
+	    integer *, integer *, real *, integer *, real *, real *, integer *
+	    , integer *, real *, integer *, integer *), slacpy_(char *,
+	    integer *, integer *, real *, integer *, real *, integer *), slaset_(char *, integer *, integer *, real *, real *,
+	    real *, integer *), slarfx_(char *, integer *, integer *,
+	    real *, real *, real *, integer *, real *);
+    static real smlnum;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SHSEQR computes the eigenvalues of a real upper Hessenberg matrix H
+    and, optionally, the matrices T and Z from the Schur decomposition
+    H = Z T Z**T, where T is an upper quasi-triangular matrix (the Schur
+    form), and Z is the orthogonal matrix of Schur vectors.
+
+    Optionally Z may be postmultiplied into an input orthogonal matrix Q,
+    so that this routine can give the Schur factorization of a matrix A
+    which has been reduced to the Hessenberg form H by the orthogonal
+    matrix Q:  A = Q*H*Q**T = (QZ)*T*(QZ)**T.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            = 'E':  compute eigenvalues only;
+            = 'S':  compute eigenvalues and the Schur form T.
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  no Schur vectors are computed;
+            = 'I':  Z is initialized to the unit matrix and the matrix Z
+                    of Schur vectors of H is returned;
+            = 'V':  Z must contain an orthogonal matrix Q on entry, and
+                    the product Q*Z is returned.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to SGEBAL, and then passed to SGEHRD
+            when the matrix output by SGEBAL is reduced to Hessenberg
+            form. Otherwise ILO and IHI should be set to 1 and N
+            respectively.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    H       (input/output) REAL array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if JOB = 'S', H contains the upper quasi-triangular
+            matrix T from the Schur decomposition (the Schur form);
+            2-by-2 diagonal blocks (corresponding to complex conjugate
+            pairs of eigenvalues) are returned in standard form, with
+            H(i,i) = H(i+1,i+1) and H(i+1,i)*H(i,i+1) < 0. If JOB = 'E',
+            the contents of H are unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    WR      (output) REAL array, dimension (N)
+    WI      (output) REAL array, dimension (N)
+            The real and imaginary parts, respectively, of the computed
+            eigenvalues. If two eigenvalues are computed as a complex
+            conjugate pair, they are stored in consecutive elements of
+            WR and WI, say the i-th and (i+1)th, with WI(i) > 0 and
+            WI(i+1) < 0. If JOB = 'S', the eigenvalues are stored in the
+            same order as on the diagonal of the Schur form returned in
+            H, with WR(i) = H(i,i) and, if H(i:i+1,i:i+1) is a 2-by-2
+            diagonal block, WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and
+            WI(i+1) = -WI(i).
+
+    Z       (input/output) REAL array, dimension (LDZ,N)
+            If COMPZ = 'N': Z is not referenced.
+            If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+            contains the orthogonal matrix Z of the Schur vectors of H.
+            If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+            which is assumed to be equal to the unit matrix except for
+            the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+            Normally Q is the orthogonal matrix generated by SORGHR after
+            the call to SGEHRD which formed the Hessenberg matrix H.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.
+            LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, SHSEQR failed to compute all of the
+                  eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+                  elements 1:ilo-1 and i+1:n of WR and WI contain those
+                  eigenvalues which have been successfully computed.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --wr;
+    --wi;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    wantt = lsame_(job, "S");
+    initz = lsame_(compz, "I");
+    wantz = (initz) || (lsame_(compz, "V"));
+
+    *info = 0;
+    work[1] = (real) max(1,*n);
+    lquery = *lwork == -1;
+    if (! lsame_(job, "E") && ! wantt) {
+	*info = -1;
+    } else if (! lsame_(compz, "N") && ! wantz) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*ldh < max(1,*n)) {
+	*info = -7;
+    } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) {
+	*info = -11;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SHSEQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Initialize Z, if necessary */
+
+    if (initz) {
+	slaset_("Full", n, n, &c_b320, &c_b1011, &z__[z_offset], ldz);
+    }
+
+/*     Store the eigenvalues isolated by SGEBAL. */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	wr[i__] = h__[i__ + i__ * h_dim1];
+	wi[i__] = 0.f;
+/* L10: */
+    }
+    i__1 = *n;
+    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+	wr[i__] = h__[i__ + i__ * h_dim1];
+	wi[i__] = 0.f;
+/* L20: */
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	wr[*ilo] = h__[*ilo + *ilo * h_dim1];
+	wi[*ilo] = 0.f;
+	return 0;
+    }
+
+/*
+       Set rows and columns ILO to IHI to zero below the first
+       subdiagonal.
+*/
+
+    i__1 = *ihi - 2;
+    for (j = *ilo; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = j + 2; i__ <= i__2; ++i__) {
+	    h__[i__ + j * h_dim1] = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    nh = *ihi - *ilo + 1;
+
+/*
+       Determine the order of the multi-shift QR algorithm to be used.
+
+   Writing concatenation
+*/
+    i__3[0] = 1, a__1[0] = job;
+    i__3[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+    ns = ilaenv_(&c__4, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+/* Writing concatenation */
+    i__3[0] = 1, a__1[0] = job;
+    i__3[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+    maxb = ilaenv_(&c__8, "SHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (((ns <= 2) || (ns > nh)) || (maxb >= nh)) {
+
+/*        Use the standard double-shift algorithm */
+
+	slahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &wr[1], &wi[
+		1], ilo, ihi, &z__[z_offset], ldz, info);
+	return 0;
+    }
+    maxb = max(3,maxb);
+/* Computing MIN */
+    i__1 = min(ns,maxb);
+    ns = min(i__1,15);
+
+/*
+       Now 2 < NS <= MAXB < NH.
+
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    unfl = slamch_("Safe minimum");
+    ovfl = 1.f / unfl;
+    slabad_(&unfl, &ovfl);
+    ulp = slamch_("Precision");
+    smlnum = unfl * (nh / ulp);
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are set inside the main loop.
+*/
+
+    if (wantt) {
+	i1 = 1;
+	i2 = *n;
+    }
+
+/*     ITN is the total number of multiple-shift QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of at most MAXB. Each iteration of the loop
+       works with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L50:
+    l = *ilo;
+    if (i__ < *ilo) {
+	goto L170;
+    }
+
+/*
+       Perform multiple-shift QR iterations on rows and columns ILO to I
+       until a submatrix of order at most MAXB splits off at the bottom
+       because a subdiagonal element has become negligible.
+*/
+
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2
+		    = h__[k + k * h_dim1], dabs(r__2));
+	    if (tst1 == 0.f) {
+		i__4 = i__ - l + 1;
+		tst1 = slanhs_("1", &i__4, &h__[l + l * h_dim1], ldh, &work[1]
+			);
+	    }
+/* Computing MAX */
+	    r__2 = ulp * tst1;
+	    if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2,
+		    smlnum)) {
+		goto L70;
+	    }
+/* L60: */
+	}
+L70:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible. */
+
+	    h__[l + (l - 1) * h_dim1] = 0.f;
+	}
+
+/*        Exit from loop if a submatrix of order <= MAXB has split off. */
+
+	if (l >= i__ - maxb + 1) {
+	    goto L160;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! wantt) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 20) || (its == 30)) {
+
+/*           Exceptional shifts. */
+
+	    i__2 = i__;
+	    for (ii = i__ - ns + 1; ii <= i__2; ++ii) {
+		wr[ii] = ((r__1 = h__[ii + (ii - 1) * h_dim1], dabs(r__1)) + (
+			r__2 = h__[ii + ii * h_dim1], dabs(r__2))) * 1.5f;
+		wi[ii] = 0.f;
+/* L80: */
+	    }
+	} else {
+
+/*           Use eigenvalues of trailing submatrix of order NS as shifts. */
+
+	    slacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) *
+		    h_dim1], ldh, s, &c__15);
+	    slahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &wr[i__ -
+		    ns + 1], &wi[i__ - ns + 1], &c__1, &ns, &z__[z_offset],
+		    ldz, &ierr);
+	    if (ierr > 0) {
+
+/*
+                If SLAHQR failed to compute all NS eigenvalues, use the
+                unconverged diagonal elements as the remaining shifts.
+*/
+
+		i__2 = ierr;
+		for (ii = 1; ii <= i__2; ++ii) {
+		    wr[i__ - ns + ii] = s[ii + ii * 15 - 16];
+		    wi[i__ - ns + ii] = 0.f;
+/* L90: */
+		}
+	    }
+	}
+
+/*
+          Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+          where G is the Hessenberg submatrix H(L:I,L:I) and w is
+          the vector of shifts (stored in WR and WI). The result is
+          stored in the local array V.
+*/
+
+	v[0] = 1.f;
+	i__2 = ns + 1;
+	for (ii = 2; ii <= i__2; ++ii) {
+	    v[ii - 1] = 0.f;
+/* L100: */
+	}
+	nv = 1;
+	i__2 = i__;
+	for (j = i__ - ns + 1; j <= i__2; ++j) {
+	    if (wi[j] >= 0.f) {
+		if (wi[j] == 0.f) {
+
+/*                 real shift */
+
+		    i__4 = nv + 1;
+		    scopy_(&i__4, v, &c__1, vv, &c__1);
+		    i__4 = nv + 1;
+		    r__1 = -wr[j];
+		    sgemv_("No transpose", &i__4, &nv, &c_b1011, &h__[l + l *
+			    h_dim1], ldh, vv, &c__1, &r__1, v, &c__1);
+		    ++nv;
+		} else if (wi[j] > 0.f) {
+
+/*                 complex conjugate pair of shifts */
+
+		    i__4 = nv + 1;
+		    scopy_(&i__4, v, &c__1, vv, &c__1);
+		    i__4 = nv + 1;
+		    r__1 = wr[j] * -2.f;
+		    sgemv_("No transpose", &i__4, &nv, &c_b1011, &h__[l + l *
+			    h_dim1], ldh, v, &c__1, &r__1, vv, &c__1);
+		    i__4 = nv + 1;
+		    itemp = isamax_(&i__4, vv, &c__1);
+/* Computing MAX */
+		    r__2 = (r__1 = vv[itemp - 1], dabs(r__1));
+		    temp = 1.f / dmax(r__2,smlnum);
+		    i__4 = nv + 1;
+		    sscal_(&i__4, &temp, vv, &c__1);
+		    absw = slapy2_(&wr[j], &wi[j]);
+		    temp = temp * absw * absw;
+		    i__4 = nv + 2;
+		    i__5 = nv + 1;
+		    sgemv_("No transpose", &i__4, &i__5, &c_b1011, &h__[l + l
+			    * h_dim1], ldh, vv, &c__1, &temp, v, &c__1);
+		    nv += 2;
+		}
+
+/*
+                Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+                reset it to the unit vector.
+*/
+
+		itemp = isamax_(&nv, v, &c__1);
+		temp = (r__1 = v[itemp - 1], dabs(r__1));
+		if (temp == 0.f) {
+		    v[0] = 1.f;
+		    i__4 = nv;
+		    for (ii = 2; ii <= i__4; ++ii) {
+			v[ii - 1] = 0.f;
+/* L110: */
+		    }
+		} else {
+		    temp = dmax(temp,smlnum);
+		    r__1 = 1.f / temp;
+		    sscal_(&nv, &r__1, v, &c__1);
+		}
+	    }
+/* L120: */
+	}
+
+/*        Multiple-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = l; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix. NR is the order of G.
+
+   Computing MIN
+*/
+	    i__4 = ns + 1, i__5 = i__ - k + 1;
+	    nr = min(i__4,i__5);
+	    if (k > l) {
+		scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    slarfg_(&nr, v, &v[1], &c__1, &tau);
+	    if (k > l) {
+		h__[k + (k - 1) * h_dim1] = v[0];
+		i__4 = i__;
+		for (ii = k + 1; ii <= i__4; ++ii) {
+		    h__[ii + (k - 1) * h_dim1] = 0.f;
+/* L130: */
+		}
+	    }
+	    v[0] = 1.f;
+
+/*
+             Apply G from the left to transform the rows of the matrix in
+             columns K to I2.
+*/
+
+	    i__4 = i2 - k + 1;
+	    slarfx_("Left", &nr, &i__4, v, &tau, &h__[k + k * h_dim1], ldh, &
+		    work[1]);
+
+/*
+             Apply G from the right to transform the columns of the
+             matrix in rows I1 to min(K+NR,I).
+
+   Computing MIN
+*/
+	    i__5 = k + nr;
+	    i__4 = min(i__5,i__) - i1 + 1;
+	    slarfx_("Right", &i__4, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh,
+		    &work[1]);
+
+	    if (wantz) {
+
+/*              Accumulate transformations in the matrix Z */
+
+		slarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1],
+			ldz, &work[1]);
+	    }
+/* L140: */
+	}
+
+/* L150: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L160:
+
+/*
+       A submatrix of order <= MAXB in rows and columns L to I has split
+       off. Use the double-shift QR algorithm to handle it.
+*/
+
+    slahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &wr[1], &wi[1],
+	    ilo, ihi, &z__[z_offset], ldz, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with a new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L50;
+
+L170:
+    work[1] = (real) max(1,*n);
+    return 0;
+
+/*     End of SHSEQR */
+
+} /* shseqr_ */
+
+/* Subroutine */ int slabad_(real *small, real *large)
+{
+    /* Builtin functions */
+    double r_lg10(real *), sqrt(doublereal);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLABAD takes as input the values computed by SLAMCH for underflow and
+    overflow, and returns the square root of each of these values if the
+    log of LARGE is sufficiently large.  This subroutine is intended to
+    identify machines with a large exponent range, such as the Crays, and
+    redefine the underflow and overflow limits to be the square roots of
+    the values computed by SLAMCH.  This subroutine is needed because
+    SLAMCH does not compensate for poor arithmetic in the upper half of
+    the exponent range, as is found on a Cray.
+
+    Arguments
+    =========
+
+    SMALL   (input/output) REAL
+            On entry, the underflow threshold as computed by SLAMCH.
+            On exit, if LOG10(LARGE) is sufficiently large, the square
+            root of SMALL, otherwise unchanged.
+
+    LARGE   (input/output) REAL
+            On entry, the overflow threshold as computed by SLAMCH.
+            On exit, if LOG10(LARGE) is sufficiently large, the square
+            root of LARGE, otherwise unchanged.
+
+    =====================================================================
+
+
+       If it looks like we're on a Cray, take the square root of
+       SMALL and LARGE to avoid overflow and underflow problems.
+*/
+
+    if (r_lg10(large) > 2e3f) {
+	*small = sqrt(*small);
+	*large = sqrt(*large);
+    }
+
+    return 0;
+
+/*     End of SLABAD */
+
+} /* slabad_ */
+
+/* Subroutine */ int slabrd_(integer *m, integer *n, integer *nb, real *a,
+	integer *lda, real *d__, real *e, real *tauq, real *taup, real *x,
+	integer *ldx, real *y, integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+
+    /* Local variables */
+    static integer i__;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *), slarfg_(
+	    integer *, real *, real *, integer *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLABRD reduces the first NB rows and columns of a real general
+    m by n matrix A to upper or lower bidiagonal form by an orthogonal
+    transformation Q' * A * P, and returns the matrices X and Y which
+    are needed to apply the transformation to the unreduced part of A.
+
+    If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+    bidiagonal form.
+
+    This is an auxiliary routine called by SGEBRD
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.
+
+    NB      (input) INTEGER
+            The number of leading rows and columns of A to be reduced.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit, the first NB rows and columns of the matrix are
+            overwritten; the rest of the array is unchanged.
+            If m >= n, elements on and below the diagonal in the first NB
+              columns, with the array TAUQ, represent the orthogonal
+              matrix Q as a product of elementary reflectors; and
+              elements above the diagonal in the first NB rows, with the
+              array TAUP, represent the orthogonal matrix P as a product
+              of elementary reflectors.
+            If m < n, elements below the diagonal in the first NB
+              columns, with the array TAUQ, represent the orthogonal
+              matrix Q as a product of elementary reflectors, and
+              elements on and above the diagonal in the first NB rows,
+              with the array TAUP, represent the orthogonal matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) REAL array, dimension (NB)
+            The diagonal elements of the first NB rows and columns of
+            the reduced matrix.  D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (NB)
+            The off-diagonal elements of the first NB rows and columns of
+            the reduced matrix.
+
+    TAUQ    (output) REAL array dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix Q. See Further Details.
+
+    TAUP    (output) REAL array, dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the orthogonal matrix P. See Further Details.
+
+    X       (output) REAL array, dimension (LDX,NB)
+            The m-by-nb matrix X required to update the unreduced part
+            of A.
+
+    LDX     (input) INTEGER
+            The leading dimension of the array X. LDX >= M.
+
+    Y       (output) REAL array, dimension (LDY,NB)
+            The n-by-nb matrix Y required to update the unreduced part
+            of A.
+
+    LDY     (output) INTEGER
+            The leading dimension of the array Y. LDY >= N.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+       Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are real scalars, and v and u are real vectors.
+
+    If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+    A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The elements of the vectors v and u together form the m-by-nb matrix
+    V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+    the transformation to the unreduced part of the matrix, using a block
+    update of the form:  A := A - V*Y' - X*U'.
+
+    The contents of A on exit are illustrated by the following examples
+    with nb = 2:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+      (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+      (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )
+
+    where a denotes an element of the original matrix which is unchanged,
+    vi denotes an element of the vector defining H(i), and ui an element
+    of the vector defining G(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:m,i) */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + a_dim1],
+		    lda, &y[i__ + y_dim1], ldy, &c_b1011, &a[i__ + i__ *
+		    a_dim1], &c__1);
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &x[i__ + x_dim1],
+		    ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b1011, &a[i__ + i__ *
+		     a_dim1], &c__1);
+
+/*           Generate reflection Q(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ *
+		    a_dim1], &c__1, &tauq[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    if (i__ < *n) {
+		a[i__ + i__ * a_dim1] = 1.f;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + (i__ + 1)
+			 * a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &
+			c_b320, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + a_dim1],
+			lda, &a[i__ + i__ * a_dim1], &c__1, &c_b320, &y[i__ *
+			y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b1011, &
+			y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &x[i__ + x_dim1],
+			ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b320, &y[i__ *
+			y_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b1011, &y[i__ + 1 + i__ * y_dim1], &c__1)
+			;
+		i__2 = *n - i__;
+		sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+
+/*              Update A(i,i+1:n) */
+
+		i__2 = *n - i__;
+		sgemv_("No transpose", &i__2, &i__, &c_b1290, &y[i__ + 1 +
+			y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b1011, &a[i__
+			+ (i__ + 1) * a_dim1], lda);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b1011, &a[
+			i__ + (i__ + 1) * a_dim1], lda);
+
+/*              Generate reflection P(i) to annihilate A(i,i+2:n) */
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + (i__ + 1) * a_dim1], &a[i__ + min(
+			i__3,*n) * a_dim1], lda, &taup[i__]);
+		e[i__] = a[i__ + (i__ + 1) * a_dim1];
+		a[i__ + (i__ + 1) * a_dim1] = 1.f;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + (
+			i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1],
+			 lda, &c_b320, &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__;
+		sgemv_("Transpose", &i__2, &i__, &c_b1011, &y[i__ + 1 +
+			y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b320, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		sgemv_("No transpose", &i__2, &i__, &c_b1290, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b320, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i,i:n) */
+
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &y[i__ + y_dim1],
+		    ldy, &a[i__ + a_dim1], lda, &c_b1011, &a[i__ + i__ *
+		    a_dim1], lda);
+	    i__2 = i__ - 1;
+	    i__3 = *n - i__ + 1;
+	    sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[i__ * a_dim1 + 1],
+		    lda, &x[i__ + x_dim1], ldx, &c_b1011, &a[i__ + i__ *
+		    a_dim1], lda);
+
+/*           Generate reflection P(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    slarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[i__ + min(i__3,*n) *
+		    a_dim1], lda, &taup[i__]);
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    if (i__ < *m) {
+		a[i__ + i__ * a_dim1] = 1.f;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__ + 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 +
+			i__ * a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &
+			c_b320, &x[i__ + 1 + i__ * x_dim1], &c__1)
+			;
+		i__2 = *n - i__ + 1;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &y[i__ + y_dim1],
+			ldy, &a[i__ + i__ * a_dim1], lda, &c_b320, &x[i__ *
+			x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__ + 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[i__ *
+			a_dim1 + 1], lda, &a[i__ + i__ * a_dim1], lda, &
+			c_b320, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b1011, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		sscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+
+/*              Update A(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + 1 +
+			a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b1011, &a[i__
+			+ 1 + i__ * a_dim1], &c__1);
+		i__2 = *m - i__;
+		sgemv_("No transpose", &i__2, &i__, &c_b1290, &x[i__ + 1 +
+			x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b1011, &
+			a[i__ + 1 + i__ * a_dim1], &c__1);
+
+/*              Generate reflection Q(i) to annihilate A(i+2:m,i) */
+
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*m) +
+			i__ * a_dim1], &c__1, &tauq[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 + (i__
+			+ 1) * a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &c_b320, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 +
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b320, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b1011, &
+			y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__;
+		sgemv_("Transpose", &i__2, &i__, &c_b1011, &x[i__ + 1 +
+			x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b320, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		sgemv_("Transpose", &i__, &i__2, &c_b1290, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b1011, &y[i__ + 1 + i__ * y_dim1], &c__1)
+			;
+		i__2 = *n - i__;
+		sscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of SLABRD */
+
+} /* slabrd_ */
+
+/* Subroutine */ int slacpy_(char *uplo, integer *m, integer *n, real *a,
+	integer *lda, real *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLACPY copies all or part of a two-dimensional matrix A to another
+    matrix B.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be copied to B.
+            = 'U':      Upper triangular part
+            = 'L':      Lower triangular part
+            Otherwise:  All of the matrix A
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The m by n matrix A.  If UPLO = 'U', only the upper triangle
+            or trapezoid is accessed; if UPLO = 'L', only the lower
+            triangle or trapezoid is accessed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (output) REAL array, dimension (LDB,N)
+            On exit, B = A in the locations specified by UPLO.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		b[i__ + j * b_dim1] = a[i__ + j * a_dim1];
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+    return 0;
+
+/*     End of SLACPY */
+
+} /* slacpy_ */
+
+/* Subroutine */ int sladiv_(real *a, real *b, real *c__, real *d__, real *p,
+	real *q)
+{
+    static real e, f;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLADIV performs complex division in  real arithmetic
+
+                          a + i*b
+               p + i*q = ---------
+                          c + i*d
+
+    The algorithm is due to Robert L. Smith and can be found
+    in D. Knuth, The art of Computer Programming, Vol.2, p.195
+
+    Arguments
+    =========
+
+    A       (input) REAL
+    B       (input) REAL
+    C       (input) REAL
+    D       (input) REAL
+            The scalars a, b, c, and d in the above expression.
+
+    P       (output) REAL
+    Q       (output) REAL
+            The scalars p and q in the above expression.
+
+    =====================================================================
+*/
+
+
+    if (dabs(*d__) < dabs(*c__)) {
+	e = *d__ / *c__;
+	f = *c__ + *d__ * e;
+	*p = (*a + *b * e) / f;
+	*q = (*b - *a * e) / f;
+    } else {
+	e = *c__ / *d__;
+	f = *d__ + *c__ * e;
+	*p = (*b + *a * e) / f;
+	*q = (-(*a) + *b * e) / f;
+    }
+
+    return 0;
+
+/*     End of SLADIV */
+
+} /* sladiv_ */
+
+/* Subroutine */ int slae2_(real *a, real *b, real *c__, real *rt1, real *rt2)
+{
+    /* System generated locals */
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real ab, df, tb, sm, rt, adf, acmn, acmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAE2  computes the eigenvalues of a 2-by-2 symmetric matrix
+       [  A   B  ]
+       [  B   C  ].
+    On return, RT1 is the eigenvalue of larger absolute value, and RT2
+    is the eigenvalue of smaller absolute value.
+
+    Arguments
+    =========
+
+    A       (input) REAL
+            The (1,1) element of the 2-by-2 matrix.
+
+    B       (input) REAL
+            The (1,2) and (2,1) elements of the 2-by-2 matrix.
+
+    C       (input) REAL
+            The (2,2) element of the 2-by-2 matrix.
+
+    RT1     (output) REAL
+            The eigenvalue of larger absolute value.
+
+    RT2     (output) REAL
+            The eigenvalue of smaller absolute value.
+
+    Further Details
+    ===============
+
+    RT1 is accurate to a few ulps barring over/underflow.
+
+    RT2 may be inaccurate if there is massive cancellation in the
+    determinant A*C-B*B; higher precision or correctly rounded or
+    correctly truncated arithmetic would be needed to compute RT2
+    accurately in all cases.
+
+    Overflow is possible only if RT1 is within a factor of 5 of overflow.
+    Underflow is harmless if the input data is 0 or exceeds
+       underflow_threshold / macheps.
+
+   =====================================================================
+
+
+       Compute the eigenvalues
+*/
+
+    sm = *a + *c__;
+    df = *a - *c__;
+    adf = dabs(df);
+    tb = *b + *b;
+    ab = dabs(tb);
+    if (dabs(*a) > dabs(*c__)) {
+	acmx = *a;
+	acmn = *c__;
+    } else {
+	acmx = *c__;
+	acmn = *a;
+    }
+    if (adf > ab) {
+/* Computing 2nd power */
+	r__1 = ab / adf;
+	rt = adf * sqrt(r__1 * r__1 + 1.f);
+    } else if (adf < ab) {
+/* Computing 2nd power */
+	r__1 = adf / ab;
+	rt = ab * sqrt(r__1 * r__1 + 1.f);
+    } else {
+
+/*        Includes case AB=ADF=0 */
+
+	rt = ab * sqrt(2.f);
+    }
+    if (sm < 0.f) {
+	*rt1 = (sm - rt) * .5f;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else if (sm > 0.f) {
+	*rt1 = (sm + rt) * .5f;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else {
+
+/*        Includes case RT1 = RT2 = 0 */
+
+	*rt1 = rt * .5f;
+	*rt2 = rt * -.5f;
+    }
+    return 0;
+
+/*     End of SLAE2 */
+
+} /* slae2_ */
+
+/* Subroutine */ int slaed0_(integer *icompq, integer *qsiz, integer *n, real
+	*d__, real *e, real *q, integer *ldq, real *qstore, integer *ldqs,
+	real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
+    real r__1;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, k, iq, lgn, msd2, smm1, spm1, spm2;
+    static real temp;
+    static integer curr;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer iperm, indxq, iwrem;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    static integer iqptr, tlvls;
+    extern /* Subroutine */ int slaed1_(integer *, real *, real *, integer *,
+	    integer *, real *, integer *, real *, integer *, integer *),
+	    slaed7_(integer *, integer *, integer *, integer *, integer *,
+	    integer *, real *, real *, integer *, integer *, real *, integer *
+	    , real *, integer *, integer *, integer *, integer *, integer *,
+	    real *, real *, integer *, integer *);
+    static integer igivcl;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer igivnm, submat;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
+	    integer *, real *, integer *);
+    static integer curprb, subpbs, igivpt, curlvl, matsiz, iprmpt, smlsiz;
+    extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *,
+	    real *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLAED0 computes all eigenvalues and corresponding eigenvectors of a
+    symmetric tridiagonal matrix using the divide and conquer method.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            = 0:  Compute eigenvalues only.
+            = 1:  Compute eigenvectors of original dense symmetric matrix
+                  also.  On entry, Q contains the orthogonal matrix used
+                  to reduce the original matrix to tridiagonal form.
+            = 2:  Compute eigenvalues and eigenvectors of tridiagonal
+                  matrix.
+
+    QSIZ   (input) INTEGER
+           The dimension of the orthogonal matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, the main diagonal of the tridiagonal matrix.
+           On exit, its eigenvalues.
+
+    E      (input) REAL array, dimension (N-1)
+           The off-diagonal elements of the tridiagonal matrix.
+           On exit, E has been destroyed.
+
+    Q      (input/output) REAL array, dimension (LDQ, N)
+           On entry, Q must contain an N-by-N orthogonal matrix.
+           If ICOMPQ = 0    Q is not referenced.
+           If ICOMPQ = 1    On entry, Q is a subset of the columns of the
+                            orthogonal matrix used to reduce the full
+                            matrix to tridiagonal form corresponding to
+                            the subset of the full matrix which is being
+                            decomposed at this time.
+           If ICOMPQ = 2    On entry, Q will be the identity matrix.
+                            On exit, Q contains the eigenvectors of the
+                            tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  If eigenvectors are
+           desired, then  LDQ >= max(1,N).  In any case,  LDQ >= 1.
+
+    QSTORE (workspace) REAL array, dimension (LDQS, N)
+           Referenced only when ICOMPQ = 1.  Used to store parts of
+           the eigenvector matrix when the updating matrix multiplies
+           take place.
+
+    LDQS   (input) INTEGER
+           The leading dimension of the array QSTORE.  If ICOMPQ = 1,
+           then  LDQS >= max(1,N).  In any case,  LDQS >= 1.
+
+    WORK   (workspace) REAL array,
+           If ICOMPQ = 0 or 1, the dimension of WORK must be at least
+                       1 + 3*N + 2*N*lg N + 2*N**2
+                       ( lg( N ) = smallest integer k
+                                   such that 2^k >= N )
+           If ICOMPQ = 2, the dimension of WORK must be at least
+                       4*N + N**2.
+
+    IWORK  (workspace) INTEGER array,
+           If ICOMPQ = 0 or 1, the dimension of IWORK must be at least
+                          6 + 6*N + 5*N*lg N.
+                          ( lg( N ) = smallest integer k
+                                      such that 2^k >= N )
+           If ICOMPQ = 2, the dimension of IWORK must be at least
+                          3 + 5*N.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    qstore_dim1 = *ldqs;
+    qstore_offset = 1 + qstore_dim1;
+    qstore -= qstore_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 2)) {
+	*info = -1;
+    } else if (*icompq == 1 && *qsiz < max(0,*n)) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ldq < max(1,*n)) {
+	*info = -7;
+    } else if (*ldqs < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED0", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "SLAED0", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Determine the size and placement of the submatrices, and save in
+       the leading elements of IWORK.
+*/
+
+    iwork[1] = *n;
+    subpbs = 1;
+    tlvls = 0;
+L10:
+    if (iwork[subpbs] > smlsiz) {
+	for (j = subpbs; j >= 1; --j) {
+	    iwork[j * 2] = (iwork[j] + 1) / 2;
+	    iwork[((j) << (1)) - 1] = iwork[j] / 2;
+/* L20: */
+	}
+	++tlvls;
+	subpbs <<= 1;
+	goto L10;
+    }
+    i__1 = subpbs;
+    for (j = 2; j <= i__1; ++j) {
+	iwork[j] += iwork[j - 1];
+/* L30: */
+    }
+
+/*
+       Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+       using rank-1 modifications (cuts).
+*/
+
+    spm1 = subpbs - 1;
+    i__1 = spm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	submat = iwork[i__] + 1;
+	smm1 = submat - 1;
+	d__[smm1] -= (r__1 = e[smm1], dabs(r__1));
+	d__[submat] -= (r__1 = e[smm1], dabs(r__1));
+/* L40: */
+    }
+
+    indxq = ((*n) << (2)) + 3;
+    if (*icompq != 2) {
+
+/*
+          Set up workspaces for eigenvalues only/accumulate new vectors
+          routine
+*/
+
+	temp = log((real) (*n)) / log(2.f);
+	lgn = (integer) temp;
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	iprmpt = indxq + *n + 1;
+	iperm = iprmpt + *n * lgn;
+	iqptr = iperm + *n * lgn;
+	igivpt = iqptr + *n + 2;
+	igivcl = igivpt + *n * lgn;
+
+	igivnm = 1;
+	iq = igivnm + ((*n) << (1)) * lgn;
+/* Computing 2nd power */
+	i__1 = *n;
+	iwrem = iq + i__1 * i__1 + 1;
+
+/*        Initialize pointers */
+
+	i__1 = subpbs;
+	for (i__ = 0; i__ <= i__1; ++i__) {
+	    iwork[iprmpt + i__] = 1;
+	    iwork[igivpt + i__] = 1;
+/* L50: */
+	}
+	iwork[iqptr] = 1;
+    }
+
+/*
+       Solve each submatrix eigenproblem at the bottom of the divide and
+       conquer tree.
+*/
+
+    curr = 0;
+    i__1 = spm1;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+	if (i__ == 0) {
+	    submat = 1;
+	    matsiz = iwork[1];
+	} else {
+	    submat = iwork[i__] + 1;
+	    matsiz = iwork[i__ + 1] - iwork[i__];
+	}
+	if (*icompq == 2) {
+	    ssteqr_("I", &matsiz, &d__[submat], &e[submat], &q[submat +
+		    submat * q_dim1], ldq, &work[1], info);
+	    if (*info != 0) {
+		goto L130;
+	    }
+	} else {
+	    ssteqr_("I", &matsiz, &d__[submat], &e[submat], &work[iq - 1 +
+		    iwork[iqptr + curr]], &matsiz, &work[1], info);
+	    if (*info != 0) {
+		goto L130;
+	    }
+	    if (*icompq == 1) {
+		sgemm_("N", "N", qsiz, &matsiz, &matsiz, &c_b1011, &q[submat *
+			 q_dim1 + 1], ldq, &work[iq - 1 + iwork[iqptr + curr]]
+			, &matsiz, &c_b320, &qstore[submat * qstore_dim1 + 1],
+			 ldqs);
+	    }
+/* Computing 2nd power */
+	    i__2 = matsiz;
+	    iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
+	    ++curr;
+	}
+	k = 1;
+	i__2 = iwork[i__ + 1];
+	for (j = submat; j <= i__2; ++j) {
+	    iwork[indxq + j] = k;
+	    ++k;
+/* L60: */
+	}
+/* L70: */
+    }
+
+/*
+       Successively merge eigensystems of adjacent submatrices
+       into eigensystem for the corresponding larger matrix.
+
+       while ( SUBPBS > 1 )
+*/
+
+    curlvl = 1;
+L80:
+    if (subpbs > 1) {
+	spm2 = subpbs - 2;
+	i__1 = spm2;
+	for (i__ = 0; i__ <= i__1; i__ += 2) {
+	    if (i__ == 0) {
+		submat = 1;
+		matsiz = iwork[2];
+		msd2 = iwork[1];
+		curprb = 0;
+	    } else {
+		submat = iwork[i__] + 1;
+		matsiz = iwork[i__ + 2] - iwork[i__];
+		msd2 = matsiz / 2;
+		++curprb;
+	    }
+
+/*
+       Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+       into an eigensystem of size MATSIZ.
+       SLAED1 is used only for the full eigensystem of a tridiagonal
+       matrix.
+       SLAED7 handles the cases in which eigenvalues only or eigenvalues
+       and eigenvectors of a full symmetric matrix (which was reduced to
+       tridiagonal form) are desired.
+*/
+
+	    if (*icompq == 2) {
+		slaed1_(&matsiz, &d__[submat], &q[submat + submat * q_dim1],
+			ldq, &iwork[indxq + submat], &e[submat + msd2 - 1], &
+			msd2, &work[1], &iwork[subpbs + 1], info);
+	    } else {
+		slaed7_(icompq, &matsiz, qsiz, &tlvls, &curlvl, &curprb, &d__[
+			submat], &qstore[submat * qstore_dim1 + 1], ldqs, &
+			iwork[indxq + submat], &e[submat + msd2 - 1], &msd2, &
+			work[iq], &iwork[iqptr], &iwork[iprmpt], &iwork[iperm]
+			, &iwork[igivpt], &iwork[igivcl], &work[igivnm], &
+			work[iwrem], &iwork[subpbs + 1], info);
+	    }
+	    if (*info != 0) {
+		goto L130;
+	    }
+	    iwork[i__ / 2 + 1] = iwork[i__ + 2];
+/* L90: */
+	}
+	subpbs /= 2;
+	++curlvl;
+	goto L80;
+    }
+
+/*
+       end while
+
+       Re-merge the eigenvalues/vectors which were deflated at the final
+       merge step.
+*/
+
+    if (*icompq == 1) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    j = iwork[indxq + i__];
+	    work[i__] = d__[j];
+	    scopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1
+		    + 1], &c__1);
+/* L100: */
+	}
+	scopy_(n, &work[1], &c__1, &d__[1], &c__1);
+    } else if (*icompq == 2) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    j = iwork[indxq + i__];
+	    work[i__] = d__[j];
+	    scopy_(n, &q[j * q_dim1 + 1], &c__1, &work[*n * i__ + 1], &c__1);
+/* L110: */
+	}
+	scopy_(n, &work[1], &c__1, &d__[1], &c__1);
+	slacpy_("A", n, n, &work[*n + 1], n, &q[q_offset], ldq);
+    } else {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    j = iwork[indxq + i__];
+	    work[i__] = d__[j];
+/* L120: */
+	}
+	scopy_(n, &work[1], &c__1, &d__[1], &c__1);
+    }
+    goto L140;
+
+L130:
+    *info = submat * (*n + 1) + submat + matsiz - 1;
+
+L140:
+    return 0;
+
+/*     End of SLAED0 */
+
+} /* slaed0_ */
+
+/* Subroutine */ int slaed1_(integer *n, real *d__, real *q, integer *ldq,
+	integer *indxq, real *rho, integer *cutpnt, real *work, integer *
+	iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, k, n1, n2, is, iw, iz, iq2, cpp1, indx, indxc, indxp;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), slaed2_(integer *, integer *, integer *, real *, real
+	    *, integer *, integer *, real *, real *, real *, real *, real *,
+	    integer *, integer *, integer *, integer *, integer *), slaed3_(
+	    integer *, integer *, integer *, real *, real *, integer *, real *
+	    , real *, real *, integer *, integer *, real *, real *, integer *)
+	    ;
+    static integer idlmda;
+    extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
+	    integer *, integer *, real *, integer *, integer *, integer *);
+    static integer coltyp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLAED1 computes the updated eigensystem of a diagonal
+    matrix after modification by a rank-one symmetric matrix.  This
+    routine is used only for the eigenproblem which requires all
+    eigenvalues and eigenvectors of a tridiagonal matrix.  SLAED7 handles
+    the case in which eigenvalues only or eigenvalues and eigenvectors
+    of a full symmetric matrix (which was reduced to tridiagonal form)
+    are desired.
+
+      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+
+       where Z = Q'u, u is a vector of length N with ones in the
+       CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+
+       The eigenvectors of the original matrix are stored in Q, and the
+       eigenvalues are in D.  The algorithm consists of three stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple eigenvalues or if there is a zero in
+          the Z vector.  For each such occurence the dimension of the
+          secular equation problem is reduced by one.  This stage is
+          performed by the routine SLAED2.
+
+          The second stage consists of calculating the updated
+          eigenvalues. This is done by finding the roots of the secular
+          equation via the routine SLAED4 (as called by SLAED3).
+          This routine also calculates the eigenvectors of the current
+          problem.
+
+          The final stage consists of computing the updated eigenvectors
+          directly using the updated eigenvalues.  The eigenvectors for
+          the current problem are multiplied with the eigenvectors from
+          the overall problem.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, the eigenvalues of the rank-1-perturbed matrix.
+           On exit, the eigenvalues of the repaired matrix.
+
+    Q      (input/output) REAL array, dimension (LDQ,N)
+           On entry, the eigenvectors of the rank-1-perturbed matrix.
+           On exit, the eigenvectors of the repaired tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (input/output) INTEGER array, dimension (N)
+           On entry, the permutation which separately sorts the two
+           subproblems in D into ascending order.
+           On exit, the permutation which will reintegrate the
+           subproblems back into sorted order,
+           i.e. D( INDXQ( I = 1, N ) ) will be in ascending order.
+
+    RHO    (input) REAL
+           The subdiagonal entry used to create the rank-1 modification.
+
+    CUTPNT (input) INTEGER
+           The location of the last eigenvalue in the leading sub-matrix.
+           min(1,N) <= CUTPNT <= N/2.
+
+    WORK   (workspace) REAL array, dimension (4*N + N**2)
+
+    IWORK  (workspace) INTEGER array, dimension (4*N)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -1;
+    } else if (*ldq < max(1,*n)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MIN */
+	i__1 = 1, i__2 = *n / 2;
+	if ((min(i__1,i__2) > *cutpnt) || (*n / 2 < *cutpnt)) {
+	    *info = -7;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED1", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*
+       The following values are integer pointers which indicate
+       the portion of the workspace
+       used by a particular array in SLAED2 and SLAED3.
+*/
+
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq2 = iw + *n;
+
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+
+
+/*
+       Form the z-vector which consists of the last row of Q_1 and the
+       first row of Q_2.
+*/
+
+    scopy_(cutpnt, &q[*cutpnt + q_dim1], ldq, &work[iz], &c__1);
+    cpp1 = *cutpnt + 1;
+    i__1 = *n - *cutpnt;
+    scopy_(&i__1, &q[cpp1 + cpp1 * q_dim1], ldq, &work[iz + *cutpnt], &c__1);
+
+/*     Deflate eigenvalues. */
+
+    slaed2_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, &indxq[1], rho, &work[
+	    iz], &work[idlmda], &work[iw], &work[iq2], &iwork[indx], &iwork[
+	    indxc], &iwork[indxp], &iwork[coltyp], info);
+
+    if (*info != 0) {
+	goto L20;
+    }
+
+/*     Solve Secular Equation. */
+
+    if (k != 0) {
+	is = (iwork[coltyp] + iwork[coltyp + 1]) * *cutpnt + (iwork[coltyp +
+		1] + iwork[coltyp + 2]) * (*n - *cutpnt) + iq2;
+	slaed3_(&k, n, cutpnt, &d__[1], &q[q_offset], ldq, rho, &work[idlmda],
+		 &work[iq2], &iwork[indxc], &iwork[coltyp], &work[iw], &work[
+		is], info);
+	if (*info != 0) {
+	    goto L20;
+	}
+
+/*     Prepare the INDXQ sorting permutation. */
+
+	n1 = k;
+	n2 = *n - k;
+	slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    indxq[i__] = i__;
+/* L10: */
+	}
+    }
+
+L20:
+    return 0;
+
+/*     End of SLAED1 */
+
+} /* slaed1_ */
+
+/* Subroutine */ int slaed2_(integer *k, integer *n, integer *n1, real *d__,
+	real *q, integer *ldq, integer *indxq, real *rho, real *z__, real *
+	dlamda, real *w, real *q2, integer *indx, integer *indxc, integer *
+	indxp, integer *coltyp, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+    real r__1, r__2, r__3, r__4;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real c__;
+    static integer i__, j;
+    static real s, t;
+    static integer k2, n2, ct, nj, pj, js, iq1, iq2, n1p1;
+    static real eps, tau, tol;
+    static integer psm[4], imax, jmax, ctot[4];
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *), sscal_(integer *, real *, real *,
+	    integer *), scopy_(integer *, real *, integer *, real *, integer *
+	    );
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
+	    *, integer *, integer *), slacpy_(char *, integer *, integer *,
+	    real *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLAED2 merges the two sets of eigenvalues together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    eigenvalues are close together or if there is a tiny entry in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    Arguments
+    =========
+
+    K      (output) INTEGER
+           The number of non-deflated eigenvalues, and the order of the
+           related secular equation. 0 <= K <=N.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    N1     (input) INTEGER
+           The location of the last eigenvalue in the leading sub-matrix.
+           min(1,N) <= N1 <= N/2.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, D contains the eigenvalues of the two submatrices to
+           be combined.
+           On exit, D contains the trailing (N-K) updated eigenvalues
+           (those which were deflated) sorted into increasing order.
+
+    Q      (input/output) REAL array, dimension (LDQ, N)
+           On entry, Q contains the eigenvectors of two submatrices in
+           the two square blocks with corners at (1,1), (N1,N1)
+           and (N1+1, N1+1), (N,N).
+           On exit, Q contains the trailing (N-K) updated eigenvectors
+           (those which were deflated) in its last N-K columns.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (input/output) INTEGER array, dimension (N)
+           The permutation which separately sorts the two sub-problems
+           in D into ascending order.  Note that elements in the second
+           half of this permutation must first have N1 added to their
+           values. Destroyed on exit.
+
+    RHO    (input/output) REAL
+           On entry, the off-diagonal element associated with the rank-1
+           cut which originally split the two submatrices which are now
+           being recombined.
+           On exit, RHO has been modified to the value required by
+           SLAED3.
+
+    Z      (input) REAL array, dimension (N)
+           On entry, Z contains the updating vector (the last
+           row of the first sub-eigenvector matrix and the first row of
+           the second sub-eigenvector matrix).
+           On exit, the contents of Z have been destroyed by the updating
+           process.
+
+    DLAMDA (output) REAL array, dimension (N)
+           A copy of the first K eigenvalues which will be used by
+           SLAED3 to form the secular equation.
+
+    W      (output) REAL array, dimension (N)
+           The first k values of the final deflation-altered z-vector
+           which will be passed to SLAED3.
+
+    Q2     (output) REAL array, dimension (N1**2+(N-N1)**2)
+           A copy of the first K eigenvectors which will be used by
+           SLAED3 in a matrix multiply (SGEMM) to solve for the new
+           eigenvectors.
+
+    INDX   (workspace) INTEGER array, dimension (N)
+           The permutation used to sort the contents of DLAMDA into
+           ascending order.
+
+    INDXC  (output) INTEGER array, dimension (N)
+           The permutation used to arrange the columns of the deflated
+           Q matrix into three groups:  the first group contains non-zero
+           elements only at and above N1, the second contains
+           non-zero elements only below N1, and the third is dense.
+
+    INDXP  (workspace) INTEGER array, dimension (N)
+           The permutation used to place deflated values of D at the end
+           of the array.  INDXP(1:K) points to the nondeflated D-values
+           and INDXP(K+1:N) points to the deflated eigenvalues.
+
+    COLTYP (workspace/output) INTEGER array, dimension (N)
+           During execution, a label which will indicate which of the
+           following types a column in the Q2 matrix is:
+           1 : non-zero in the upper half only;
+           2 : dense;
+           3 : non-zero in the lower half only;
+           4 : deflated.
+           On exit, COLTYP(i) is the number of columns of type i,
+           for i=1 to 4 only.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --z__;
+    --dlamda;
+    --w;
+    --q2;
+    --indx;
+    --indxc;
+    --indxp;
+    --coltyp;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -2;
+    } else if (*ldq < max(1,*n)) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MIN */
+	i__1 = 1, i__2 = *n / 2;
+	if ((min(i__1,i__2) > *n1) || (*n / 2 < *n1)) {
+	    *info = -3;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    n2 = *n - *n1;
+    n1p1 = *n1 + 1;
+
+    if (*rho < 0.f) {
+	sscal_(&n2, &c_b1290, &z__[n1p1], &c__1);
+    }
+
+/*
+       Normalize z so that norm(z) = 1.  Since z is the concatenation of
+       two normalized vectors, norm2(z) = sqrt(2).
+*/
+
+    t = 1.f / sqrt(2.f);
+    sscal_(n, &t, &z__[1], &c__1);
+
+/*     RHO = ABS( norm(z)**2 * RHO ) */
+
+    *rho = (r__1 = *rho * 2.f, dabs(r__1));
+
+/*     Sort the eigenvalues into increasing order */
+
+    i__1 = *n;
+    for (i__ = n1p1; i__ <= i__1; ++i__) {
+	indxq[i__] += *n1;
+/* L10: */
+    }
+
+/*     re-integrate the deflated parts from the last pass */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = d__[indxq[i__]];
+/* L20: */
+    }
+    slamrg_(n1, &n2, &dlamda[1], &c__1, &c__1, &indxc[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	indx[i__] = indxq[indxc[i__]];
+/* L30: */
+    }
+
+/*     Calculate the allowable deflation tolerance */
+
+    imax = isamax_(n, &z__[1], &c__1);
+    jmax = isamax_(n, &d__[1], &c__1);
+    eps = slamch_("Epsilon");
+/* Computing MAX */
+    r__3 = (r__1 = d__[jmax], dabs(r__1)), r__4 = (r__2 = z__[imax], dabs(
+	    r__2));
+    tol = eps * 8.f * dmax(r__3,r__4);
+
+/*
+       If the rank-1 modifier is small enough, no more needs to be done
+       except to reorganize Q so that its columns correspond with the
+       elements in D.
+*/
+
+    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
+	*k = 0;
+	iq2 = 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__ = indx[j];
+	    scopy_(n, &q[i__ * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
+	    dlamda[j] = d__[i__];
+	    iq2 += *n;
+/* L40: */
+	}
+	slacpy_("A", n, n, &q2[1], n, &q[q_offset], ldq);
+	scopy_(n, &dlamda[1], &c__1, &d__[1], &c__1);
+	goto L190;
+    }
+
+/*
+       If there are multiple eigenvalues then the problem deflates.  Here
+       the number of equal eigenvalues are found.  As each equal
+       eigenvalue is found, an elementary reflector is computed to rotate
+       the corresponding eigensubspace so that the corresponding
+       components of Z are zero in this new basis.
+*/
+
+    i__1 = *n1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	coltyp[i__] = 1;
+/* L50: */
+    }
+    i__1 = *n;
+    for (i__ = n1p1; i__ <= i__1; ++i__) {
+	coltyp[i__] = 3;
+/* L60: */
+    }
+
+
+    *k = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	nj = indx[j];
+	if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    coltyp[nj] = 4;
+	    indxp[k2] = nj;
+	    if (j == *n) {
+		goto L100;
+	    }
+	} else {
+	    pj = nj;
+	    goto L80;
+	}
+/* L70: */
+    }
+L80:
+    ++j;
+    nj = indx[j];
+    if (j > *n) {
+	goto L100;
+    }
+    if (*rho * (r__1 = z__[nj], dabs(r__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	coltyp[nj] = 4;
+	indxp[k2] = nj;
+    } else {
+
+/*        Check if eigenvalues are close enough to allow deflation. */
+
+	s = z__[pj];
+	c__ = z__[nj];
+
+/*
+          Find sqrt(a**2+b**2) without overflow or
+          destructive underflow.
+*/
+
+	tau = slapy2_(&c__, &s);
+	t = d__[nj] - d__[pj];
+	c__ /= tau;
+	s = -s / tau;
+	if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    z__[nj] = tau;
+	    z__[pj] = 0.f;
+	    if (coltyp[nj] != coltyp[pj]) {
+		coltyp[nj] = 2;
+	    }
+	    coltyp[pj] = 4;
+	    srot_(n, &q[pj * q_dim1 + 1], &c__1, &q[nj * q_dim1 + 1], &c__1, &
+		    c__, &s);
+/* Computing 2nd power */
+	    r__1 = c__;
+/* Computing 2nd power */
+	    r__2 = s;
+	    t = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
+/* Computing 2nd power */
+	    r__1 = s;
+/* Computing 2nd power */
+	    r__2 = c__;
+	    d__[nj] = d__[pj] * (r__1 * r__1) + d__[nj] * (r__2 * r__2);
+	    d__[pj] = t;
+	    --k2;
+	    i__ = 1;
+L90:
+	    if (k2 + i__ <= *n) {
+		if (d__[pj] < d__[indxp[k2 + i__]]) {
+		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+		    indxp[k2 + i__] = pj;
+		    ++i__;
+		    goto L90;
+		} else {
+		    indxp[k2 + i__ - 1] = pj;
+		}
+	    } else {
+		indxp[k2 + i__ - 1] = pj;
+	    }
+	    pj = nj;
+	} else {
+	    ++(*k);
+	    dlamda[*k] = d__[pj];
+	    w[*k] = z__[pj];
+	    indxp[*k] = pj;
+	    pj = nj;
+	}
+    }
+    goto L80;
+L100:
+
+/*     Record the last eigenvalue. */
+
+    ++(*k);
+    dlamda[*k] = d__[pj];
+    w[*k] = z__[pj];
+    indxp[*k] = pj;
+
+/*
+       Count up the total number of the various types of columns, then
+       form a permutation which positions the four column types into
+       four uniform groups (although one or more of these groups may be
+       empty).
+*/
+
+    for (j = 1; j <= 4; ++j) {
+	ctot[j - 1] = 0;
+/* L110: */
+    }
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	ct = coltyp[j];
+	++ctot[ct - 1];
+/* L120: */
+    }
+
+/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
+
+    psm[0] = 1;
+    psm[1] = ctot[0] + 1;
+    psm[2] = psm[1] + ctot[1];
+    psm[3] = psm[2] + ctot[2];
+    *k = *n - ctot[3];
+
+/*
+       Fill out the INDXC array so that the permutation which it induces
+       will place all type-1 columns first, all type-2 columns next,
+       then all type-3's, and finally all type-4's.
+*/
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	js = indxp[j];
+	ct = coltyp[js];
+	indx[psm[ct - 1]] = js;
+	indxc[psm[ct - 1]] = j;
+	++psm[ct - 1];
+/* L130: */
+    }
+
+/*
+       Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+       and Q2 respectively.  The eigenvalues/vectors which were not
+       deflated go into the first K slots of DLAMDA and Q2 respectively,
+       while those which were deflated go into the last N - K slots.
+*/
+
+    i__ = 1;
+    iq1 = 1;
+    iq2 = (ctot[0] + ctot[1]) * *n1 + 1;
+    i__1 = ctot[0];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
+	z__[i__] = d__[js];
+	++i__;
+	iq1 += *n1;
+/* L140: */
+    }
+
+    i__1 = ctot[1];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	scopy_(n1, &q[js * q_dim1 + 1], &c__1, &q2[iq1], &c__1);
+	scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
+	z__[i__] = d__[js];
+	++i__;
+	iq1 += *n1;
+	iq2 += n2;
+/* L150: */
+    }
+
+    i__1 = ctot[2];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	scopy_(&n2, &q[*n1 + 1 + js * q_dim1], &c__1, &q2[iq2], &c__1);
+	z__[i__] = d__[js];
+	++i__;
+	iq2 += n2;
+/* L160: */
+    }
+
+    iq1 = iq2;
+    i__1 = ctot[3];
+    for (j = 1; j <= i__1; ++j) {
+	js = indx[i__];
+	scopy_(n, &q[js * q_dim1 + 1], &c__1, &q2[iq2], &c__1);
+	iq2 += *n;
+	z__[i__] = d__[js];
+	++i__;
+/* L170: */
+    }
+
+/*
+       The deflated eigenvalues and their corresponding vectors go back
+       into the last N - K slots of D and Q respectively.
+*/
+
+    slacpy_("A", n, &ctot[3], &q2[iq1], n, &q[(*k + 1) * q_dim1 + 1], ldq);
+    i__1 = *n - *k;
+    scopy_(&i__1, &z__[*k + 1], &c__1, &d__[*k + 1], &c__1);
+
+/*     Copy CTOT into COLTYP for referencing in SLAED3. */
+
+    for (j = 1; j <= 4; ++j) {
+	coltyp[j] = ctot[j - 1];
+/* L180: */
+    }
+
+L190:
+    return 0;
+
+/*     End of SLAED2 */
+
+} /* slaed2_ */
+
+/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__,
+	real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer *
+	indx, integer *ctot, real *w, real *s, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer i__, j, n2, n12, ii, n23, iq2;
+    static real temp;
+    extern doublereal snrm2_(integer *, real *, integer *);
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *), scopy_(integer *, real *,
+	    integer *, real *, integer *), slaed4_(integer *, integer *, real
+	    *, real *, real *, real *, real *, integer *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_(
+	    char *, integer *, integer *, real *, integer *, real *, integer *
+	    ), slaset_(char *, integer *, integer *, real *, real *,
+	    real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLAED3 finds the roots of the secular equation, as defined by the
+    values in D, W, and RHO, between 1 and K.  It makes the
+    appropriate calls to SLAED4 and then updates the eigenvectors by
+    multiplying the matrix of eigenvectors of the pair of eigensystems
+    being combined by the matrix of eigenvectors of the K-by-K system
+    which is solved here.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    K       (input) INTEGER
+            The number of terms in the rational function to be solved by
+            SLAED4.  K >= 0.
+
+    N       (input) INTEGER
+            The number of rows and columns in the Q matrix.
+            N >= K (deflation may result in N>K).
+
+    N1      (input) INTEGER
+            The location of the last eigenvalue in the leading submatrix.
+            min(1,N) <= N1 <= N/2.
+
+    D       (output) REAL array, dimension (N)
+            D(I) contains the updated eigenvalues for
+            1 <= I <= K.
+
+    Q       (output) REAL array, dimension (LDQ,N)
+            Initially the first K columns are used as workspace.
+            On output the columns 1 to K contain
+            the updated eigenvectors.
+
+    LDQ     (input) INTEGER
+            The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    RHO     (input) REAL
+            The value of the parameter in the rank one update equation.
+            RHO >= 0 required.
+
+    DLAMDA  (input/output) REAL array, dimension (K)
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation. May be changed on output by
+            having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
+            Cray-2, or Cray C-90, as described above.
+
+    Q2      (input) REAL array, dimension (LDQ2, N)
+            The first K columns of this matrix contain the non-deflated
+            eigenvectors for the split problem.
+
+    INDX    (input) INTEGER array, dimension (N)
+            The permutation used to arrange the columns of the deflated
+            Q matrix into three groups (see SLAED2).
+            The rows of the eigenvectors found by SLAED4 must be likewise
+            permuted before the matrix multiply can take place.
+
+    CTOT    (input) INTEGER array, dimension (4)
+            A count of the total number of the various types of columns
+            in Q, as described in INDX.  The fourth column type is any
+            column which has been deflated.
+
+    W       (input/output) REAL array, dimension (K)
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating vector. Destroyed on
+            output.
+
+    S       (workspace) REAL array, dimension (N1 + 1)*K
+            Will contain the eigenvectors of the repaired matrix which
+            will be multiplied by the previously accumulated eigenvectors
+            to update the system.
+
+    LDS     (input) INTEGER
+            The leading dimension of S.  LDS >= max(1,K).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dlamda;
+    --q2;
+    --indx;
+    --ctot;
+    --w;
+    --s;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*k < 0) {
+	*info = -1;
+    } else if (*n < *k) {
+	*info = -2;
+    } else if (*ldq < max(1,*n)) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED3", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 0) {
+	return 0;
+    }
+
+/*
+       Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DLAMDA(I) if it is 1; this makes the subsequent
+       subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DLAMDA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DLAMDA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
+/* L10: */
+    }
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j],
+		info);
+
+/*        If the zero finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    goto L120;
+	}
+/* L20: */
+    }
+
+    if (*k == 1) {
+	goto L110;
+    }
+    if (*k == 2) {
+	i__1 = *k;
+	for (j = 1; j <= i__1; ++j) {
+	    w[1] = q[j * q_dim1 + 1];
+	    w[2] = q[j * q_dim1 + 2];
+	    ii = indx[1];
+	    q[j * q_dim1 + 1] = w[ii];
+	    ii = indx[2];
+	    q[j * q_dim1 + 2] = w[ii];
+/* L30: */
+	}
+	goto L110;
+    }
+
+/*     Compute updated W. */
+
+    scopy_(k, &w[1], &c__1, &s[1], &c__1);
+
+/*     Initialize W(I) = Q(I,I) */
+
+    i__1 = *ldq + 1;
+    scopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L40: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L50: */
+	}
+/* L60: */
+    }
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__1 = sqrt(-w[i__]);
+	w[i__] = r_sign(&r__1, &s[i__]);
+/* L70: */
+    }
+
+/*     Compute eigenvectors of the modified rank-1 modification. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    s[i__] = w[i__] / q[i__ + j * q_dim1];
+/* L80: */
+	}
+	temp = snrm2_(k, &s[1], &c__1);
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    ii = indx[i__];
+	    q[i__ + j * q_dim1] = s[ii] / temp;
+/* L90: */
+	}
+/* L100: */
+    }
+
+/*     Compute the updated eigenvectors. */
+
+L110:
+
+    n2 = *n - *n1;
+    n12 = ctot[1] + ctot[2];
+    n23 = ctot[2] + ctot[3];
+
+    slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23);
+    iq2 = *n1 * n12 + 1;
+    if (n23 != 0) {
+	sgemm_("N", "N", &n2, k, &n23, &c_b1011, &q2[iq2], &n2, &s[1], &n23, &
+		c_b320, &q[*n1 + 1 + q_dim1], ldq);
+    } else {
+	slaset_("A", &n2, k, &c_b320, &c_b320, &q[*n1 + 1 + q_dim1], ldq);
+    }
+
+    slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12);
+    if (n12 != 0) {
+	sgemm_("N", "N", n1, k, &n12, &c_b1011, &q2[1], n1, &s[1], &n12, &
+		c_b320, &q[q_offset], ldq);
+    } else {
+	slaset_("A", n1, k, &c_b320, &c_b320, &q[q_dim1 + 1], ldq);
+    }
+
+
+L120:
+    return 0;
+
+/*     End of SLAED3 */
+
+} /* slaed3_ */
+
+/* Subroutine */ int slaed4_(integer *n, integer *i__, real *d__, real *z__,
+	real *delta, real *rho, real *dlam, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real a, b, c__;
+    static integer j;
+    static real w;
+    static integer ii;
+    static real dw, zz[3];
+    static integer ip1;
+    static real del, eta, phi, eps, tau, psi;
+    static integer iim1, iip1;
+    static real dphi, dpsi;
+    static integer iter;
+    static real temp, prew, temp1, dltlb, dltub, midpt;
+    static integer niter;
+    static logical swtch;
+    extern /* Subroutine */ int slaed5_(integer *, real *, real *, real *,
+	    real *, real *), slaed6_(integer *, logical *, real *, real *,
+	    real *, real *, real *, integer *);
+    static logical swtch3;
+    extern doublereal slamch_(char *);
+    static logical orgati;
+    static real erretm, rhoinv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       December 23, 1999
+
+
+    Purpose
+    =======
+
+    This subroutine computes the I-th updated eigenvalue of a symmetric
+    rank-one modification to a diagonal matrix whose elements are
+    given in the array d, and that
+
+               D(i) < D(j)  for  i < j
+
+    and that RHO > 0.  This is arranged by the calling routine, and is
+    no loss in generality.  The rank-one modified system is thus
+
+               diag( D )  +  RHO *  Z * Z_transpose.
+
+    where we assume the Euclidean norm of Z is 1.
+
+    The method consists of approximating the rational functions in the
+    secular equation by simpler interpolating rational functions.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The length of all arrays.
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  1 <= I <= N.
+
+    D      (input) REAL array, dimension (N)
+           The original eigenvalues.  It is assumed that they are in
+           order, D(I) < D(J)  for I < J.
+
+    Z      (input) REAL array, dimension (N)
+           The components of the updating vector.
+
+    DELTA  (output) REAL array, dimension (N)
+           If N .ne. 1, DELTA contains (D(j) - lambda_I) in its  j-th
+           component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+           contains the information necessary to construct the
+           eigenvectors.
+
+    RHO    (input) REAL
+           The scalar in the symmetric updating formula.
+
+    DLAM   (output) REAL
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit
+           > 0:  if INFO = 1, the updating process failed.
+
+    Internal Parameters
+    ===================
+
+    Logical variable ORGATI (origin-at-i?) is used for distinguishing
+    whether D(i) or D(i+1) is treated as the origin.
+
+              ORGATI = .true.    origin at i
+              ORGATI = .false.   origin at i+1
+
+     Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+     if we are working with THREE poles!
+
+     MAXIT is the maximum number of iterations allowed for each
+     eigenvalue.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Since this routine is called in an inner loop, we do no argument
+       checking.
+
+       Quick return for N=1 and 2.
+*/
+
+    /* Parameter adjustments */
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    if (*n == 1) {
+
+/*         Presumably, I=1 upon entry */
+
+	*dlam = d__[1] + *rho * z__[1] * z__[1];
+	delta[1] = 1.f;
+	return 0;
+    }
+    if (*n == 2) {
+	slaed5_(i__, &d__[1], &z__[1], &delta[1], rho, dlam);
+	return 0;
+    }
+
+/*     Compute machine epsilon */
+
+    eps = slamch_("Epsilon");
+    rhoinv = 1.f / *rho;
+
+/*     The case I = N */
+
+    if (*i__ == *n) {
+
+/*        Initialize some basic variables */
+
+	ii = *n - 1;
+	niter = 1;
+
+/*        Calculate initial guess */
+
+	midpt = *rho / 2.f;
+
+/*
+          If ||Z||_2 is not one, then TEMP should be set to
+          RHO * ||Z||_2^2 / TWO
+*/
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - midpt;
+/* L10: */
+	}
+
+	psi = 0.f;
+	i__1 = *n - 2;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / delta[j];
+/* L20: */
+	}
+
+	c__ = rhoinv + psi;
+	w = c__ + z__[ii] * z__[ii] / delta[ii] + z__[*n] * z__[*n] / delta[*
+		n];
+
+	if (w <= 0.f) {
+	    temp = z__[*n - 1] * z__[*n - 1] / (d__[*n] - d__[*n - 1] + *rho)
+		    + z__[*n] * z__[*n] / *rho;
+	    if (c__ <= temp) {
+		tau = *rho;
+	    } else {
+		del = d__[*n] - d__[*n - 1];
+		a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n]
+			;
+		b = z__[*n] * z__[*n] * del;
+		if (a < 0.f) {
+		    tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
+		} else {
+		    tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
+		}
+	    }
+
+/*
+             It can be proved that
+                 D(N)+RHO/2 <= LAMBDA(N) < D(N)+TAU <= D(N)+RHO
+*/
+
+	    dltlb = midpt;
+	    dltub = *rho;
+	} else {
+	    del = d__[*n] - d__[*n - 1];
+	    a = -c__ * del + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
+	    b = z__[*n] * z__[*n] * del;
+	    if (a < 0.f) {
+		tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
+	    } else {
+		tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
+	    }
+
+/*
+             It can be proved that
+                 D(N) < D(N)+TAU < LAMBDA(N) < D(N)+RHO/2
+*/
+
+	    dltlb = 0.f;
+	    dltub = midpt;
+	}
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - tau;
+/* L30: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L40: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / delta[*n];
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
+		dpsi + dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Test for convergence */
+
+	if (dabs(w) <= eps * erretm) {
+	    *dlam = d__[*i__] + tau;
+	    goto L250;
+	}
+
+	if (w <= 0.f) {
+	    dltlb = dmax(dltlb,tau);
+	} else {
+	    dltub = dmin(dltub,tau);
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
+	a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] * (
+		dpsi + dphi);
+	b = delta[*n - 1] * delta[*n] * w;
+	if (c__ < 0.f) {
+	    c__ = dabs(c__);
+	}
+	if (c__ == 0.f) {
+/*
+            ETA = B/A
+             ETA = RHO - TAU
+*/
+	    eta = dltub - tau;
+	} else if (a >= 0.f) {
+	    eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
+		    c__ * 2.f);
+	} else {
+	    eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+		    r__1))));
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta > 0.f) {
+	    eta = -w / (dpsi + dphi);
+	}
+	temp = tau + eta;
+	if ((temp > dltub) || (temp < dltlb)) {
+	    if (w < 0.f) {
+		eta = (dltub - tau) / 2.f;
+	    } else {
+		eta = (dltlb - tau) / 2.f;
+	    }
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] -= eta;
+/* L50: */
+	}
+
+	tau += eta;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L60: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / delta[*n];
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
+		dpsi + dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Main loop to update the values of the array   DELTA */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 30; ++niter) {
+
+/*           Test for convergence */
+
+	    if (dabs(w) <= eps * erretm) {
+		*dlam = d__[*i__] + tau;
+		goto L250;
+	    }
+
+	    if (w <= 0.f) {
+		dltlb = dmax(dltlb,tau);
+	    } else {
+		dltub = dmin(dltub,tau);
+	    }
+
+/*           Calculate the new step */
+
+	    c__ = w - delta[*n - 1] * dpsi - delta[*n] * dphi;
+	    a = (delta[*n - 1] + delta[*n]) * w - delta[*n - 1] * delta[*n] *
+		    (dpsi + dphi);
+	    b = delta[*n - 1] * delta[*n] * w;
+	    if (a >= 0.f) {
+		eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
+			 (c__ * 2.f);
+	    } else {
+		eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+			r__1))));
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta > 0.f) {
+		eta = -w / (dpsi + dphi);
+	    }
+	    temp = tau + eta;
+	    if ((temp > dltub) || (temp < dltlb)) {
+		if (w < 0.f) {
+		    eta = (dltub - tau) / 2.f;
+		} else {
+		    eta = (dltlb - tau) / 2.f;
+		}
+	    }
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] -= eta;
+/* L70: */
+	    }
+
+	    tau += eta;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.f;
+	    psi = 0.f;
+	    erretm = 0.f;
+	    i__1 = ii;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / delta[j];
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L80: */
+	    }
+	    erretm = dabs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    temp = z__[*n] / delta[*n];
+	    phi = z__[*n] * temp;
+	    dphi = temp * temp;
+	    erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) *
+		    (dpsi + dphi);
+
+	    w = rhoinv + phi + psi;
+/* L90: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+	*dlam = d__[*i__] + tau;
+	goto L250;
+
+/*        End for the case I = N */
+
+    } else {
+
+/*        The case for I < N */
+
+	niter = 1;
+	ip1 = *i__ + 1;
+
+/*        Calculate initial guess */
+
+	del = d__[ip1] - d__[*i__];
+	midpt = del / 2.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - midpt;
+/* L100: */
+	}
+
+	psi = 0.f;
+	i__1 = *i__ - 1;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / delta[j];
+/* L110: */
+	}
+
+	phi = 0.f;
+	i__1 = *i__ + 2;
+	for (j = *n; j >= i__1; --j) {
+	    phi += z__[j] * z__[j] / delta[j];
+/* L120: */
+	}
+	c__ = rhoinv + psi + phi;
+	w = c__ + z__[*i__] * z__[*i__] / delta[*i__] + z__[ip1] * z__[ip1] /
+		delta[ip1];
+
+	if (w > 0.f) {
+
+/*
+             d(i)< the ith eigenvalue < (d(i)+d(i+1))/2
+
+             We choose d(i) as origin.
+*/
+
+	    orgati = TRUE_;
+	    a = c__ * del + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
+	    b = z__[*i__] * z__[*i__] * del;
+	    if (a > 0.f) {
+		tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+			r__1))));
+	    } else {
+		tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
+			 (c__ * 2.f);
+	    }
+	    dltlb = 0.f;
+	    dltub = midpt;
+	} else {
+
+/*
+             (d(i)+d(i+1))/2 <= the ith eigenvalue < d(i+1)
+
+             We choose d(i+1) as origin.
+*/
+
+	    orgati = FALSE_;
+	    a = c__ * del - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
+	    b = z__[ip1] * z__[ip1] * del;
+	    if (a < 0.f) {
+		tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs(
+			r__1))));
+	    } else {
+		tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1))))
+			/ (c__ * 2.f);
+	    }
+	    dltlb = -midpt;
+	    dltub = 0.f;
+	}
+
+	if (orgati) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] = d__[j] - d__[*i__] - tau;
+/* L130: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] = d__[j] - d__[ip1] - tau;
+/* L140: */
+	    }
+	}
+	if (orgati) {
+	    ii = *i__;
+	} else {
+	    ii = *i__ + 1;
+	}
+	iim1 = ii - 1;
+	iip1 = ii + 1;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L150: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.f;
+	phi = 0.f;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / delta[j];
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L160: */
+	}
+
+	w = rhoinv + phi + psi;
+
+/*
+          W is the value of the secular function with
+          its ii-th element removed.
+*/
+
+	swtch3 = FALSE_;
+	if (orgati) {
+	    if (w < 0.f) {
+		swtch3 = TRUE_;
+	    }
+	} else {
+	    if (w > 0.f) {
+		swtch3 = TRUE_;
+	    }
+	}
+	if ((ii == 1) || (ii == *n)) {
+	    swtch3 = FALSE_;
+	}
+
+	temp = z__[ii] / delta[ii];
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w += temp;
+	erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f
+		+ dabs(tau) * dw;
+
+/*        Test for convergence */
+
+	if (dabs(w) <= eps * erretm) {
+	    if (orgati) {
+		*dlam = d__[*i__] + tau;
+	    } else {
+		*dlam = d__[ip1] + tau;
+	    }
+	    goto L250;
+	}
+
+	if (w <= 0.f) {
+	    dltlb = dmax(dltlb,tau);
+	} else {
+	    dltub = dmin(dltub,tau);
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	if (! swtch3) {
+	    if (orgati) {
+/* Computing 2nd power */
+		r__1 = z__[*i__] / delta[*i__];
+		c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (r__1 *
+			r__1);
+	    } else {
+/* Computing 2nd power */
+		r__1 = z__[ip1] / delta[ip1];
+		c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) * (r__1 *
+			r__1);
+	    }
+	    a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1] *
+		    dw;
+	    b = delta[*i__] * delta[ip1] * w;
+	    if (c__ == 0.f) {
+		if (a == 0.f) {
+		    if (orgati) {
+			a = z__[*i__] * z__[*i__] + delta[ip1] * delta[ip1] *
+				(dpsi + dphi);
+		    } else {
+			a = z__[ip1] * z__[ip1] + delta[*i__] * delta[*i__] *
+				(dpsi + dphi);
+		    }
+		}
+		eta = b / a;
+	    } else if (a <= 0.f) {
+		eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
+			 (c__ * 2.f);
+	    } else {
+		eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+			r__1))));
+	    }
+	} else {
+
+/*           Interpolation using THREE most relevant poles */
+
+	    temp = rhoinv + psi + phi;
+	    if (orgati) {
+		temp1 = z__[iim1] / delta[iim1];
+		temp1 *= temp1;
+		c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1] - d__[
+			iip1]) * temp1;
+		zz[0] = z__[iim1] * z__[iim1];
+		zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 + dphi);
+	    } else {
+		temp1 = z__[iip1] / delta[iip1];
+		temp1 *= temp1;
+		c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1] - d__[
+			iim1]) * temp1;
+		zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi - temp1));
+		zz[2] = z__[iip1] * z__[iip1];
+	    }
+	    zz[1] = z__[ii] * z__[ii];
+	    slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta, info);
+	    if (*info != 0) {
+		goto L250;
+	    }
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta >= 0.f) {
+	    eta = -w / dw;
+	}
+	temp = tau + eta;
+	if ((temp > dltub) || (temp < dltlb)) {
+	    if (w < 0.f) {
+		eta = (dltub - tau) / 2.f;
+	    } else {
+		eta = (dltlb - tau) / 2.f;
+	    }
+	}
+
+	prew = w;
+
+/* L170: */
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] -= eta;
+/* L180: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / delta[j];
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L190: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.f;
+	phi = 0.f;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / delta[j];
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L200: */
+	}
+
+	temp = z__[ii] / delta[ii];
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w = rhoinv + phi + psi + temp;
+	erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f
+		+ (r__1 = tau + eta, dabs(r__1)) * dw;
+
+	swtch = FALSE_;
+	if (orgati) {
+	    if (-w > dabs(prew) / 10.f) {
+		swtch = TRUE_;
+	    }
+	} else {
+	    if (w > dabs(prew) / 10.f) {
+		swtch = TRUE_;
+	    }
+	}
+
+	tau += eta;
+
+/*        Main loop to update the values of the array   DELTA */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 30; ++niter) {
+
+/*           Test for convergence */
+
+	    if (dabs(w) <= eps * erretm) {
+		if (orgati) {
+		    *dlam = d__[*i__] + tau;
+		} else {
+		    *dlam = d__[ip1] + tau;
+		}
+		goto L250;
+	    }
+
+	    if (w <= 0.f) {
+		dltlb = dmax(dltlb,tau);
+	    } else {
+		dltub = dmin(dltub,tau);
+	    }
+
+/*           Calculate the new step */
+
+	    if (! swtch3) {
+		if (! swtch) {
+		    if (orgati) {
+/* Computing 2nd power */
+			r__1 = z__[*i__] / delta[*i__];
+			c__ = w - delta[ip1] * dw - (d__[*i__] - d__[ip1]) * (
+				r__1 * r__1);
+		    } else {
+/* Computing 2nd power */
+			r__1 = z__[ip1] / delta[ip1];
+			c__ = w - delta[*i__] * dw - (d__[ip1] - d__[*i__]) *
+				(r__1 * r__1);
+		    }
+		} else {
+		    temp = z__[ii] / delta[ii];
+		    if (orgati) {
+			dpsi += temp * temp;
+		    } else {
+			dphi += temp * temp;
+		    }
+		    c__ = w - delta[*i__] * dpsi - delta[ip1] * dphi;
+		}
+		a = (delta[*i__] + delta[ip1]) * w - delta[*i__] * delta[ip1]
+			* dw;
+		b = delta[*i__] * delta[ip1] * w;
+		if (c__ == 0.f) {
+		    if (a == 0.f) {
+			if (! swtch) {
+			    if (orgati) {
+				a = z__[*i__] * z__[*i__] + delta[ip1] *
+					delta[ip1] * (dpsi + dphi);
+			    } else {
+				a = z__[ip1] * z__[ip1] + delta[*i__] * delta[
+					*i__] * (dpsi + dphi);
+			    }
+			} else {
+			    a = delta[*i__] * delta[*i__] * dpsi + delta[ip1]
+				    * delta[ip1] * dphi;
+			}
+		    }
+		    eta = b / a;
+		} else if (a <= 0.f) {
+		    eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1))
+			    )) / (c__ * 2.f);
+		} else {
+		    eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__,
+			    dabs(r__1))));
+		}
+	    } else {
+
+/*              Interpolation using THREE most relevant poles */
+
+		temp = rhoinv + psi + phi;
+		if (swtch) {
+		    c__ = temp - delta[iim1] * dpsi - delta[iip1] * dphi;
+		    zz[0] = delta[iim1] * delta[iim1] * dpsi;
+		    zz[2] = delta[iip1] * delta[iip1] * dphi;
+		} else {
+		    if (orgati) {
+			temp1 = z__[iim1] / delta[iim1];
+			temp1 *= temp1;
+			c__ = temp - delta[iip1] * (dpsi + dphi) - (d__[iim1]
+				- d__[iip1]) * temp1;
+			zz[0] = z__[iim1] * z__[iim1];
+			zz[2] = delta[iip1] * delta[iip1] * (dpsi - temp1 +
+				dphi);
+		    } else {
+			temp1 = z__[iip1] / delta[iip1];
+			temp1 *= temp1;
+			c__ = temp - delta[iim1] * (dpsi + dphi) - (d__[iip1]
+				- d__[iim1]) * temp1;
+			zz[0] = delta[iim1] * delta[iim1] * (dpsi + (dphi -
+				temp1));
+			zz[2] = z__[iip1] * z__[iip1];
+		    }
+		}
+		slaed6_(&niter, &orgati, &c__, &delta[iim1], zz, &w, &eta,
+			info);
+		if (*info != 0) {
+		    goto L250;
+		}
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta >= 0.f) {
+		eta = -w / dw;
+	    }
+	    temp = tau + eta;
+	    if ((temp > dltub) || (temp < dltlb)) {
+		if (w < 0.f) {
+		    eta = (dltub - tau) / 2.f;
+		} else {
+		    eta = (dltlb - tau) / 2.f;
+		}
+	    }
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] -= eta;
+/* L210: */
+	    }
+
+	    tau += eta;
+	    prew = w;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.f;
+	    psi = 0.f;
+	    erretm = 0.f;
+	    i__1 = iim1;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / delta[j];
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L220: */
+	    }
+	    erretm = dabs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    dphi = 0.f;
+	    phi = 0.f;
+	    i__1 = iip1;
+	    for (j = *n; j >= i__1; --j) {
+		temp = z__[j] / delta[j];
+		phi += z__[j] * temp;
+		dphi += temp * temp;
+		erretm += phi;
+/* L230: */
+	    }
+
+	    temp = z__[ii] / delta[ii];
+	    dw = dpsi + dphi + temp * temp;
+	    temp = z__[ii] * temp;
+	    w = rhoinv + phi + psi + temp;
+	    erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) *
+		    3.f + dabs(tau) * dw;
+	    if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) {
+		swtch = ! swtch;
+	    }
+
+/* L240: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+	if (orgati) {
+	    *dlam = d__[*i__] + tau;
+	} else {
+	    *dlam = d__[ip1] + tau;
+	}
+
+    }
+
+L250:
+
+    return 0;
+
+/*     End of SLAED4 */
+
+} /* slaed4_ */
+
+/* Subroutine */ int slaed5_(integer *i__, real *d__, real *z__, real *delta,
+	real *rho, real *dlam)
+{
+    /* System generated locals */
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real b, c__, w, del, tau, temp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    This subroutine computes the I-th eigenvalue of a symmetric rank-one
+    modification of a 2-by-2 diagonal matrix
+
+               diag( D )  +  RHO *  Z * transpose(Z) .
+
+    The diagonal elements in the array D are assumed to satisfy
+
+               D(i) < D(j)  for  i < j .
+
+    We also assume RHO > 0 and that the Euclidean norm of the vector
+    Z is one.
+
+    Arguments
+    =========
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  I = 1 or I = 2.
+
+    D      (input) REAL array, dimension (2)
+           The original eigenvalues.  We assume D(1) < D(2).
+
+    Z      (input) REAL array, dimension (2)
+           The components of the updating vector.
+
+    DELTA  (output) REAL array, dimension (2)
+           The vector DELTA contains the information necessary
+           to construct the eigenvectors.
+
+    RHO    (input) REAL
+           The scalar in the symmetric updating formula.
+
+    DLAM   (output) REAL
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    del = d__[2] - d__[1];
+    if (*i__ == 1) {
+	w = *rho * 2.f * (z__[2] * z__[2] - z__[1] * z__[1]) / del + 1.f;
+	if (w > 0.f) {
+	    b = del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[1] * z__[1] * del;
+
+/*           B > ZERO, always */
+
+	    tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1))
+		    ));
+	    *dlam = d__[1] + tau;
+	    delta[1] = -z__[1] / tau;
+	    delta[2] = z__[2] / (del - tau);
+	} else {
+	    b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[2] * z__[2] * del;
+	    if (b > 0.f) {
+		tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f));
+	    } else {
+		tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f;
+	    }
+	    *dlam = d__[2] + tau;
+	    delta[1] = -z__[1] / (del + tau);
+	    delta[2] = -z__[2] / tau;
+	}
+	temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
+	delta[1] /= temp;
+	delta[2] /= temp;
+    } else {
+
+/*     Now I=2 */
+
+	b = -del + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	c__ = *rho * z__[2] * z__[2] * del;
+	if (b > 0.f) {
+	    tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f;
+	} else {
+	    tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f));
+	}
+	*dlam = d__[2] + tau;
+	delta[1] = -z__[1] / (del + tau);
+	delta[2] = -z__[2] / tau;
+	temp = sqrt(delta[1] * delta[1] + delta[2] * delta[2]);
+	delta[1] /= temp;
+	delta[2] /= temp;
+    }
+    return 0;
+
+/*     End OF SLAED5 */
+
+} /* slaed5_ */
+
+/* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho,
+	real *d__, real *z__, real *finit, real *tau, integer *info)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2, r__3, r__4;
+
+    /* Builtin functions */
+    double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *);
+
+    /* Local variables */
+    static real a, b, c__, f;
+    static integer i__;
+    static real fc, df, ddf, eta, eps, base;
+    static integer iter;
+    static real temp, temp1, temp2, temp3, temp4;
+    static logical scale;
+    static integer niter;
+    static real small1, small2, sminv1, sminv2, dscale[3], sclfac;
+    extern doublereal slamch_(char *);
+    static real zscale[3], erretm, sclinv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLAED6 computes the positive or negative root (closest to the origin)
+    of
+                     z(1)        z(2)        z(3)
+    f(x) =   rho + --------- + ---------- + ---------
+                    d(1)-x      d(2)-x      d(3)-x
+
+    It is assumed that
+
+          if ORGATI = .true. the root is between d(2) and d(3);
+          otherwise it is between d(1) and d(2)
+
+    This routine will be called by SLAED4 when necessary. In most cases,
+    the root sought is the smallest in magnitude, though it might not be
+    in some extremely rare situations.
+
+    Arguments
+    =========
+
+    KNITER       (input) INTEGER
+                 Refer to SLAED4 for its significance.
+
+    ORGATI       (input) LOGICAL
+                 If ORGATI is true, the needed root is between d(2) and
+                 d(3); otherwise it is between d(1) and d(2).  See
+                 SLAED4 for further details.
+
+    RHO          (input) REAL
+                 Refer to the equation f(x) above.
+
+    D            (input) REAL array, dimension (3)
+                 D satisfies d(1) < d(2) < d(3).
+
+    Z            (input) REAL array, dimension (3)
+                 Each of the elements in z must be positive.
+
+    FINIT        (input) REAL
+                 The value of f at 0. It is more accurate than the one
+                 evaluated inside this routine (if someone wants to do
+                 so).
+
+    TAU          (output) REAL
+                 The root of the equation f(x).
+
+    INFO         (output) INTEGER
+                 = 0: successful exit
+                 > 0: if INFO = 1, failure to converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --z__;
+    --d__;
+
+    /* Function Body */
+
+    *info = 0;
+
+    niter = 1;
+    *tau = 0.f;
+    if (*kniter == 2) {
+	if (*orgati) {
+	    temp = (d__[3] - d__[2]) / 2.f;
+	    c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
+	    a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
+	    b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
+	} else {
+	    temp = (d__[1] - d__[2]) / 2.f;
+	    c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
+	    a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
+	    b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
+	}
+/* Computing MAX */
+	r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs(
+		c__);
+	temp = dmax(r__1,r__2);
+	a /= temp;
+	b /= temp;
+	c__ /= temp;
+	if (c__ == 0.f) {
+	    *tau = b / a;
+	} else if (a <= 0.f) {
+	    *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
+		    c__ * 2.f);
+	} else {
+	    *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+		    r__1))));
+	}
+	temp = *rho + z__[1] / (d__[1] - *tau) + z__[2] / (d__[2] - *tau) +
+		z__[3] / (d__[3] - *tau);
+	if (dabs(*finit) <= dabs(temp)) {
+	    *tau = 0.f;
+	}
+    }
+
+/*
+       On first call to routine, get machine parameters for
+       possible scaling to avoid overflow
+*/
+
+    if (first) {
+	eps = slamch_("Epsilon");
+	base = slamch_("Base");
+	i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f)
+		;
+	small1 = pow_ri(&base, &i__1);
+	sminv1 = 1.f / small1;
+	small2 = small1 * small1;
+	sminv2 = sminv1 * sminv1;
+	first = FALSE_;
+    }
+
+/*
+       Determine if scaling of inputs necessary to avoid overflow
+       when computing 1/TEMP**3
+*/
+
+    if (*orgati) {
+/* Computing MIN */
+	r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - *
+		tau, dabs(r__2));
+	temp = dmin(r__3,r__4);
+    } else {
+/* Computing MIN */
+	r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - *
+		tau, dabs(r__2));
+	temp = dmin(r__3,r__4);
+    }
+    scale = FALSE_;
+    if (temp <= small1) {
+	scale = TRUE_;
+	if (temp <= small2) {
+
+/*        Scale up by power of radix nearest 1/SAFMIN**(2/3) */
+
+	    sclfac = sminv2;
+	    sclinv = small2;
+	} else {
+
+/*        Scale up by power of radix nearest 1/SAFMIN**(1/3) */
+
+	    sclfac = sminv1;
+	    sclinv = small1;
+	}
+
+/*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
+
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    dscale[i__ - 1] = d__[i__] * sclfac;
+	    zscale[i__ - 1] = z__[i__] * sclfac;
+/* L10: */
+	}
+	*tau *= sclfac;
+    } else {
+
+/*        Copy D and Z to DSCALE and ZSCALE */
+
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    dscale[i__ - 1] = d__[i__];
+	    zscale[i__ - 1] = z__[i__];
+/* L20: */
+	}
+    }
+
+    fc = 0.f;
+    df = 0.f;
+    ddf = 0.f;
+    for (i__ = 1; i__ <= 3; ++i__) {
+	temp = 1.f / (dscale[i__ - 1] - *tau);
+	temp1 = zscale[i__ - 1] * temp;
+	temp2 = temp1 * temp;
+	temp3 = temp2 * temp;
+	fc += temp1 / dscale[i__ - 1];
+	df += temp2;
+	ddf += temp3;
+/* L30: */
+    }
+    f = *finit + *tau * fc;
+
+    if (dabs(f) <= 0.f) {
+	goto L60;
+    }
+
+/*
+          Iteration begins
+
+       It is not hard to see that
+
+             1) Iterations will go up monotonically
+                if FINIT < 0;
+
+             2) Iterations will go down monotonically
+                if FINIT > 0.
+*/
+
+    iter = niter + 1;
+
+    for (niter = iter; niter <= 20; ++niter) {
+
+	if (*orgati) {
+	    temp1 = dscale[1] - *tau;
+	    temp2 = dscale[2] - *tau;
+	} else {
+	    temp1 = dscale[0] - *tau;
+	    temp2 = dscale[1] - *tau;
+	}
+	a = (temp1 + temp2) * f - temp1 * temp2 * df;
+	b = temp1 * temp2 * f;
+	c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
+/* Computing MAX */
+	r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs(
+		c__);
+	temp = dmax(r__1,r__2);
+	a /= temp;
+	b /= temp;
+	c__ /= temp;
+	if (c__ == 0.f) {
+	    eta = b / a;
+	} else if (a <= 0.f) {
+	    eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
+		    c__ * 2.f);
+	} else {
+	    eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+		    r__1))));
+	}
+	if (f * eta >= 0.f) {
+	    eta = -f / df;
+	}
+
+	temp = eta + *tau;
+	if (*orgati) {
+	    if (eta > 0.f && temp >= dscale[2]) {
+		eta = (dscale[2] - *tau) / 2.f;
+	    }
+	    if (eta < 0.f && temp <= dscale[1]) {
+		eta = (dscale[1] - *tau) / 2.f;
+	    }
+	} else {
+	    if (eta > 0.f && temp >= dscale[1]) {
+		eta = (dscale[1] - *tau) / 2.f;
+	    }
+	    if (eta < 0.f && temp <= dscale[0]) {
+		eta = (dscale[0] - *tau) / 2.f;
+	    }
+	}
+	*tau += eta;
+
+	fc = 0.f;
+	erretm = 0.f;
+	df = 0.f;
+	ddf = 0.f;
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    temp = 1.f / (dscale[i__ - 1] - *tau);
+	    temp1 = zscale[i__ - 1] * temp;
+	    temp2 = temp1 * temp;
+	    temp3 = temp2 * temp;
+	    temp4 = temp1 / dscale[i__ - 1];
+	    fc += temp4;
+	    erretm += dabs(temp4);
+	    df += temp2;
+	    ddf += temp3;
+/* L40: */
+	}
+	f = *finit + *tau * fc;
+	erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df;
+	if (dabs(f) <= eps * erretm) {
+	    goto L60;
+	}
+/* L50: */
+    }
+    *info = 1;
+L60:
+
+/*     Undo scaling */
+
+    if (scale) {
+	*tau *= sclinv;
+    }
+    return 0;
+
+/*     End of SLAED6 */
+
+} /* slaed6_ */
+
+/* Subroutine */ int slaed7_(integer *icompq, integer *n, integer *qsiz,
+	integer *tlvls, integer *curlvl, integer *curpbm, real *d__, real *q,
+	integer *ldq, integer *indxq, real *rho, integer *cutpnt, real *
+	qstore, integer *qptr, integer *prmptr, integer *perm, integer *
+	givptr, integer *givcol, real *givnum, real *work, integer *iwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, k, n1, n2, is, iw, iz, iq2, ptr, ldq2, indx, curr,
+	    indxc;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer indxp;
+    extern /* Subroutine */ int slaed8_(integer *, integer *, integer *,
+	    integer *, real *, real *, integer *, integer *, real *, integer *
+	    , real *, real *, real *, integer *, real *, integer *, integer *,
+	     integer *, real *, integer *, integer *, integer *), slaed9_(
+	    integer *, integer *, integer *, integer *, real *, real *,
+	    integer *, real *, real *, real *, real *, integer *, integer *),
+	    slaeda_(integer *, integer *, integer *, integer *, integer *,
+	    integer *, integer *, integer *, real *, real *, integer *, real *
+	    , real *, integer *);
+    static integer idlmda;
+    extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
+	    integer *, integer *, real *, integer *, integer *, integer *);
+    static integer coltyp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLAED7 computes the updated eigensystem of a diagonal
+    matrix after modification by a rank-one symmetric matrix. This
+    routine is used only for the eigenproblem which requires all
+    eigenvalues and optionally eigenvectors of a dense symmetric matrix
+    that has been reduced to tridiagonal form.  SLAED1 handles
+    the case in which all eigenvalues and eigenvectors of a symmetric
+    tridiagonal matrix are desired.
+
+      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+
+       where Z = Q'u, u is a vector of length N with ones in the
+       CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+
+       The eigenvectors of the original matrix are stored in Q, and the
+       eigenvalues are in D.  The algorithm consists of three stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple eigenvalues or if there is a zero in
+          the Z vector.  For each such occurence the dimension of the
+          secular equation problem is reduced by one.  This stage is
+          performed by the routine SLAED8.
+
+          The second stage consists of calculating the updated
+          eigenvalues. This is done by finding the roots of the secular
+          equation via the routine SLAED4 (as called by SLAED9).
+          This routine also calculates the eigenvectors of the current
+          problem.
+
+          The final stage consists of computing the updated eigenvectors
+          directly using the updated eigenvalues.  The eigenvectors for
+          the current problem are multiplied with the eigenvectors from
+          the overall problem.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            = 0:  Compute eigenvalues only.
+            = 1:  Compute eigenvectors of original dense symmetric matrix
+                  also.  On entry, Q contains the orthogonal matrix used
+                  to reduce the original matrix to tridiagonal form.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    QSIZ   (input) INTEGER
+           The dimension of the orthogonal matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    TLVLS  (input) INTEGER
+           The total number of merging levels in the overall divide and
+           conquer tree.
+
+    CURLVL (input) INTEGER
+           The current level in the overall merge routine,
+           0 <= CURLVL <= TLVLS.
+
+    CURPBM (input) INTEGER
+           The current problem in the current level in the overall
+           merge routine (counting from upper left to lower right).
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, the eigenvalues of the rank-1-perturbed matrix.
+           On exit, the eigenvalues of the repaired matrix.
+
+    Q      (input/output) REAL array, dimension (LDQ, N)
+           On entry, the eigenvectors of the rank-1-perturbed matrix.
+           On exit, the eigenvectors of the repaired tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (output) INTEGER array, dimension (N)
+           The permutation which will reintegrate the subproblem just
+           solved back into sorted order, i.e., D( INDXQ( I = 1, N ) )
+           will be in ascending order.
+
+    RHO    (input) REAL
+           The subdiagonal element used to create the rank-1
+           modification.
+
+    CUTPNT (input) INTEGER
+           Contains the location of the last eigenvalue in the leading
+           sub-matrix.  min(1,N) <= CUTPNT <= N.
+
+    QSTORE (input/output) REAL array, dimension (N**2+1)
+           Stores eigenvectors of submatrices encountered during
+           divide and conquer, packed together. QPTR points to
+           beginning of the submatrices.
+
+    QPTR   (input/output) INTEGER array, dimension (N+2)
+           List of indices pointing to beginning of submatrices stored
+           in QSTORE. The submatrices are numbered starting at the
+           bottom left of the divide and conquer tree, from left to
+           right and bottom to top.
+
+    PRMPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in PERM a
+           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+           indicates the size of the permutation and also the size of
+           the full, non-deflated problem.
+
+    PERM   (input) INTEGER array, dimension (N lg N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in GIVCOL a
+           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+           indicates the number of Givens rotations.
+
+    GIVCOL (input) INTEGER array, dimension (2, N lg N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (input) REAL array, dimension (2, N lg N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    WORK   (workspace) REAL array, dimension (3*N+QSIZ*N)
+
+    IWORK  (workspace) INTEGER array, dimension (4*N)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --qstore;
+    --qptr;
+    --prmptr;
+    --perm;
+    --givptr;
+    givcol -= 3;
+    givnum -= 3;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*icompq == 1 && *qsiz < *n) {
+	*info = -4;
+    } else if (*ldq < max(1,*n)) {
+	*info = -9;
+    } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED7", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in SLAED8 and SLAED9.
+*/
+
+    if (*icompq == 1) {
+	ldq2 = *qsiz;
+    } else {
+	ldq2 = *n;
+    }
+
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq2 = iw + *n;
+    is = iq2 + *n * ldq2;
+
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+
+/*
+       Form the z-vector which consists of the last row of Q_1 and the
+       first row of Q_2.
+*/
+
+    ptr = pow_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = *tlvls - i__;
+	ptr += pow_ii(&c__2, &i__2);
+/* L10: */
+    }
+    curr = ptr + *curpbm;
+    slaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
+	    givcol[3], &givnum[3], &qstore[1], &qptr[1], &work[iz], &work[iz
+	    + *n], info);
+
+/*
+       When solving the final problem, we no longer need the stored data,
+       so we will overwrite the data from this level onto the previously
+       used storage space.
+*/
+
+    if (*curlvl == *tlvls) {
+	qptr[curr] = 1;
+	prmptr[curr] = 1;
+	givptr[curr] = 1;
+    }
+
+/*     Sort and Deflate eigenvalues. */
+
+    slaed8_(icompq, &k, n, qsiz, &d__[1], &q[q_offset], ldq, &indxq[1], rho,
+	    cutpnt, &work[iz], &work[idlmda], &work[iq2], &ldq2, &work[iw], &
+	    perm[prmptr[curr]], &givptr[curr + 1], &givcol[((givptr[curr]) <<
+	    (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1], &iwork[indxp], &
+	    iwork[indx], info);
+    prmptr[curr + 1] = prmptr[curr] + *n;
+    givptr[curr + 1] += givptr[curr];
+
+/*     Solve Secular Equation. */
+
+    if (k != 0) {
+	slaed9_(&k, &c__1, &k, n, &d__[1], &work[is], &k, rho, &work[idlmda],
+		&work[iw], &qstore[qptr[curr]], &k, info);
+	if (*info != 0) {
+	    goto L30;
+	}
+	if (*icompq == 1) {
+	    sgemm_("N", "N", qsiz, &k, &k, &c_b1011, &work[iq2], &ldq2, &
+		    qstore[qptr[curr]], &k, &c_b320, &q[q_offset], ldq);
+	}
+/* Computing 2nd power */
+	i__1 = k;
+	qptr[curr + 1] = qptr[curr] + i__1 * i__1;
+
+/*     Prepare the INDXQ sorting permutation. */
+
+	n1 = k;
+	n2 = *n - k;
+	slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+	qptr[curr + 1] = qptr[curr];
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    indxq[i__] = i__;
+/* L20: */
+	}
+    }
+
+L30:
+    return 0;
+
+/*     End of SLAED7 */
+
+} /* slaed7_ */
+
+/* Subroutine */ int slaed8_(integer *icompq, integer *k, integer *n, integer
+	*qsiz, real *d__, real *q, integer *ldq, integer *indxq, real *rho,
+	integer *cutpnt, real *z__, real *dlamda, real *q2, integer *ldq2,
+	real *w, integer *perm, integer *givptr, integer *givcol, real *
+	givnum, integer *indxp, integer *indx, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real c__;
+    static integer i__, j;
+    static real s, t;
+    static integer k2, n1, n2, jp, n1p1;
+    static real eps, tau, tol;
+    static integer jlam, imax, jmax;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *), sscal_(integer *, real *, real *,
+	    integer *), scopy_(integer *, real *, integer *, real *, integer *
+	    );
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slamrg_(integer *, integer *, real *, integer
+	    *, integer *, integer *), slacpy_(char *, integer *, integer *,
+	    real *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLAED8 merges the two sets of eigenvalues together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    eigenvalues are close together or if there is a tiny element in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            = 0:  Compute eigenvalues only.
+            = 1:  Compute eigenvectors of original dense symmetric matrix
+                  also.  On entry, Q contains the orthogonal matrix used
+                  to reduce the original matrix to tridiagonal form.
+
+    K      (output) INTEGER
+           The number of non-deflated eigenvalues, and the order of the
+           related secular equation.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    QSIZ   (input) INTEGER
+           The dimension of the orthogonal matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry, the eigenvalues of the two submatrices to be
+           combined.  On exit, the trailing (N-K) updated eigenvalues
+           (those which were deflated) sorted into increasing order.
+
+    Q      (input/output) REAL array, dimension (LDQ,N)
+           If ICOMPQ = 0, Q is not referenced.  Otherwise,
+           on entry, Q contains the eigenvectors of the partially solved
+           system which has been previously updated in matrix
+           multiplies with other partially solved eigensystems.
+           On exit, Q contains the trailing (N-K) updated eigenvectors
+           (those which were deflated) in its last N-K columns.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    INDXQ  (input) INTEGER array, dimension (N)
+           The permutation which separately sorts the two sub-problems
+           in D into ascending order.  Note that elements in the second
+           half of this permutation must first have CUTPNT added to
+           their values in order to be accurate.
+
+    RHO    (input/output) REAL
+           On entry, the off-diagonal element associated with the rank-1
+           cut which originally split the two submatrices which are now
+           being recombined.
+           On exit, RHO has been modified to the value required by
+           SLAED3.
+
+    CUTPNT (input) INTEGER
+           The location of the last eigenvalue in the leading
+           sub-matrix.  min(1,N) <= CUTPNT <= N.
+
+    Z      (input) REAL array, dimension (N)
+           On entry, Z contains the updating vector (the last row of
+           the first sub-eigenvector matrix and the first row of the
+           second sub-eigenvector matrix).
+           On exit, the contents of Z are destroyed by the updating
+           process.
+
+    DLAMDA (output) REAL array, dimension (N)
+           A copy of the first K eigenvalues which will be used by
+           SLAED3 to form the secular equation.
+
+    Q2     (output) REAL array, dimension (LDQ2,N)
+           If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+           a copy of the first K eigenvectors which will be used by
+           SLAED7 in a matrix multiply (SGEMM) to update the new
+           eigenvectors.
+
+    LDQ2   (input) INTEGER
+           The leading dimension of the array Q2.  LDQ2 >= max(1,N).
+
+    W      (output) REAL array, dimension (N)
+           The first k values of the final deflation-altered z-vector and
+           will be passed to SLAED3.
+
+    PERM   (output) INTEGER array, dimension (N)
+           The permutations (from deflation and sorting) to be applied
+           to each eigenblock.
+
+    GIVPTR (output) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem.
+
+    GIVCOL (output) INTEGER array, dimension (2, N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (output) REAL array, dimension (2, N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    INDXP  (workspace) INTEGER array, dimension (N)
+           The permutation used to place deflated values of D at the end
+           of the array.  INDXP(1:K) points to the nondeflated D-values
+           and INDXP(K+1:N) points to the deflated eigenvalues.
+
+    INDX   (workspace) INTEGER array, dimension (N)
+           The permutation used to sort the contents of D into ascending
+           order.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --z__;
+    --dlamda;
+    q2_dim1 = *ldq2;
+    q2_offset = 1 + q2_dim1;
+    q2 -= q2_offset;
+    --w;
+    --perm;
+    givcol -= 3;
+    givnum -= 3;
+    --indxp;
+    --indx;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*icompq == 1 && *qsiz < *n) {
+	*info = -4;
+    } else if (*ldq < max(1,*n)) {
+	*info = -7;
+    } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) {
+	*info = -10;
+    } else if (*ldq2 < max(1,*n)) {
+	*info = -14;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED8", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    n1 = *cutpnt;
+    n2 = *n - n1;
+    n1p1 = n1 + 1;
+
+    if (*rho < 0.f) {
+	sscal_(&n2, &c_b1290, &z__[n1p1], &c__1);
+    }
+
+/*     Normalize z so that norm(z) = 1 */
+
+    t = 1.f / sqrt(2.f);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	indx[j] = j;
+/* L10: */
+    }
+    sscal_(n, &t, &z__[1], &c__1);
+    *rho = (r__1 = *rho * 2.f, dabs(r__1));
+
+/*     Sort the eigenvalues into increasing order */
+
+    i__1 = *n;
+    for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
+	indxq[i__] += *cutpnt;
+/* L20: */
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = d__[indxq[i__]];
+	w[i__] = z__[indxq[i__]];
+/* L30: */
+    }
+    i__ = 1;
+    j = *cutpnt + 1;
+    slamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__[i__] = dlamda[indx[i__]];
+	z__[i__] = w[indx[i__]];
+/* L40: */
+    }
+
+/*     Calculate the allowable deflation tolerence */
+
+    imax = isamax_(n, &z__[1], &c__1);
+    jmax = isamax_(n, &d__[1], &c__1);
+    eps = slamch_("Epsilon");
+    tol = eps * 8.f * (r__1 = d__[jmax], dabs(r__1));
+
+/*
+       If the rank-1 modifier is small enough, no more needs to be done
+       except to reorganize Q so that its columns correspond with the
+       elements in D.
+*/
+
+    if (*rho * (r__1 = z__[imax], dabs(r__1)) <= tol) {
+	*k = 0;
+	if (*icompq == 0) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		perm[j] = indxq[indx[j]];
+/* L50: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		perm[j] = indxq[indx[j]];
+		scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1
+			+ 1], &c__1);
+/* L60: */
+	    }
+	    slacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
+	}
+	return 0;
+    }
+
+/*
+       If there are multiple eigenvalues then the problem deflates.  Here
+       the number of equal eigenvalues are found.  As each equal
+       eigenvalue is found, an elementary reflector is computed to rotate
+       the corresponding eigensubspace so that the corresponding
+       components of Z are zero in this new basis.
+*/
+
+    *k = 0;
+    *givptr = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    indxp[k2] = j;
+	    if (j == *n) {
+		goto L110;
+	    }
+	} else {
+	    jlam = j;
+	    goto L80;
+	}
+/* L70: */
+    }
+L80:
+    ++j;
+    if (j > *n) {
+	goto L100;
+    }
+    if (*rho * (r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	indxp[k2] = j;
+    } else {
+
+/*        Check if eigenvalues are close enough to allow deflation. */
+
+	s = z__[jlam];
+	c__ = z__[j];
+
+/*
+          Find sqrt(a**2+b**2) without overflow or
+          destructive underflow.
+*/
+
+	tau = slapy2_(&c__, &s);
+	t = d__[j] - d__[jlam];
+	c__ /= tau;
+	s = -s / tau;
+	if ((r__1 = t * c__ * s, dabs(r__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    z__[j] = tau;
+	    z__[jlam] = 0.f;
+
+/*           Record the appropriate Givens rotation */
+
+	    ++(*givptr);
+	    givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]];
+	    givcol[((*givptr) << (1)) + 2] = indxq[indx[j]];
+	    givnum[((*givptr) << (1)) + 1] = c__;
+	    givnum[((*givptr) << (1)) + 2] = s;
+	    if (*icompq == 1) {
+		srot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[
+			indxq[indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
+	    }
+	    t = d__[jlam] * c__ * c__ + d__[j] * s * s;
+	    d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
+	    d__[jlam] = t;
+	    --k2;
+	    i__ = 1;
+L90:
+	    if (k2 + i__ <= *n) {
+		if (d__[jlam] < d__[indxp[k2 + i__]]) {
+		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+		    indxp[k2 + i__] = jlam;
+		    ++i__;
+		    goto L90;
+		} else {
+		    indxp[k2 + i__ - 1] = jlam;
+		}
+	    } else {
+		indxp[k2 + i__ - 1] = jlam;
+	    }
+	    jlam = j;
+	} else {
+	    ++(*k);
+	    w[*k] = z__[jlam];
+	    dlamda[*k] = d__[jlam];
+	    indxp[*k] = jlam;
+	    jlam = j;
+	}
+    }
+    goto L80;
+L100:
+
+/*     Record the last eigenvalue. */
+
+    ++(*k);
+    w[*k] = z__[jlam];
+    dlamda[*k] = d__[jlam];
+    indxp[*k] = jlam;
+
+L110:
+
+/*
+       Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+       and Q2 respectively.  The eigenvalues/vectors which were not
+       deflated go into the first K slots of DLAMDA and Q2 respectively,
+       while those which were deflated go into the last N - K slots.
+*/
+
+    if (*icompq == 0) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    jp = indxp[j];
+	    dlamda[j] = d__[jp];
+	    perm[j] = indxq[indx[jp]];
+/* L120: */
+	}
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    jp = indxp[j];
+	    dlamda[j] = d__[jp];
+	    perm[j] = indxq[indx[jp]];
+	    scopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
+		    , &c__1);
+/* L130: */
+	}
+    }
+
+/*
+       The deflated eigenvalues and their corresponding vectors go back
+       into the last N - K slots of D and Q respectively.
+*/
+
+    if (*k < *n) {
+	if (*icompq == 0) {
+	    i__1 = *n - *k;
+	    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	} else {
+	    i__1 = *n - *k;
+	    scopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	    i__1 = *n - *k;
+	    slacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*
+		    k + 1) * q_dim1 + 1], ldq);
+	}
+    }
+
+    return 0;
+
+/*     End of SLAED8 */
+
+} /* slaed8_ */
+
+/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop,
+	integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda,
+	 real *w, real *s, integer *lds, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer i__, j;
+    static real temp;
+    extern doublereal snrm2_(integer *, real *, integer *);
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), slaed4_(integer *, integer *, real *, real *, real *,
+	    real *, real *, integer *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLAED9 finds the roots of the secular equation, as defined by the
+    values in D, Z, and RHO, between KSTART and KSTOP.  It makes the
+    appropriate calls to SLAED4 and then stores the new matrix of
+    eigenvectors for use in calculating the next level of Z vectors.
+
+    Arguments
+    =========
+
+    K       (input) INTEGER
+            The number of terms in the rational function to be solved by
+            SLAED4.  K >= 0.
+
+    KSTART  (input) INTEGER
+    KSTOP   (input) INTEGER
+            The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP
+            are to be computed.  1 <= KSTART <= KSTOP <= K.
+
+    N       (input) INTEGER
+            The number of rows and columns in the Q matrix.
+            N >= K (delation may result in N > K).
+
+    D       (output) REAL array, dimension (N)
+            D(I) contains the updated eigenvalues
+            for KSTART <= I <= KSTOP.
+
+    Q       (workspace) REAL array, dimension (LDQ,N)
+
+    LDQ     (input) INTEGER
+            The leading dimension of the array Q.  LDQ >= max( 1, N ).
+
+    RHO     (input) REAL
+            The value of the parameter in the rank one update equation.
+            RHO >= 0 required.
+
+    DLAMDA  (input) REAL array, dimension (K)
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation.
+
+    W       (input) REAL array, dimension (K)
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating vector.
+
+    S       (output) REAL array, dimension (LDS, K)
+            Will contain the eigenvectors of the repaired matrix which
+            will be stored for subsequent Z vector calculation and
+            multiplied by the previously accumulated eigenvectors
+            to update the system.
+
+    LDS     (input) INTEGER
+            The leading dimension of S.  LDS >= max( 1, K ).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dlamda;
+    --w;
+    s_dim1 = *lds;
+    s_offset = 1 + s_dim1;
+    s -= s_offset;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*k < 0) {
+	*info = -1;
+    } else if ((*kstart < 1) || (*kstart > max(1,*k))) {
+	*info = -2;
+    } else if ((max(1,*kstop) < *kstart) || (*kstop > max(1,*k))) {
+	*info = -3;
+    } else if (*n < *k) {
+	*info = -4;
+    } else if (*ldq < max(1,*k)) {
+	*info = -7;
+    } else if (*lds < max(1,*k)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAED9", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 0) {
+	return 0;
+    }
+
+/*
+       Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DLAMDA(I) if it is 1; this makes the subsequent
+       subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DLAMDA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DLAMDA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
+/* L10: */
+    }
+
+    i__1 = *kstop;
+    for (j = *kstart; j <= i__1; ++j) {
+	slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j],
+		info);
+
+/*        If the zero finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    goto L120;
+	}
+/* L20: */
+    }
+
+    if ((*k == 1) || (*k == 2)) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = *k;
+	    for (j = 1; j <= i__2; ++j) {
+		s[j + i__ * s_dim1] = q[j + i__ * q_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+	goto L120;
+    }
+
+/*     Compute updated W. */
+
+    scopy_(k, &w[1], &c__1, &s[s_offset], &c__1);
+
+/*     Initialize W(I) = Q(I,I) */
+
+    i__1 = *ldq + 1;
+    scopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L50: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
+/* L60: */
+	}
+/* L70: */
+    }
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__1 = sqrt(-w[i__]);
+	w[i__] = r_sign(&r__1, &s[i__ + s_dim1]);
+/* L80: */
+    }
+
+/*     Compute eigenvectors of the modified rank-1 modification. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1];
+/* L90: */
+	}
+	temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1);
+	i__2 = *k;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp;
+/* L100: */
+	}
+/* L110: */
+    }
+
+L120:
+    return 0;
+
+/*     End of SLAED9 */
+
+} /* slaed9_ */
+
+/* Subroutine */ int slaeda_(integer *n, integer *tlvls, integer *curlvl,
+	integer *curpbm, integer *prmptr, integer *perm, integer *givptr,
+	integer *givcol, real *givnum, real *q, integer *qptr, real *z__,
+	real *ztemp, integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, k, mid, ptr, curr;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *);
+    static integer bsiz1, bsiz2, psiz1, psiz2, zptr1;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+	    real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *),
+	    xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLAEDA computes the Z vector corresponding to the merge step in the
+    CURLVLth step of the merge process with TLVLS steps for the CURPBMth
+    problem.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    TLVLS  (input) INTEGER
+           The total number of merging levels in the overall divide and
+           conquer tree.
+
+    CURLVL (input) INTEGER
+           The current level in the overall merge routine,
+           0 <= curlvl <= tlvls.
+
+    CURPBM (input) INTEGER
+           The current problem in the current level in the overall
+           merge routine (counting from upper left to lower right).
+
+    PRMPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in PERM a
+           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+           indicates the size of the permutation and incidentally the
+           size of the full, non-deflated problem.
+
+    PERM   (input) INTEGER array, dimension (N lg N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in GIVCOL a
+           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+           indicates the number of Givens rotations.
+
+    GIVCOL (input) INTEGER array, dimension (2, N lg N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (input) REAL array, dimension (2, N lg N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    Q      (input) REAL array, dimension (N**2)
+           Contains the square eigenblocks from previous levels, the
+           starting positions for blocks are given by QPTR.
+
+    QPTR   (input) INTEGER array, dimension (N+2)
+           Contains a list of pointers which indicate where in Q an
+           eigenblock is stored.  SQRT( QPTR(i+1) - QPTR(i) ) indicates
+           the size of the block.
+
+    Z      (output) REAL array, dimension (N)
+           On output this vector contains the updating vector (the last
+           row of the first sub-eigenvector matrix and the first row of
+           the second sub-eigenvector matrix).
+
+    ZTEMP  (workspace) REAL array, dimension (N)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --ztemp;
+    --z__;
+    --qptr;
+    --q;
+    givnum -= 3;
+    givcol -= 3;
+    --givptr;
+    --perm;
+    --prmptr;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -1;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAEDA", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine location of first number in second half. */
+
+    mid = *n / 2 + 1;
+
+/*     Gather last/first rows of appropriate eigenblocks into center of Z */
+
+    ptr = 1;
+
+/*
+       Determine location of lowest level subproblem in the full storage
+       scheme
+*/
+
+    i__1 = *curlvl - 1;
+    curr = ptr + *curpbm * pow_ii(&c__2, curlvl) + pow_ii(&c__2, &i__1) - 1;
+
+/*
+       Determine size of these matrices.  We add HALF to the value of
+       the SQRT in case the machine underestimates one of these square
+       roots.
+*/
+
+    bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
+    bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) + .5f);
+    i__1 = mid - bsiz1 - 1;
+    for (k = 1; k <= i__1; ++k) {
+	z__[k] = 0.f;
+/* L10: */
+    }
+    scopy_(&bsiz1, &q[qptr[curr] + bsiz1 - 1], &bsiz1, &z__[mid - bsiz1], &
+	    c__1);
+    scopy_(&bsiz2, &q[qptr[curr + 1]], &bsiz2, &z__[mid], &c__1);
+    i__1 = *n;
+    for (k = mid + bsiz2; k <= i__1; ++k) {
+	z__[k] = 0.f;
+/* L20: */
+    }
+
+/*
+       Loop thru remaining levels 1 -> CURLVL applying the Givens
+       rotations and permutation and then multiplying the center matrices
+       against the current Z.
+*/
+
+    ptr = pow_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (k = 1; k <= i__1; ++k) {
+	i__2 = *curlvl - k;
+	i__3 = *curlvl - k - 1;
+	curr = ptr + *curpbm * pow_ii(&c__2, &i__2) + pow_ii(&c__2, &i__3) -
+		1;
+	psiz1 = prmptr[curr + 1] - prmptr[curr];
+	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
+	zptr1 = mid - psiz1;
+
+/*       Apply Givens at CURR and CURR+1 */
+
+	i__2 = givptr[curr + 1] - 1;
+	for (i__ = givptr[curr]; i__ <= i__2; ++i__) {
+	    srot_(&c__1, &z__[zptr1 + givcol[((i__) << (1)) + 1] - 1], &c__1,
+		    &z__[zptr1 + givcol[((i__) << (1)) + 2] - 1], &c__1, &
+		    givnum[((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]);
+/* L30: */
+	}
+	i__2 = givptr[curr + 2] - 1;
+	for (i__ = givptr[curr + 1]; i__ <= i__2; ++i__) {
+	    srot_(&c__1, &z__[mid - 1 + givcol[((i__) << (1)) + 1]], &c__1, &
+		    z__[mid - 1 + givcol[((i__) << (1)) + 2]], &c__1, &givnum[
+		    ((i__) << (1)) + 1], &givnum[((i__) << (1)) + 2]);
+/* L40: */
+	}
+	psiz1 = prmptr[curr + 1] - prmptr[curr];
+	psiz2 = prmptr[curr + 2] - prmptr[curr + 1];
+	i__2 = psiz1 - 1;
+	for (i__ = 0; i__ <= i__2; ++i__) {
+	    ztemp[i__ + 1] = z__[zptr1 + perm[prmptr[curr] + i__] - 1];
+/* L50: */
+	}
+	i__2 = psiz2 - 1;
+	for (i__ = 0; i__ <= i__2; ++i__) {
+	    ztemp[psiz1 + i__ + 1] = z__[mid + perm[prmptr[curr + 1] + i__] -
+		    1];
+/* L60: */
+	}
+
+/*
+          Multiply Blocks at CURR and CURR+1
+
+          Determine size of these matrices.  We add HALF to the value of
+          the SQRT in case the machine underestimates one of these
+          square roots.
+*/
+
+	bsiz1 = (integer) (sqrt((real) (qptr[curr + 1] - qptr[curr])) + .5f);
+	bsiz2 = (integer) (sqrt((real) (qptr[curr + 2] - qptr[curr + 1])) +
+		.5f);
+	if (bsiz1 > 0) {
+	    sgemv_("T", &bsiz1, &bsiz1, &c_b1011, &q[qptr[curr]], &bsiz1, &
+		    ztemp[1], &c__1, &c_b320, &z__[zptr1], &c__1);
+	}
+	i__2 = psiz1 - bsiz1;
+	scopy_(&i__2, &ztemp[bsiz1 + 1], &c__1, &z__[zptr1 + bsiz1], &c__1);
+	if (bsiz2 > 0) {
+	    sgemv_("T", &bsiz2, &bsiz2, &c_b1011, &q[qptr[curr + 1]], &bsiz2,
+		    &ztemp[psiz1 + 1], &c__1, &c_b320, &z__[mid], &c__1);
+	}
+	i__2 = psiz2 - bsiz2;
+	scopy_(&i__2, &ztemp[psiz1 + bsiz2 + 1], &c__1, &z__[mid + bsiz2], &
+		c__1);
+
+	i__2 = *tlvls - k;
+	ptr += pow_ii(&c__2, &i__2);
+/* L70: */
+    }
+
+    return 0;
+
+/*     End of SLAEDA */
+
+} /* slaeda_ */
+
+/* Subroutine */ int slaev2_(real *a, real *b, real *c__, real *rt1, real *
+	rt2, real *cs1, real *sn1)
+{
+    /* System generated locals */
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real ab, df, cs, ct, tb, sm, tn, rt, adf, acs;
+    static integer sgn1, sgn2;
+    static real acmn, acmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAEV2 computes the eigendecomposition of a 2-by-2 symmetric matrix
+       [  A   B  ]
+       [  B   C  ].
+    On return, RT1 is the eigenvalue of larger absolute value, RT2 is the
+    eigenvalue of smaller absolute value, and (CS1,SN1) is the unit right
+    eigenvector for RT1, giving the decomposition
+
+       [ CS1  SN1 ] [  A   B  ] [ CS1 -SN1 ]  =  [ RT1  0  ]
+       [-SN1  CS1 ] [  B   C  ] [ SN1  CS1 ]     [  0  RT2 ].
+
+    Arguments
+    =========
+
+    A       (input) REAL
+            The (1,1) element of the 2-by-2 matrix.
+
+    B       (input) REAL
+            The (1,2) element and the conjugate of the (2,1) element of
+            the 2-by-2 matrix.
+
+    C       (input) REAL
+            The (2,2) element of the 2-by-2 matrix.
+
+    RT1     (output) REAL
+            The eigenvalue of larger absolute value.
+
+    RT2     (output) REAL
+            The eigenvalue of smaller absolute value.
+
+    CS1     (output) REAL
+    SN1     (output) REAL
+            The vector (CS1, SN1) is a unit right eigenvector for RT1.
+
+    Further Details
+    ===============
+
+    RT1 is accurate to a few ulps barring over/underflow.
+
+    RT2 may be inaccurate if there is massive cancellation in the
+    determinant A*C-B*B; higher precision or correctly rounded or
+    correctly truncated arithmetic would be needed to compute RT2
+    accurately in all cases.
+
+    CS1 and SN1 are accurate to a few ulps barring over/underflow.
+
+    Overflow is possible only if RT1 is within a factor of 5 of overflow.
+    Underflow is harmless if the input data is 0 or exceeds
+       underflow_threshold / macheps.
+
+   =====================================================================
+
+
+       Compute the eigenvalues
+*/
+
+    sm = *a + *c__;
+    df = *a - *c__;
+    adf = dabs(df);
+    tb = *b + *b;
+    ab = dabs(tb);
+    if (dabs(*a) > dabs(*c__)) {
+	acmx = *a;
+	acmn = *c__;
+    } else {
+	acmx = *c__;
+	acmn = *a;
+    }
+    if (adf > ab) {
+/* Computing 2nd power */
+	r__1 = ab / adf;
+	rt = adf * sqrt(r__1 * r__1 + 1.f);
+    } else if (adf < ab) {
+/* Computing 2nd power */
+	r__1 = adf / ab;
+	rt = ab * sqrt(r__1 * r__1 + 1.f);
+    } else {
+
+/*        Includes case AB=ADF=0 */
+
+	rt = ab * sqrt(2.f);
+    }
+    if (sm < 0.f) {
+	*rt1 = (sm - rt) * .5f;
+	sgn1 = -1;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else if (sm > 0.f) {
+	*rt1 = (sm + rt) * .5f;
+	sgn1 = 1;
+
+/*
+          Order of execution important.
+          To get fully accurate smaller eigenvalue,
+          next line needs to be executed in higher precision.
+*/
+
+	*rt2 = acmx / *rt1 * acmn - *b / *rt1 * *b;
+    } else {
+
+/*        Includes case RT1 = RT2 = 0 */
+
+	*rt1 = rt * .5f;
+	*rt2 = rt * -.5f;
+	sgn1 = 1;
+    }
+
+/*     Compute the eigenvector */
+
+    if (df >= 0.f) {
+	cs = df + rt;
+	sgn2 = 1;
+    } else {
+	cs = df - rt;
+	sgn2 = -1;
+    }
+    acs = dabs(cs);
+    if (acs > ab) {
+	ct = -tb / cs;
+	*sn1 = 1.f / sqrt(ct * ct + 1.f);
+	*cs1 = ct * *sn1;
+    } else {
+	if (ab == 0.f) {
+	    *cs1 = 1.f;
+	    *sn1 = 0.f;
+	} else {
+	    tn = -cs / tb;
+	    *cs1 = 1.f / sqrt(tn * tn + 1.f);
+	    *sn1 = tn * *cs1;
+	}
+    }
+    if (sgn1 == sgn2) {
+	tn = *cs1;
+	*cs1 = -(*sn1);
+	*sn1 = tn;
+    }
+    return 0;
+
+/*     End of SLAEV2 */
+
+} /* slaev2_ */
+
+/* Subroutine */ int slahqr_(logical *wantt, logical *wantz, integer *n,
+	integer *ilo, integer *ihi, real *h__, integer *ldh, real *wr, real *
+	wi, integer *iloz, integer *ihiz, real *z__, integer *ldz, integer *
+	info)
+{
+    /* System generated locals */
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer i__, j, k, l, m;
+    static real s, v[3];
+    static integer i1, i2;
+    static real t1, t2, t3, v1, v2, v3, h00, h10, h11, h12, h21, h22, h33,
+	    h44;
+    static integer nh;
+    static real cs;
+    static integer nr;
+    static real sn;
+    static integer nz;
+    static real ave, h33s, h44s;
+    static integer itn, its;
+    static real ulp, sum, tst1, h43h34, disc, unfl, ovfl, work[1];
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *), scopy_(integer *, real *, integer *,
+	    real *, integer *), slanv2_(real *, real *, real *, real *, real *
+	    , real *, real *, real *, real *, real *), slabad_(real *, real *)
+	    ;
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *,
+	    real *);
+    extern doublereal slanhs_(char *, integer *, real *, integer *, real *);
+    static real smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLAHQR is an auxiliary routine called by SHSEQR to update the
+    eigenvalues and Schur decomposition already computed by SHSEQR, by
+    dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+
+    Arguments
+    =========
+
+    WANTT   (input) LOGICAL
+            = .TRUE. : the full Schur form T is required;
+            = .FALSE.: only eigenvalues are required.
+
+    WANTZ   (input) LOGICAL
+            = .TRUE. : the matrix of Schur vectors Z is required;
+            = .FALSE.: Schur vectors are not required.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper quasi-triangular in
+            rows and columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless
+            ILO = 1). SLAHQR works primarily with the Hessenberg
+            submatrix in rows and columns ILO to IHI, but applies
+            transformations to all of H if WANTT is .TRUE..
+            1 <= ILO <= max(1,IHI); IHI <= N.
+
+    H       (input/output) REAL array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if WANTT is .TRUE., H is upper quasi-triangular in
+            rows and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+            standard form. If WANTT is .FALSE., the contents of H are
+            unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    WR      (output) REAL array, dimension (N)
+    WI      (output) REAL array, dimension (N)
+            The real and imaginary parts, respectively, of the computed
+            eigenvalues ILO to IHI are stored in the corresponding
+            elements of WR and WI. If two eigenvalues are computed as a
+            complex conjugate pair, they are stored in consecutive
+            elements of WR and WI, say the i-th and (i+1)th, with
+            WI(i) > 0 and WI(i+1) < 0. If WANTT is .TRUE., the
+            eigenvalues are stored in the same order as on the diagonal
+            of the Schur form returned in H, with WR(i) = H(i,i), and, if
+            H(i:i+1,i:i+1) is a 2-by-2 diagonal block,
+            WI(i) = sqrt(H(i+1,i)*H(i,i+1)) and WI(i+1) = -WI(i).
+
+    ILOZ    (input) INTEGER
+    IHIZ    (input) INTEGER
+            Specify the rows of Z to which transformations must be
+            applied if WANTZ is .TRUE..
+            1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+
+    Z       (input/output) REAL array, dimension (LDZ,N)
+            If WANTZ is .TRUE., on entry Z must contain the current
+            matrix Z of transformations accumulated by SHSEQR, and on
+            exit Z has been updated; transformations are applied only to
+            the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+            If WANTZ is .FALSE., Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z. LDZ >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            > 0: SLAHQR failed to compute all the eigenvalues ILO to IHI
+                 in a total of 30*(IHI-ILO+1) iterations; if INFO = i,
+                 elements i+1:ihi of WR and WI contain those eigenvalues
+                 which have been successfully computed.
+
+    Further Details
+    ===============
+
+    2-96 Based on modifications by
+       David Day, Sandia National Laboratory, USA
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --wr;
+    --wi;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	wr[*ilo] = h__[*ilo + *ilo * h_dim1];
+	wi[*ilo] = 0.f;
+	return 0;
+    }
+
+    nh = *ihi - *ilo + 1;
+    nz = *ihiz - *iloz + 1;
+
+/*
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    unfl = slamch_("Safe minimum");
+    ovfl = 1.f / unfl;
+    slabad_(&unfl, &ovfl);
+    ulp = slamch_("Precision");
+    smlnum = unfl * (nh / ulp);
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are set inside the main loop.
+*/
+
+    if (*wantt) {
+	i1 = 1;
+	i2 = *n;
+    }
+
+/*     ITN is the total number of QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of 1 or 2. Each iteration of the loop works
+       with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L10:
+    l = *ilo;
+    if (i__ < *ilo) {
+	goto L150;
+    }
+
+/*
+       Perform QR iterations on rows and columns ILO to I until a
+       submatrix of order 1 or 2 splits off at the bottom because a
+       subdiagonal element has become negligible.
+*/
+
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    tst1 = (r__1 = h__[k - 1 + (k - 1) * h_dim1], dabs(r__1)) + (r__2
+		    = h__[k + k * h_dim1], dabs(r__2));
+	    if (tst1 == 0.f) {
+		i__3 = i__ - l + 1;
+		tst1 = slanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, work);
+	    }
+/* Computing MAX */
+	    r__2 = ulp * tst1;
+	    if ((r__1 = h__[k + (k - 1) * h_dim1], dabs(r__1)) <= dmax(r__2,
+		    smlnum)) {
+		goto L30;
+	    }
+/* L20: */
+	}
+L30:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible */
+
+	    h__[l + (l - 1) * h_dim1] = 0.f;
+	}
+
+/*        Exit from loop if a submatrix of order 1 or 2 has split off. */
+
+	if (l >= i__ - 1) {
+	    goto L140;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! (*wantt)) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 10) || (its == 20)) {
+
+/*           Exceptional shift. */
+
+	    s = (r__1 = h__[i__ + (i__ - 1) * h_dim1], dabs(r__1)) + (r__2 =
+		    h__[i__ - 1 + (i__ - 2) * h_dim1], dabs(r__2));
+	    h44 = s * .75f + h__[i__ + i__ * h_dim1];
+	    h33 = h44;
+	    h43h34 = s * -.4375f * s;
+	} else {
+
+/*
+             Prepare to use Francis' double shift
+             (i.e. 2nd degree generalized Rayleigh quotient)
+*/
+
+	    h44 = h__[i__ + i__ * h_dim1];
+	    h33 = h__[i__ - 1 + (i__ - 1) * h_dim1];
+	    h43h34 = h__[i__ + (i__ - 1) * h_dim1] * h__[i__ - 1 + i__ *
+		    h_dim1];
+	    s = h__[i__ - 1 + (i__ - 2) * h_dim1] * h__[i__ - 1 + (i__ - 2) *
+		    h_dim1];
+	    disc = (h33 - h44) * .5f;
+	    disc = disc * disc + h43h34;
+	    if (disc > 0.f) {
+
+/*              Real roots: use Wilkinson's shift twice */
+
+		disc = sqrt(disc);
+		ave = (h33 + h44) * .5f;
+		if (dabs(h33) - dabs(h44) > 0.f) {
+		    h33 = h33 * h44 - h43h34;
+		    h44 = h33 / (r_sign(&disc, &ave) + ave);
+		} else {
+		    h44 = r_sign(&disc, &ave) + ave;
+		}
+		h33 = h44;
+		h43h34 = 0.f;
+	    }
+	}
+
+/*        Look for two consecutive small subdiagonal elements. */
+
+	i__2 = l;
+	for (m = i__ - 2; m >= i__2; --m) {
+/*
+             Determine the effect of starting the double-shift QR
+             iteration at row M, and see if this would make H(M,M-1)
+             negligible.
+*/
+
+	    h11 = h__[m + m * h_dim1];
+	    h22 = h__[m + 1 + (m + 1) * h_dim1];
+	    h21 = h__[m + 1 + m * h_dim1];
+	    h12 = h__[m + (m + 1) * h_dim1];
+	    h44s = h44 - h11;
+	    h33s = h33 - h11;
+	    v1 = (h33s * h44s - h43h34) / h21 + h12;
+	    v2 = h22 - h11 - h33s - h44s;
+	    v3 = h__[m + 2 + (m + 1) * h_dim1];
+	    s = dabs(v1) + dabs(v2) + dabs(v3);
+	    v1 /= s;
+	    v2 /= s;
+	    v3 /= s;
+	    v[0] = v1;
+	    v[1] = v2;
+	    v[2] = v3;
+	    if (m == l) {
+		goto L50;
+	    }
+	    h00 = h__[m - 1 + (m - 1) * h_dim1];
+	    h10 = h__[m + (m - 1) * h_dim1];
+	    tst1 = dabs(v1) * (dabs(h00) + dabs(h11) + dabs(h22));
+	    if (dabs(h10) * (dabs(v2) + dabs(v3)) <= ulp * tst1) {
+		goto L50;
+	    }
+/* L40: */
+	}
+L50:
+
+/*        Double-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = m; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix. NR is the order of G.
+
+   Computing MIN
+*/
+	    i__3 = 3, i__4 = i__ - k + 1;
+	    nr = min(i__3,i__4);
+	    if (k > m) {
+		scopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    slarfg_(&nr, v, &v[1], &c__1, &t1);
+	    if (k > m) {
+		h__[k + (k - 1) * h_dim1] = v[0];
+		h__[k + 1 + (k - 1) * h_dim1] = 0.f;
+		if (k < i__ - 1) {
+		    h__[k + 2 + (k - 1) * h_dim1] = 0.f;
+		}
+	    } else if (m > l) {
+		h__[k + (k - 1) * h_dim1] = -h__[k + (k - 1) * h_dim1];
+	    }
+	    v2 = v[1];
+	    t2 = t1 * v2;
+	    if (nr == 3) {
+		v3 = v[2];
+		t3 = t1 * v3;
+
+/*
+                Apply G from the left to transform the rows of the matrix
+                in columns K to I2.
+*/
+
+		i__3 = i2;
+		for (j = k; j <= i__3; ++j) {
+		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1]
+			    + v3 * h__[k + 2 + j * h_dim1];
+		    h__[k + j * h_dim1] -= sum * t1;
+		    h__[k + 1 + j * h_dim1] -= sum * t2;
+		    h__[k + 2 + j * h_dim1] -= sum * t3;
+/* L60: */
+		}
+
+/*
+                Apply G from the right to transform the columns of the
+                matrix in rows I1 to min(K+3,I).
+
+   Computing MIN
+*/
+		i__4 = k + 3;
+		i__3 = min(i__4,i__);
+		for (j = i1; j <= i__3; ++j) {
+		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
+			     + v3 * h__[j + (k + 2) * h_dim1];
+		    h__[j + k * h_dim1] -= sum * t1;
+		    h__[j + (k + 1) * h_dim1] -= sum * t2;
+		    h__[j + (k + 2) * h_dim1] -= sum * t3;
+/* L70: */
+		}
+
+		if (*wantz) {
+
+/*                 Accumulate transformations in the matrix Z */
+
+		    i__3 = *ihiz;
+		    for (j = *iloz; j <= i__3; ++j) {
+			sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) *
+				z_dim1] + v3 * z__[j + (k + 2) * z_dim1];
+			z__[j + k * z_dim1] -= sum * t1;
+			z__[j + (k + 1) * z_dim1] -= sum * t2;
+			z__[j + (k + 2) * z_dim1] -= sum * t3;
+/* L80: */
+		    }
+		}
+	    } else if (nr == 2) {
+
+/*
+                Apply G from the left to transform the rows of the matrix
+                in columns K to I2.
+*/
+
+		i__3 = i2;
+		for (j = k; j <= i__3; ++j) {
+		    sum = h__[k + j * h_dim1] + v2 * h__[k + 1 + j * h_dim1];
+		    h__[k + j * h_dim1] -= sum * t1;
+		    h__[k + 1 + j * h_dim1] -= sum * t2;
+/* L90: */
+		}
+
+/*
+                Apply G from the right to transform the columns of the
+                matrix in rows I1 to min(K+3,I).
+*/
+
+		i__3 = i__;
+		for (j = i1; j <= i__3; ++j) {
+		    sum = h__[j + k * h_dim1] + v2 * h__[j + (k + 1) * h_dim1]
+			    ;
+		    h__[j + k * h_dim1] -= sum * t1;
+		    h__[j + (k + 1) * h_dim1] -= sum * t2;
+/* L100: */
+		}
+
+		if (*wantz) {
+
+/*                 Accumulate transformations in the matrix Z */
+
+		    i__3 = *ihiz;
+		    for (j = *iloz; j <= i__3; ++j) {
+			sum = z__[j + k * z_dim1] + v2 * z__[j + (k + 1) *
+				z_dim1];
+			z__[j + k * z_dim1] -= sum * t1;
+			z__[j + (k + 1) * z_dim1] -= sum * t2;
+/* L110: */
+		    }
+		}
+	    }
+/* L120: */
+	}
+
+/* L130: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L140:
+
+    if (l == i__) {
+
+/*        H(I,I-1) is negligible: one eigenvalue has converged. */
+
+	wr[i__] = h__[i__ + i__ * h_dim1];
+	wi[i__] = 0.f;
+    } else if (l == i__ - 1) {
+
+/*
+          H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
+
+          Transform the 2-by-2 submatrix to standard Schur form,
+          and compute and store the eigenvalues.
+*/
+
+	slanv2_(&h__[i__ - 1 + (i__ - 1) * h_dim1], &h__[i__ - 1 + i__ *
+		h_dim1], &h__[i__ + (i__ - 1) * h_dim1], &h__[i__ + i__ *
+		h_dim1], &wr[i__ - 1], &wi[i__ - 1], &wr[i__], &wi[i__], &cs,
+		&sn);
+
+	if (*wantt) {
+
+/*           Apply the transformation to the rest of H. */
+
+	    if (i2 > i__) {
+		i__1 = i2 - i__;
+		srot_(&i__1, &h__[i__ - 1 + (i__ + 1) * h_dim1], ldh, &h__[
+			i__ + (i__ + 1) * h_dim1], ldh, &cs, &sn);
+	    }
+	    i__1 = i__ - i1 - 1;
+	    srot_(&i__1, &h__[i1 + (i__ - 1) * h_dim1], &c__1, &h__[i1 + i__ *
+		     h_dim1], &c__1, &cs, &sn);
+	}
+	if (*wantz) {
+
+/*           Apply the transformation to Z. */
+
+	    srot_(&nz, &z__[*iloz + (i__ - 1) * z_dim1], &c__1, &z__[*iloz +
+		    i__ * z_dim1], &c__1, &cs, &sn);
+	}
+    }
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L10;
+
+L150:
+    return 0;
+
+/*     End of SLAHQR */
+
+} /* slahqr_ */
+
+/* Subroutine */ int slahrd_(integer *n, integer *k, integer *nb, real *a,
+	integer *lda, real *tau, real *t, integer *ldt, real *y, integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+    real r__1;
+
+    /* Local variables */
+    static integer i__;
+    static real ei;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *), scopy_(
+	    integer *, real *, integer *, real *, integer *), saxpy_(integer *
+	    , real *, real *, integer *, real *, integer *), strmv_(char *,
+	    char *, char *, integer *, real *, integer *, real *, integer *), slarfg_(integer *, real *, real *,
+	    integer *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLAHRD reduces the first NB columns of a real general n-by-(n-k+1)
+    matrix A so that elements below the k-th subdiagonal are zero. The
+    reduction is performed by an orthogonal similarity transformation
+    Q' * A * Q. The routine returns the matrices V and T which determine
+    Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+
+    This is an auxiliary routine called by SGEHRD.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    K       (input) INTEGER
+            The offset for the reduction. Elements below the k-th
+            subdiagonal in the first NB columns are reduced to zero.
+
+    NB      (input) INTEGER
+            The number of columns to be reduced.
+
+    A       (input/output) REAL array, dimension (LDA,N-K+1)
+            On entry, the n-by-(n-k+1) general matrix A.
+            On exit, the elements on and above the k-th subdiagonal in
+            the first NB columns are overwritten with the corresponding
+            elements of the reduced matrix; the elements below the k-th
+            subdiagonal, with the array TAU, represent the matrix Q as a
+            product of elementary reflectors. The other columns of A are
+            unchanged. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) REAL array, dimension (NB)
+            The scalar factors of the elementary reflectors. See Further
+            Details.
+
+    T       (output) REAL array, dimension (LDT,NB)
+            The upper triangular matrix T.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T.  LDT >= NB.
+
+    Y       (output) REAL array, dimension (LDY,NB)
+            The n-by-nb matrix Y.
+
+    LDY     (input) INTEGER
+            The leading dimension of the array Y. LDY >= N.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of nb elementary reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+    A(i+k+1:n,i), and tau in TAU(i).
+
+    The elements of the vectors v together form the (n-k+1)-by-nb matrix
+    V which is needed, with T and Y, to apply the transformation to the
+    unreduced part of the matrix, using an update of the form:
+    A := (I - V*T*V') * (A - Y*V').
+
+    The contents of A on exit are illustrated by the following example
+    with n = 7, k = 3 and nb = 2:
+
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( h   h   a   a   a )
+       ( v1  h   a   a   a )
+       ( v1  v2  a   a   a )
+       ( v1  v2  a   a   a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    --tau;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if (*n <= 1) {
+	return 0;
+    }
+
+    i__1 = *nb;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (i__ > 1) {
+
+/*
+             Update A(1:n,i)
+
+             Compute i-th column of A - Y * V'
+*/
+
+	    i__2 = i__ - 1;
+	    sgemv_("No transpose", n, &i__2, &c_b1290, &y[y_offset], ldy, &a[*
+		    k + i__ - 1 + a_dim1], lda, &c_b1011, &a[i__ * a_dim1 + 1]
+		    , &c__1);
+
+/*
+             Apply I - V * T' * V' to this column (call it b) from the
+             left, using the last column of T as workspace
+
+             Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+                      ( V2 )             ( b2 )
+
+             where V1 is unit lower triangular
+
+             w := V1' * b1
+*/
+
+	    i__2 = i__ - 1;
+	    scopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 +
+		    1], &c__1);
+	    i__2 = i__ - 1;
+	    strmv_("Lower", "Transpose", "Unit", &i__2, &a[*k + 1 + a_dim1],
+		    lda, &t[*nb * t_dim1 + 1], &c__1);
+
+/*           w := w + V2'*b2 */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[*k + i__ + a_dim1],
+		     lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b1011, &t[*
+		    nb * t_dim1 + 1], &c__1);
+
+/*           w := T'*w */
+
+	    i__2 = i__ - 1;
+	    strmv_("Upper", "Transpose", "Non-unit", &i__2, &t[t_offset], ldt,
+		     &t[*nb * t_dim1 + 1], &c__1);
+
+/*           b2 := b2 - V2*w */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[*k + i__ +
+		    a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1, &c_b1011, &a[*
+		    k + i__ + i__ * a_dim1], &c__1);
+
+/*           b1 := b1 - V1*w */
+
+	    i__2 = i__ - 1;
+	    strmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1]
+		    , lda, &t[*nb * t_dim1 + 1], &c__1);
+	    i__2 = i__ - 1;
+	    saxpy_(&i__2, &c_b1290, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 +
+		    i__ * a_dim1], &c__1);
+
+	    a[*k + i__ - 1 + (i__ - 1) * a_dim1] = ei;
+	}
+
+/*
+          Generate the elementary reflector H(i) to annihilate
+          A(k+i+1:n,i)
+*/
+
+	i__2 = *n - *k - i__ + 1;
+/* Computing MIN */
+	i__3 = *k + i__ + 1;
+	slarfg_(&i__2, &a[*k + i__ + i__ * a_dim1], &a[min(i__3,*n) + i__ *
+		a_dim1], &c__1, &tau[i__]);
+	ei = a[*k + i__ + i__ * a_dim1];
+	a[*k + i__ + i__ * a_dim1] = 1.f;
+
+/*        Compute  Y(1:n,i) */
+
+	i__2 = *n - *k - i__ + 1;
+	sgemv_("No transpose", n, &i__2, &c_b1011, &a[(i__ + 1) * a_dim1 + 1],
+		 lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b320, &y[i__ *
+		y_dim1 + 1], &c__1);
+	i__2 = *n - *k - i__ + 1;
+	i__3 = i__ - 1;
+	sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[*k + i__ + a_dim1],
+		lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b320, &t[i__ *
+		t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	sgemv_("No transpose", n, &i__2, &c_b1290, &y[y_offset], ldy, &t[i__ *
+		 t_dim1 + 1], &c__1, &c_b1011, &y[i__ * y_dim1 + 1], &c__1);
+	sscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1);
+
+/*        Compute T(1:i,i) */
+
+	i__2 = i__ - 1;
+	r__1 = -tau[i__];
+	sscal_(&i__2, &r__1, &t[i__ * t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt,
+		&t[i__ * t_dim1 + 1], &c__1)
+		;
+	t[i__ + i__ * t_dim1] = tau[i__];
+
+/* L10: */
+    }
+    a[*k + *nb + *nb * a_dim1] = ei;
+
+    return 0;
+
+/*     End of SLAHRD */
+
+} /* slahrd_ */
+
+/* Subroutine */ int slaln2_(logical *ltrans, integer *na, integer *nw, real *
+	smin, real *ca, real *a, integer *lda, real *d1, real *d2, real *b,
+	integer *ldb, real *wr, real *wi, real *x, integer *ldx, real *scale,
+	real *xnorm, integer *info)
+{
+    /* Initialized data */
+
+    static logical cswap[4] = { FALSE_,FALSE_,TRUE_,TRUE_ };
+    static logical rswap[4] = { FALSE_,TRUE_,FALSE_,TRUE_ };
+    static integer ipivot[16]	/* was [4][4] */ = { 1,2,3,4,2,1,4,3,3,4,1,2,
+	    4,3,2,1 };
+
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset;
+    real r__1, r__2, r__3, r__4, r__5, r__6;
+    static real equiv_0[4], equiv_1[4];
+
+    /* Local variables */
+    static integer j;
+#define ci (equiv_0)
+#define cr (equiv_1)
+    static real bi1, bi2, br1, br2, xi1, xi2, xr1, xr2, ci21, ci22, cr21,
+	    cr22, li21, csi, ui11, lr21, ui12, ui22;
+#define civ (equiv_0)
+    static real csr, ur11, ur12, ur22;
+#define crv (equiv_1)
+    static real bbnd, cmax, ui11r, ui12s, temp, ur11r, ur12s, u22abs;
+    static integer icmax;
+    static real bnorm, cnorm, smini;
+    extern doublereal slamch_(char *);
+    static real bignum;
+    extern /* Subroutine */ int sladiv_(real *, real *, real *, real *, real *
+	    , real *);
+    static real smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLALN2 solves a system of the form  (ca A - w D ) X = s B
+    or (ca A' - w D) X = s B   with possible scaling ("s") and
+    perturbation of A.  (A' means A-transpose.)
+
+    A is an NA x NA real matrix, ca is a real scalar, D is an NA x NA
+    real diagonal matrix, w is a real or complex value, and X and B are
+    NA x 1 matrices -- real if w is real, complex if w is complex.  NA
+    may be 1 or 2.
+
+    If w is complex, X and B are represented as NA x 2 matrices,
+    the first column of each being the real part and the second
+    being the imaginary part.
+
+    "s" is a scaling factor (.LE. 1), computed by SLALN2, which is
+    so chosen that X can be computed without overflow.  X is further
+    scaled if necessary to assure that norm(ca A - w D)*norm(X) is less
+    than overflow.
+
+    If both singular values of (ca A - w D) are less than SMIN,
+    SMIN*identity will be used instead of (ca A - w D).  If only one
+    singular value is less than SMIN, one element of (ca A - w D) will be
+    perturbed enough to make the smallest singular value roughly SMIN.
+    If both singular values are at least SMIN, (ca A - w D) will not be
+    perturbed.  In any case, the perturbation will be at most some small
+    multiple of max( SMIN, ulp*norm(ca A - w D) ).  The singular values
+    are computed by infinity-norm approximations, and thus will only be
+    correct to a factor of 2 or so.
+
+    Note: all input quantities are assumed to be smaller than overflow
+    by a reasonable factor.  (See BIGNUM.)
+
+    Arguments
+    ==========
+
+    LTRANS  (input) LOGICAL
+            =.TRUE.:  A-transpose will be used.
+            =.FALSE.: A will be used (not transposed.)
+
+    NA      (input) INTEGER
+            The size of the matrix A.  It may (only) be 1 or 2.
+
+    NW      (input) INTEGER
+            1 if "w" is real, 2 if "w" is complex.  It may only be 1
+            or 2.
+
+    SMIN    (input) REAL
+            The desired lower bound on the singular values of A.  This
+            should be a safe distance away from underflow or overflow,
+            say, between (underflow/machine precision) and  (machine
+            precision * overflow ).  (See BIGNUM and ULP.)
+
+    CA      (input) REAL
+            The coefficient c, which A is multiplied by.
+
+    A       (input) REAL array, dimension (LDA,NA)
+            The NA x NA matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of A.  It must be at least NA.
+
+    D1      (input) REAL
+            The 1,1 element in the diagonal matrix D.
+
+    D2      (input) REAL
+            The 2,2 element in the diagonal matrix D.  Not used if NW=1.
+
+    B       (input) REAL array, dimension (LDB,NW)
+            The NA x NW matrix B (right-hand side).  If NW=2 ("w" is
+            complex), column 1 contains the real part of B and column 2
+            contains the imaginary part.
+
+    LDB     (input) INTEGER
+            The leading dimension of B.  It must be at least NA.
+
+    WR      (input) REAL
+            The real part of the scalar "w".
+
+    WI      (input) REAL
+            The imaginary part of the scalar "w".  Not used if NW=1.
+
+    X       (output) REAL array, dimension (LDX,NW)
+            The NA x NW matrix X (unknowns), as computed by SLALN2.
+            If NW=2 ("w" is complex), on exit, column 1 will contain
+            the real part of X and column 2 will contain the imaginary
+            part.
+
+    LDX     (input) INTEGER
+            The leading dimension of X.  It must be at least NA.
+
+    SCALE   (output) REAL
+            The scale factor that B must be multiplied by to insure
+            that overflow does not occur when computing X.  Thus,
+            (ca A - w D) X  will be SCALE*B, not B (ignoring
+            perturbations of A.)  It will be at most 1.
+
+    XNORM   (output) REAL
+            The infinity-norm of X, when X is regarded as an NA x NW
+            real matrix.
+
+    INFO    (output) INTEGER
+            An error flag.  It will be set to zero if no error occurs,
+            a negative number if an argument is in error, or a positive
+            number if  ca A - w D  had to be perturbed.
+            The possible values are:
+            = 0: No error occurred, and (ca A - w D) did not have to be
+                   perturbed.
+            = 1: (ca A - w D) had to be perturbed to make its smallest
+                 (or only) singular value greater than SMIN.
+            NOTE: In the interests of speed, this routine does not
+                  check the inputs for errors.
+
+   =====================================================================
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+
+    /* Function Body */
+
+/*     Compute BIGNUM */
+
+    smlnum = 2.f * slamch_("Safe minimum");
+    bignum = 1.f / smlnum;
+    smini = dmax(*smin,smlnum);
+
+/*     Don't check for input errors */
+
+    *info = 0;
+
+/*     Standard Initializations */
+
+    *scale = 1.f;
+
+    if (*na == 1) {
+
+/*        1 x 1  (i.e., scalar) system   C X = B */
+
+	if (*nw == 1) {
+
+/*
+             Real 1x1 system.
+
+             C = ca A - w D
+*/
+
+	    csr = *ca * a[a_dim1 + 1] - *wr * *d1;
+	    cnorm = dabs(csr);
+
+/*           If | C | < SMINI, use C = SMINI */
+
+	    if (cnorm < smini) {
+		csr = smini;
+		cnorm = smini;
+		*info = 1;
+	    }
+
+/*           Check scaling for  X = B / C */
+
+	    bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1));
+	    if (cnorm < 1.f && bnorm > 1.f) {
+		if (bnorm > bignum * cnorm) {
+		    *scale = 1.f / bnorm;
+		}
+	    }
+
+/*           Compute X */
+
+	    x[x_dim1 + 1] = b[b_dim1 + 1] * *scale / csr;
+	    *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1));
+	} else {
+
+/*
+             Complex 1x1 system (w is complex)
+
+             C = ca A - w D
+*/
+
+	    csr = *ca * a[a_dim1 + 1] - *wr * *d1;
+	    csi = -(*wi) * *d1;
+	    cnorm = dabs(csr) + dabs(csi);
+
+/*           If | C | < SMINI, use C = SMINI */
+
+	    if (cnorm < smini) {
+		csr = smini;
+		csi = 0.f;
+		cnorm = smini;
+		*info = 1;
+	    }
+
+/*           Check scaling for  X = B / C */
+
+	    bnorm = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[((b_dim1)
+		    << (1)) + 1], dabs(r__2));
+	    if (cnorm < 1.f && bnorm > 1.f) {
+		if (bnorm > bignum * cnorm) {
+		    *scale = 1.f / bnorm;
+		}
+	    }
+
+/*           Compute X */
+
+	    r__1 = *scale * b[b_dim1 + 1];
+	    r__2 = *scale * b[((b_dim1) << (1)) + 1];
+	    sladiv_(&r__1, &r__2, &csr, &csi, &x[x_dim1 + 1], &x[((x_dim1) <<
+		    (1)) + 1]);
+	    *xnorm = (r__1 = x[x_dim1 + 1], dabs(r__1)) + (r__2 = x[((x_dim1)
+		    << (1)) + 1], dabs(r__2));
+	}
+
+    } else {
+
+/*
+          2x2 System
+
+          Compute the real part of  C = ca A - w D  (or  ca A' - w D )
+*/
+
+	cr[0] = *ca * a[a_dim1 + 1] - *wr * *d1;
+	cr[3] = *ca * a[((a_dim1) << (1)) + 2] - *wr * *d2;
+	if (*ltrans) {
+	    cr[2] = *ca * a[a_dim1 + 2];
+	    cr[1] = *ca * a[((a_dim1) << (1)) + 1];
+	} else {
+	    cr[1] = *ca * a[a_dim1 + 2];
+	    cr[2] = *ca * a[((a_dim1) << (1)) + 1];
+	}
+
+	if (*nw == 1) {
+
+/*
+             Real 2x2 system  (w is real)
+
+             Find the largest element in C
+*/
+
+	    cmax = 0.f;
+	    icmax = 0;
+
+	    for (j = 1; j <= 4; ++j) {
+		if ((r__1 = crv[j - 1], dabs(r__1)) > cmax) {
+		    cmax = (r__1 = crv[j - 1], dabs(r__1));
+		    icmax = j;
+		}
+/* L10: */
+	    }
+
+/*           If norm(C) < SMINI, use SMINI*identity. */
+
+	    if (cmax < smini) {
+/* Computing MAX */
+		r__3 = (r__1 = b[b_dim1 + 1], dabs(r__1)), r__4 = (r__2 = b[
+			b_dim1 + 2], dabs(r__2));
+		bnorm = dmax(r__3,r__4);
+		if (smini < 1.f && bnorm > 1.f) {
+		    if (bnorm > bignum * smini) {
+			*scale = 1.f / bnorm;
+		    }
+		}
+		temp = *scale / smini;
+		x[x_dim1 + 1] = temp * b[b_dim1 + 1];
+		x[x_dim1 + 2] = temp * b[b_dim1 + 2];
+		*xnorm = temp * bnorm;
+		*info = 1;
+		return 0;
+	    }
+
+/*           Gaussian elimination with complete pivoting. */
+
+	    ur11 = crv[icmax - 1];
+	    cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1];
+	    ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1];
+	    cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1];
+	    ur11r = 1.f / ur11;
+	    lr21 = ur11r * cr21;
+	    ur22 = cr22 - ur12 * lr21;
+
+/*           If smaller pivot < SMINI, use SMINI */
+
+	    if (dabs(ur22) < smini) {
+		ur22 = smini;
+		*info = 1;
+	    }
+	    if (rswap[icmax - 1]) {
+		br1 = b[b_dim1 + 2];
+		br2 = b[b_dim1 + 1];
+	    } else {
+		br1 = b[b_dim1 + 1];
+		br2 = b[b_dim1 + 2];
+	    }
+	    br2 -= lr21 * br1;
+/* Computing MAX */
+	    r__2 = (r__1 = br1 * (ur22 * ur11r), dabs(r__1)), r__3 = dabs(br2)
+		    ;
+	    bbnd = dmax(r__2,r__3);
+	    if (bbnd > 1.f && dabs(ur22) < 1.f) {
+		if (bbnd >= bignum * dabs(ur22)) {
+		    *scale = 1.f / bbnd;
+		}
+	    }
+
+	    xr2 = br2 * *scale / ur22;
+	    xr1 = *scale * br1 * ur11r - xr2 * (ur11r * ur12);
+	    if (cswap[icmax - 1]) {
+		x[x_dim1 + 1] = xr2;
+		x[x_dim1 + 2] = xr1;
+	    } else {
+		x[x_dim1 + 1] = xr1;
+		x[x_dim1 + 2] = xr2;
+	    }
+/* Computing MAX */
+	    r__1 = dabs(xr1), r__2 = dabs(xr2);
+	    *xnorm = dmax(r__1,r__2);
+
+/*           Further scaling if  norm(A) norm(X) > overflow */
+
+	    if (*xnorm > 1.f && cmax > 1.f) {
+		if (*xnorm > bignum / cmax) {
+		    temp = cmax / bignum;
+		    x[x_dim1 + 1] = temp * x[x_dim1 + 1];
+		    x[x_dim1 + 2] = temp * x[x_dim1 + 2];
+		    *xnorm = temp * *xnorm;
+		    *scale = temp * *scale;
+		}
+	    }
+	} else {
+
+/*
+             Complex 2x2 system  (w is complex)
+
+             Find the largest element in C
+*/
+
+	    ci[0] = -(*wi) * *d1;
+	    ci[1] = 0.f;
+	    ci[2] = 0.f;
+	    ci[3] = -(*wi) * *d2;
+	    cmax = 0.f;
+	    icmax = 0;
+
+	    for (j = 1; j <= 4; ++j) {
+		if ((r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j - 1],
+			dabs(r__2)) > cmax) {
+		    cmax = (r__1 = crv[j - 1], dabs(r__1)) + (r__2 = civ[j -
+			    1], dabs(r__2));
+		    icmax = j;
+		}
+/* L20: */
+	    }
+
+/*           If norm(C) < SMINI, use SMINI*identity. */
+
+	    if (cmax < smini) {
+/* Computing MAX */
+		r__5 = (r__1 = b[b_dim1 + 1], dabs(r__1)) + (r__2 = b[((
+			b_dim1) << (1)) + 1], dabs(r__2)), r__6 = (r__3 = b[
+			b_dim1 + 2], dabs(r__3)) + (r__4 = b[((b_dim1) << (1))
+			 + 2], dabs(r__4));
+		bnorm = dmax(r__5,r__6);
+		if (smini < 1.f && bnorm > 1.f) {
+		    if (bnorm > bignum * smini) {
+			*scale = 1.f / bnorm;
+		    }
+		}
+		temp = *scale / smini;
+		x[x_dim1 + 1] = temp * b[b_dim1 + 1];
+		x[x_dim1 + 2] = temp * b[b_dim1 + 2];
+		x[((x_dim1) << (1)) + 1] = temp * b[((b_dim1) << (1)) + 1];
+		x[((x_dim1) << (1)) + 2] = temp * b[((b_dim1) << (1)) + 2];
+		*xnorm = temp * bnorm;
+		*info = 1;
+		return 0;
+	    }
+
+/*           Gaussian elimination with complete pivoting. */
+
+	    ur11 = crv[icmax - 1];
+	    ui11 = civ[icmax - 1];
+	    cr21 = crv[ipivot[((icmax) << (2)) - 3] - 1];
+	    ci21 = civ[ipivot[((icmax) << (2)) - 3] - 1];
+	    ur12 = crv[ipivot[((icmax) << (2)) - 2] - 1];
+	    ui12 = civ[ipivot[((icmax) << (2)) - 2] - 1];
+	    cr22 = crv[ipivot[((icmax) << (2)) - 1] - 1];
+	    ci22 = civ[ipivot[((icmax) << (2)) - 1] - 1];
+	    if ((icmax == 1) || (icmax == 4)) {
+
+/*              Code when off-diagonals of pivoted C are real */
+
+		if (dabs(ur11) > dabs(ui11)) {
+		    temp = ui11 / ur11;
+/* Computing 2nd power */
+		    r__1 = temp;
+		    ur11r = 1.f / (ur11 * (r__1 * r__1 + 1.f));
+		    ui11r = -temp * ur11r;
+		} else {
+		    temp = ur11 / ui11;
+/* Computing 2nd power */
+		    r__1 = temp;
+		    ui11r = -1.f / (ui11 * (r__1 * r__1 + 1.f));
+		    ur11r = -temp * ui11r;
+		}
+		lr21 = cr21 * ur11r;
+		li21 = cr21 * ui11r;
+		ur12s = ur12 * ur11r;
+		ui12s = ur12 * ui11r;
+		ur22 = cr22 - ur12 * lr21;
+		ui22 = ci22 - ur12 * li21;
+	    } else {
+
+/*              Code when diagonals of pivoted C are real */
+
+		ur11r = 1.f / ur11;
+		ui11r = 0.f;
+		lr21 = cr21 * ur11r;
+		li21 = ci21 * ur11r;
+		ur12s = ur12 * ur11r;
+		ui12s = ui12 * ur11r;
+		ur22 = cr22 - ur12 * lr21 + ui12 * li21;
+		ui22 = -ur12 * li21 - ui12 * lr21;
+	    }
+	    u22abs = dabs(ur22) + dabs(ui22);
+
+/*           If smaller pivot < SMINI, use SMINI */
+
+	    if (u22abs < smini) {
+		ur22 = smini;
+		ui22 = 0.f;
+		*info = 1;
+	    }
+	    if (rswap[icmax - 1]) {
+		br2 = b[b_dim1 + 1];
+		br1 = b[b_dim1 + 2];
+		bi2 = b[((b_dim1) << (1)) + 1];
+		bi1 = b[((b_dim1) << (1)) + 2];
+	    } else {
+		br1 = b[b_dim1 + 1];
+		br2 = b[b_dim1 + 2];
+		bi1 = b[((b_dim1) << (1)) + 1];
+		bi2 = b[((b_dim1) << (1)) + 2];
+	    }
+	    br2 = br2 - lr21 * br1 + li21 * bi1;
+	    bi2 = bi2 - li21 * br1 - lr21 * bi1;
+/* Computing MAX */
+	    r__1 = (dabs(br1) + dabs(bi1)) * (u22abs * (dabs(ur11r) + dabs(
+		    ui11r))), r__2 = dabs(br2) + dabs(bi2);
+	    bbnd = dmax(r__1,r__2);
+	    if (bbnd > 1.f && u22abs < 1.f) {
+		if (bbnd >= bignum * u22abs) {
+		    *scale = 1.f / bbnd;
+		    br1 = *scale * br1;
+		    bi1 = *scale * bi1;
+		    br2 = *scale * br2;
+		    bi2 = *scale * bi2;
+		}
+	    }
+
+	    sladiv_(&br2, &bi2, &ur22, &ui22, &xr2, &xi2);
+	    xr1 = ur11r * br1 - ui11r * bi1 - ur12s * xr2 + ui12s * xi2;
+	    xi1 = ui11r * br1 + ur11r * bi1 - ui12s * xr2 - ur12s * xi2;
+	    if (cswap[icmax - 1]) {
+		x[x_dim1 + 1] = xr2;
+		x[x_dim1 + 2] = xr1;
+		x[((x_dim1) << (1)) + 1] = xi2;
+		x[((x_dim1) << (1)) + 2] = xi1;
+	    } else {
+		x[x_dim1 + 1] = xr1;
+		x[x_dim1 + 2] = xr2;
+		x[((x_dim1) << (1)) + 1] = xi1;
+		x[((x_dim1) << (1)) + 2] = xi2;
+	    }
+/* Computing MAX */
+	    r__1 = dabs(xr1) + dabs(xi1), r__2 = dabs(xr2) + dabs(xi2);
+	    *xnorm = dmax(r__1,r__2);
+
+/*           Further scaling if  norm(A) norm(X) > overflow */
+
+	    if (*xnorm > 1.f && cmax > 1.f) {
+		if (*xnorm > bignum / cmax) {
+		    temp = cmax / bignum;
+		    x[x_dim1 + 1] = temp * x[x_dim1 + 1];
+		    x[x_dim1 + 2] = temp * x[x_dim1 + 2];
+		    x[((x_dim1) << (1)) + 1] = temp * x[((x_dim1) << (1)) + 1]
+			    ;
+		    x[((x_dim1) << (1)) + 2] = temp * x[((x_dim1) << (1)) + 2]
+			    ;
+		    *xnorm = temp * *xnorm;
+		    *scale = temp * *scale;
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SLALN2 */
+
+} /* slaln2_ */
+
+#undef crv
+#undef civ
+#undef cr
+#undef ci
+
+
+/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx,
+	integer *ldbx, integer *perm, integer *givptr, integer *givcol,
+	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
+	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
+	work, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset,
+	    difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1,
+	    poles_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    static integer i__, j, m, n;
+    static real dj;
+    static integer nlp1;
+    static real temp;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *);
+    extern doublereal snrm2_(integer *, real *, integer *);
+    static real diflj, difrj, dsigj;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *), scopy_(
+	    integer *, real *, integer *, real *, integer *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real dsigjp;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *,
+	    real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 1, 1999
+
+
+    Purpose
+    =======
+
+    SLALS0 applies back the multiplying factors of either the left or the
+    right singular vector matrix of a diagonal matrix appended by a row
+    to the right hand side matrix B in solving the least squares problem
+    using the divide-and-conquer SVD approach.
+
+    For the left singular vector matrix, three types of orthogonal
+    matrices are involved:
+
+    (1L) Givens rotations: the number of such rotations is GIVPTR; the
+         pairs of columns/rows they were applied to are stored in GIVCOL;
+         and the C- and S-values of these rotations are stored in GIVNUM.
+
+    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+         row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+         J-th row.
+
+    (3L) The left singular vector matrix of the remaining matrix.
+
+    For the right singular vector matrix, four types of orthogonal
+    matrices are involved:
+
+    (1R) The right singular vector matrix of the remaining matrix.
+
+    (2R) If SQRE = 1, one extra Givens rotation to generate the right
+         null space.
+
+    (3R) The inverse transformation of (2L).
+
+    (4R) The inverse transformation of (1L).
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed in
+           factored form:
+           = 0: Left singular vector matrix.
+           = 1: Right singular vector matrix.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block. NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block. NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input/output) REAL array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B. LDB must be at least
+           max(1,MAX( M, N ) ).
+
+    BX     (workspace) REAL array, dimension ( LDBX, NRHS )
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    PERM   (input) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) applied
+           to the two blocks.
+
+    GIVPTR (input) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of rows/columns
+           involved in a Givens rotation.
+
+    LDGCOL (input) INTEGER
+           The leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (input) REAL array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value used in the
+           corresponding Givens rotation.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of arrays DIFR, POLES and
+           GIVNUM, must be at least K.
+
+    POLES  (input) REAL array, dimension ( LDGNUM, 2 )
+           On entry, POLES(1:K, 1) contains the new singular
+           values obtained from solving the secular equation, and
+           POLES(1:K, 2) is an array containing the poles in the secular
+           equation.
+
+    DIFL   (input) REAL array, dimension ( K ).
+           On entry, DIFL(I) is the distance between I-th updated
+           (undeflated) singular value and the I-th (undeflated) old
+           singular value.
+
+    DIFR   (input) REAL array, dimension ( LDGNUM, 2 ).
+           On entry, DIFR(I, 1) contains the distances between I-th
+           updated (undeflated) singular value and the I+1-th
+           (undeflated) old singular value. And DIFR(I, 2) is the
+           normalizing factor for the I-th right singular vector.
+
+    Z      (input) REAL array, dimension ( K )
+           Contain the components of the deflation-adjusted updating row
+           vector.
+
+    K      (input) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    C      (input) REAL
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (input) REAL
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    WORK   (workspace) REAL array, dimension ( K )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    difr_dim1 = *ldgnum;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    poles_dim1 = *ldgnum;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    --difl;
+    --z__;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    }
+
+    n = *nl + *nr + 1;
+
+    if (*nrhs < 1) {
+	*info = -5;
+    } else if (*ldb < n) {
+	*info = -7;
+    } else if (*ldbx < n) {
+	*info = -9;
+    } else if (*givptr < 0) {
+	*info = -11;
+    } else if (*ldgcol < n) {
+	*info = -13;
+    } else if (*ldgnum < n) {
+	*info = -15;
+    } else if (*k < 1) {
+	*info = -20;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLALS0", &i__1);
+	return 0;
+    }
+
+    m = n + *sqre;
+    nlp1 = *nl + 1;
+
+    if (*icompq == 0) {
+
+/*
+          Apply back orthogonal transformations from the left.
+
+          Step (1L): apply back the Givens rotations performed.
+*/
+
+	i__1 = *givptr;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    srot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1])
+		    ;
+/* L10: */
+	}
+
+/*        Step (2L): permute rows of B. */
+
+	scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1],
+		    ldbx);
+/* L20: */
+	}
+
+/*
+          Step (3L): apply the inverse of the left singular vector
+          matrix to BX.
+*/
+
+	if (*k == 1) {
+	    scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
+	    if (z__[1] < 0.f) {
+		sscal_(nrhs, &c_b1290, &b[b_offset], ldb);
+	    }
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		diflj = difl[j];
+		dj = poles[j + poles_dim1];
+		dsigj = -poles[j + ((poles_dim1) << (1))];
+		if (j < *k) {
+		    difrj = -difr[j + difr_dim1];
+		    dsigjp = -poles[j + 1 + ((poles_dim1) << (1))];
+		}
+		if ((z__[j] == 0.f) || (poles[j + ((poles_dim1) << (1))] ==
+			0.f)) {
+		    work[j] = 0.f;
+		} else {
+		    work[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] /
+			    diflj / (poles[j + ((poles_dim1) << (1))] + dj);
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1)
+			    )] == 0.f)) {
+			work[i__] = 0.f;
+		    } else {
+			work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (slamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigj) - diflj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L30: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.f) || (poles[i__ + ((poles_dim1) << (1)
+			    )] == 0.f)) {
+			work[i__] = 0.f;
+		    } else {
+			work[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (slamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigjp) + difrj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L40: */
+		}
+		work[1] = -1.f;
+		temp = snrm2_(k, &work[1], &c__1);
+		sgemv_("T", k, nrhs, &c_b1011, &bx[bx_offset], ldbx, &work[1],
+			 &c__1, &c_b320, &b[j + b_dim1], ldb);
+		slascl_("G", &c__0, &c__0, &temp, &c_b1011, &c__1, nrhs, &b[j
+			+ b_dim1], ldb, info);
+/* L50: */
+	    }
+	}
+
+/*        Move the deflated rows of BX to B also. */
+
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1
+		    + b_dim1], ldb);
+	}
+    } else {
+
+/*
+          Apply back the right orthogonal transformations.
+
+          Step (1R): apply back the new right singular vector matrix
+          to B.
+*/
+
+	if (*k == 1) {
+	    scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		dsigj = poles[j + ((poles_dim1) << (1))];
+		if (z__[j] == 0.f) {
+		    work[j] = 0.f;
+		} else {
+		    work[j] = -z__[j] / difl[j] / (dsigj + poles[j +
+			    poles_dim1]) / difr[j + ((difr_dim1) << (1))];
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.f) {
+			work[i__] = 0.f;
+		    } else {
+			r__1 = -poles[i__ + 1 + ((poles_dim1) << (1))];
+			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[
+				i__ + difr_dim1]) / (dsigj + poles[i__ +
+				poles_dim1]) / difr[i__ + ((difr_dim1) << (1))
+				];
+		    }
+/* L60: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.f) {
+			work[i__] = 0.f;
+		    } else {
+			r__1 = -poles[i__ + ((poles_dim1) << (1))];
+			work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[
+				i__]) / (dsigj + poles[i__ + poles_dim1]) /
+				difr[i__ + ((difr_dim1) << (1))];
+		    }
+/* L70: */
+		}
+		sgemv_("T", k, nrhs, &c_b1011, &b[b_offset], ldb, &work[1], &
+			c__1, &c_b320, &bx[j + bx_dim1], ldbx);
+/* L80: */
+	    }
+	}
+
+/*
+          Step (2R): if SQRE = 1, apply back the rotation that is
+          related to the right null space of the subproblem.
+*/
+
+	if (*sqre == 1) {
+	    scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
+	    srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__,
+		    s);
+	}
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 +
+		    bx_dim1], ldbx);
+	}
+
+/*        Step (3R): permute rows of B. */
+
+	scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
+	if (*sqre == 1) {
+	    scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
+	}
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1],
+		    ldb);
+/* L90: */
+	}
+
+/*        Step (4R): apply back the Givens rotations performed. */
+
+	for (i__ = *givptr; i__ >= 1; --i__) {
+	    r__1 = -givnum[i__ + givnum_dim1];
+	    srot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &r__1);
+/* L100: */
+	}
+    }
+
+    return 0;
+
+/*     End of SLALS0 */
+
+} /* slals0_ */
+
+/* Subroutine */ int slalsa_(integer *icompq, integer *smlsiz, integer *n,
+	integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, real *
+	u, integer *ldu, real *vt, integer *k, real *difl, real *difr, real *
+	z__, real *poles, integer *givptr, integer *givcol, integer *ldgcol,
+	integer *perm, real *givnum, real *c__, real *s, real *work, integer *
+	iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, b_dim1,
+	    b_offset, bx_dim1, bx_offset, difl_dim1, difl_offset, difr_dim1,
+	    difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset,
+	     u_dim1, u_offset, vt_dim1, vt_offset, z_dim1, z_offset, i__1,
+	    i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl,
+	    ndb1, nlp1, lvl2, nrp1, nlvl, sqre, inode, ndiml;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer ndimr;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), slals0_(integer *, integer *, integer *, integer *,
+	    integer *, real *, integer *, real *, integer *, integer *,
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , real *, real *, integer *, real *, real *, real *, integer *),
+	    xerbla_(char *, integer *), slasdt_(integer *, integer *,
+	    integer *, integer *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLALSA is an itermediate step in solving the least squares problem
+    by computing the SVD of the coefficient matrix in compact form (The
+    singular vectors are computed as products of simple orthorgonal
+    matrices.).
+
+    If ICOMPQ = 0, SLALSA applies the inverse of the left singular vector
+    matrix of an upper bidiagonal matrix to the right hand side; and if
+    ICOMPQ = 1, SLALSA applies the right singular vector matrix to the
+    right hand side. The singular vector matrices were generated in
+    compact form by SLALSA.
+
+    Arguments
+    =========
+
+
+    ICOMPQ (input) INTEGER
+           Specifies whether the left or the right singular vector
+           matrix is involved.
+           = 0: Left singular vector matrix
+           = 1: Right singular vector matrix
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The row and column dimensions of the upper bidiagonal matrix.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input) REAL array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,MAX( M, N ) ).
+
+    BX     (output) REAL array, dimension ( LDBX, NRHS )
+           On exit, the result of applying the left or right singular
+           vector matrix to B.
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    U      (input) REAL array, dimension ( LDU, SMLSIZ ).
+           On entry, U contains the left singular vector matrices of all
+           subproblems at the bottom level.
+
+    LDU    (input) INTEGER, LDU = > N.
+           The leading dimension of arrays U, VT, DIFL, DIFR,
+           POLES, GIVNUM, and Z.
+
+    VT     (input) REAL array, dimension ( LDU, SMLSIZ+1 ).
+           On entry, VT' contains the right singular vector matrices of
+           all subproblems at the bottom level.
+
+    K      (input) INTEGER array, dimension ( N ).
+
+    DIFL   (input) REAL array, dimension ( LDU, NLVL ).
+           where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+
+    DIFR   (input) REAL array, dimension ( LDU, 2 * NLVL ).
+           On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+           distances between singular values on the I-th level and
+           singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+           record the normalizing factors of the right singular vectors
+           matrices of subproblems on I-th level.
+
+    Z      (input) REAL array, dimension ( LDU, NLVL ).
+           On entry, Z(1, I) contains the components of the deflation-
+           adjusted updating row vector for subproblems on the I-th
+           level.
+
+    POLES  (input) REAL array, dimension ( LDU, 2 * NLVL ).
+           On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+           singular values involved in the secular equations on the I-th
+           level.
+
+    GIVPTR (input) INTEGER array, dimension ( N ).
+           On entry, GIVPTR( I ) records the number of Givens
+           rotations performed on the I-th problem on the computation
+           tree.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+           On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+           locations of Givens rotations performed on the I-th level on
+           the computation tree.
+
+    LDGCOL (input) INTEGER, LDGCOL = > N.
+           The leading dimension of arrays GIVCOL and PERM.
+
+    PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+           On entry, PERM(*, I) records permutations done on the I-th
+           level of the computation tree.
+
+    GIVNUM (input) REAL array, dimension ( LDU, 2 * NLVL ).
+           On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+           values of Givens rotations performed on the I-th level on the
+           computation tree.
+
+    C      (input) REAL array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           C( I ) contains the C-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    S      (input) REAL array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           S( I ) contains the S-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    WORK   (workspace) REAL array.
+           The dimension must be at least N.
+
+    IWORK  (workspace) INTEGER array.
+           The dimension must be at least 3 * N
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    givnum_dim1 = *ldu;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    poles_dim1 = *ldu;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    z_dim1 = *ldu;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    difr_dim1 = *ldu;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    difl_dim1 = *ldu;
+    difl_offset = 1 + difl_dim1;
+    difl -= difl_offset;
+    vt_dim1 = *ldu;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    --k;
+    --givptr;
+    perm_dim1 = *ldgcol;
+    perm_offset = 1 + perm_dim1;
+    perm -= perm_offset;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    --c__;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*smlsiz < 3) {
+	*info = -2;
+    } else if (*n < *smlsiz) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if (*ldb < *n) {
+	*info = -6;
+    } else if (*ldbx < *n) {
+	*info = -8;
+    } else if (*ldu < *n) {
+	*info = -10;
+    } else if (*ldgcol < *n) {
+	*info = -19;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLALSA", &i__1);
+	return 0;
+    }
+
+/*     Book-keeping and  setting up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+
+    slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       The following code applies back the left singular vector factors.
+       For applying back the right singular vector factors, go to 50.
+*/
+
+    if (*icompq == 1) {
+	goto L50;
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by SLASDQ. The corresponding left and right singular vector
+       matrices are in explicit form. First apply back the left
+       singular vector matrices.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+          IC : center row of each node
+          NL : number of rows of left  subproblem
+          NR : number of rows of right subproblem
+          NLF: starting row of the left   subproblem
+          NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlf = ic - nl;
+	nrf = ic + 1;
+	sgemm_("T", "N", &nl, nrhs, &nl, &c_b1011, &u[nlf + u_dim1], ldu, &b[
+		nlf + b_dim1], ldb, &c_b320, &bx[nlf + bx_dim1], ldbx);
+	sgemm_("T", "N", &nr, nrhs, &nr, &c_b1011, &u[nrf + u_dim1], ldu, &b[
+		nrf + b_dim1], ldb, &c_b320, &bx[nrf + bx_dim1], ldbx);
+/* L10: */
+    }
+
+/*
+       Next copy the rows of B that correspond to unchanged rows
+       in the bidiagonal matrix to BX.
+*/
+
+    i__1 = nd;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	ic = iwork[inode + i__ - 1];
+	scopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx);
+/* L20: */
+    }
+
+/*
+       Finally go through the left singular vector matrices of all
+       the other subproblems bottom-up on the tree.
+*/
+
+    j = pow_ii(&c__2, &nlvl);
+    sqre = 0;
+
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          find the first node LF and last node LL on
+          the current level LVL
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    --j;
+	    slals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, &
+		    b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &work[1], info);
+/* L30: */
+	}
+/* L40: */
+    }
+    goto L90;
+
+/*     ICOMPQ = 1: applying back the right singular vector factors. */
+
+L50:
+
+/*
+       First now go through the right singular vector matrices of all
+       the tree nodes top-down.
+*/
+
+    j = 0;
+    i__1 = nlvl;
+    for (lvl = 1; lvl <= i__1; ++lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          Find the first node LF and last node LL on
+          the current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__2 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__2);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__2 = lf;
+	for (i__ = ll; i__ >= i__2; --i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    if (i__ == ll) {
+		sqre = 0;
+	    } else {
+		sqre = 1;
+	    }
+	    ++j;
+	    slals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[
+		    nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &work[1], info);
+/* L60: */
+	}
+/* L70: */
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by SLASDQ. The corresponding right singular vector
+       matrices are in explicit form. Apply them back.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlp1 = nl + 1;
+	if (i__ == nd) {
+	    nrp1 = nr;
+	} else {
+	    nrp1 = nr + 1;
+	}
+	nlf = ic - nl;
+	nrf = ic + 1;
+	sgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1011, &vt[nlf + vt_dim1],
+		ldu, &b[nlf + b_dim1], ldb, &c_b320, &bx[nlf + bx_dim1], ldbx);
+	sgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1011, &vt[nrf + vt_dim1],
+		ldu, &b[nrf + b_dim1], ldb, &c_b320, &bx[nrf + bx_dim1], ldbx);
+/* L80: */
+    }
+
+L90:
+
+    return 0;
+
+/*     End of SLALSA */
+
+} /* slalsa_ */
+
+/* Subroutine */ int slalsd_(char *uplo, integer *smlsiz, integer *n, integer
+	*nrhs, real *d__, real *e, real *b, integer *ldb, real *rcond,
+	integer *rank, real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2;
+    real r__1;
+
+    /* Builtin functions */
+    double log(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer c__, i__, j, k;
+    static real r__;
+    static integer s, u, z__;
+    static real cs;
+    static integer bx;
+    static real sn;
+    static integer st, vt, nm1, st1;
+    static real eps;
+    static integer iwk;
+    static real tol;
+    static integer difl, difr, perm, nsub, nlvl, sqre, bxst;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *), sgemm_(char *, char *, integer *,
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    , real *, real *, integer *);
+    static integer poles, sizei, nsize;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    static integer nwork, icmpq1, icmpq2;
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int slasda_(integer *, integer *, integer *,
+	    integer *, real *, real *, real *, integer *, real *, integer *,
+	    real *, real *, real *, real *, integer *, integer *, integer *,
+	    integer *, real *, real *, real *, real *, integer *, integer *),
+	    xerbla_(char *, integer *), slalsa_(integer *, integer *,
+	    integer *, integer *, real *, integer *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, real *, real *, real *,
+	    integer *, integer *, integer *, integer *, real *, real *, real *
+	    , real *, integer *, integer *), slascl_(char *, integer *,
+	    integer *, real *, real *, integer *, integer *, real *, integer *
+	    , integer *);
+    static integer givcol;
+    extern integer isamax_(integer *, real *, integer *);
+    extern /* Subroutine */ int slasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, real *, real *, real *, integer *, real *
+	    , integer *, real *, integer *, real *, integer *),
+	    slacpy_(char *, integer *, integer *, real *, integer *, real *,
+	    integer *), slartg_(real *, real *, real *, real *, real *
+	    ), slaset_(char *, integer *, integer *, real *, real *, real *,
+	    integer *);
+    static real orgnrm;
+    static integer givnum;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
+    static integer givptr, smlszp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLALSD uses the singular value decomposition of A to solve the least
+    squares problem of finding X to minimize the Euclidean norm of each
+    column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+    are N-by-NRHS. The solution X overwrites B.
+
+    The singular values of A smaller than RCOND times the largest
+    singular value are treated as zero in solving the least squares
+    problem; in this case a minimum norm solution is returned.
+    The actual singular values are returned in D in ascending order.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    UPLO   (input) CHARACTER*1
+           = 'U': D and E define an upper bidiagonal matrix.
+           = 'L': D and E define a  lower bidiagonal matrix.
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The dimension of the  bidiagonal matrix.  N >= 0.
+
+    NRHS   (input) INTEGER
+           The number of columns of B. NRHS must be at least 1.
+
+    D      (input/output) REAL array, dimension (N)
+           On entry D contains the main diagonal of the bidiagonal
+           matrix. On exit, if INFO = 0, D contains its singular values.
+
+    E      (input) REAL array, dimension (N-1)
+           Contains the super-diagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    B      (input/output) REAL array, dimension (LDB,NRHS)
+           On input, B contains the right hand sides of the least
+           squares problem. On output, B contains the solution X.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,N).
+
+    RCOND  (input) REAL
+           The singular values of A less than or equal to RCOND times
+           the largest singular value are treated as zero in solving
+           the least squares problem. If RCOND is negative,
+           machine precision is used instead.
+           For example, if diag(S)*X=B were the least squares problem,
+           where diag(S) is a diagonal matrix of singular values, the
+           solution would be X(i) = B(i) / S(i) if S(i) is greater than
+           RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+           RCOND*max(S).
+
+    RANK   (output) INTEGER
+           The number of singular values of A greater than RCOND times
+           the largest singular value.
+
+    WORK   (workspace) REAL array, dimension at least
+           (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
+           where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
+
+    IWORK  (workspace) INTEGER array, dimension at least
+           (3*N*NLVL + 11*N)
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+           > 0:  The algorithm failed to compute an singular value while
+                 working on the submatrix lying in rows and columns
+                 INFO/(N+1) through MOD(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if ((*ldb < 1) || (*ldb < *n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLALSD", &i__1);
+	return 0;
+    }
+
+    eps = slamch_("Epsilon");
+
+/*     Set up the tolerance. */
+
+    if ((*rcond <= 0.f) || (*rcond >= 1.f)) {
+	*rcond = eps;
+    }
+
+    *rank = 0;
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+	if (d__[1] == 0.f) {
+	    slaset_("A", &c__1, nrhs, &c_b320, &c_b320, &b[b_offset], ldb);
+	} else {
+	    *rank = 1;
+	    slascl_("G", &c__0, &c__0, &d__[1], &c_b1011, &c__1, nrhs, &b[
+		    b_offset], ldb, info);
+	    d__[1] = dabs(d__[1]);
+	}
+	return 0;
+    }
+
+/*     Rotate the matrix if it is lower bidiagonal. */
+
+    if (*(unsigned char *)uplo == 'L') {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (*nrhs == 1) {
+		srot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], &
+			c__1, &cs, &sn);
+	    } else {
+		work[((i__) << (1)) - 1] = cs;
+		work[i__ * 2] = sn;
+	    }
+/* L10: */
+	}
+	if (*nrhs > 1) {
+	    i__1 = *nrhs;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		i__2 = *n - 1;
+		for (j = 1; j <= i__2; ++j) {
+		    cs = work[((j) << (1)) - 1];
+		    sn = work[j * 2];
+		    srot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__ *
+			     b_dim1], &c__1, &cs, &sn);
+/* L20: */
+		}
+/* L30: */
+	    }
+	}
+    }
+
+/*     Scale. */
+
+    nm1 = *n - 1;
+    orgnrm = slanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.f) {
+	slaset_("A", n, nrhs, &c_b320, &c_b320, &b[b_offset], ldb);
+	return 0;
+    }
+
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, &c__1, &d__[1], n, info);
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &nm1, &c__1, &e[1], &nm1,
+	    info);
+
+/*
+       If N is smaller than the minimum divide size SMLSIZ, then solve
+       the problem with another solver.
+*/
+
+    if (*n <= *smlsiz) {
+	nwork = *n * *n + 1;
+	slaset_("A", n, n, &c_b320, &c_b1011, &work[1], n);
+	slasdq_("U", &c__0, n, n, &c__0, nrhs, &d__[1], &e[1], &work[1], n, &
+		work[1], n, &b[b_offset], ldb, &work[nwork], info);
+	if (*info != 0) {
+	    return 0;
+	}
+	tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (d__[i__] <= tol) {
+		slaset_("A", &c__1, nrhs, &c_b320, &c_b320, &b[i__ + b_dim1],
+			ldb);
+	    } else {
+		slascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, &
+			b[i__ + b_dim1], ldb, info);
+		++(*rank);
+	    }
+/* L40: */
+	}
+	sgemm_("T", "N", n, nrhs, n, &c_b1011, &work[1], n, &b[b_offset], ldb,
+		 &c_b320, &work[nwork], n);
+	slacpy_("A", n, nrhs, &work[nwork], n, &b[b_offset], ldb);
+
+/*        Unscale. */
+
+	slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n,
+		info);
+	slasrt_("D", n, &d__[1], info);
+	slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset],
+		ldb, info);
+
+	return 0;
+    }
+
+/*     Book-keeping and setting up some constants. */
+
+    nlvl = (integer) (log((real) (*n) / (real) (*smlsiz + 1)) / log(2.f)) + 1;
+
+    smlszp = *smlsiz + 1;
+
+    u = 1;
+    vt = *smlsiz * *n + 1;
+    difl = vt + smlszp * *n;
+    difr = difl + nlvl * *n;
+    z__ = difr + ((nlvl * *n) << (1));
+    c__ = z__ + nlvl * *n;
+    s = c__ + *n;
+    poles = s + *n;
+    givnum = poles + ((nlvl) << (1)) * *n;
+    bx = givnum + ((nlvl) << (1)) * *n;
+    nwork = bx + *n * *nrhs;
+
+    sizei = *n + 1;
+    k = sizei + *n;
+    givptr = k + *n;
+    perm = givptr + *n;
+    givcol = perm + nlvl * *n;
+    iwk = givcol + ((nlvl * *n) << (1));
+
+    st = 1;
+    sqre = 0;
+    icmpq1 = 1;
+    icmpq2 = 0;
+    nsub = 0;
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = d__[i__], dabs(r__1)) < eps) {
+	    d__[i__] = r_sign(&eps, &d__[i__]);
+	}
+/* L50: */
+    }
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (((r__1 = e[i__], dabs(r__1)) < eps) || (i__ == nm1)) {
+	    ++nsub;
+	    iwork[nsub] = st;
+
+/*
+             Subproblem found. First determine its size and then
+             apply divide and conquer on it.
+*/
+
+	    if (i__ < nm1) {
+
+/*              A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else if ((r__1 = e[i__], dabs(r__1)) >= eps) {
+
+/*              A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else {
+
+/*
+                A subproblem with E(NM1) small. This implies an
+                1-by-1 subproblem at D(N), which is not solved
+                explicitly.
+*/
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+		++nsub;
+		iwork[nsub] = *n;
+		iwork[sizei + nsub - 1] = 1;
+		scopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n);
+	    }
+	    st1 = st - 1;
+	    if (nsize == 1) {
+
+/*
+                This is a 1-by-1 subproblem and is not solved
+                explicitly.
+*/
+
+		scopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n);
+	    } else if (nsize <= *smlsiz) {
+
+/*              This is a small subproblem and is solved by SLASDQ. */
+
+		slaset_("A", &nsize, &nsize, &c_b320, &c_b1011, &work[vt +
+			st1], n);
+		slasdq_("U", &c__0, &nsize, &nsize, &c__0, nrhs, &d__[st], &e[
+			st], &work[vt + st1], n, &work[nwork], n, &b[st +
+			b_dim1], ldb, &work[nwork], info);
+		if (*info != 0) {
+		    return 0;
+		}
+		slacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx +
+			st1], n);
+	    } else {
+
+/*              A large problem. Solve it using divide and conquer. */
+
+		slasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], &
+			work[u + st1], n, &work[vt + st1], &iwork[k + st1], &
+			work[difl + st1], &work[difr + st1], &work[z__ + st1],
+			 &work[poles + st1], &iwork[givptr + st1], &iwork[
+			givcol + st1], n, &iwork[perm + st1], &work[givnum +
+			st1], &work[c__ + st1], &work[s + st1], &work[nwork],
+			&iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+		bxst = bx + st1;
+		slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, &
+			work[bxst], n, &work[u + st1], n, &work[vt + st1], &
+			iwork[k + st1], &work[difl + st1], &work[difr + st1],
+			&work[z__ + st1], &work[poles + st1], &iwork[givptr +
+			st1], &iwork[givcol + st1], n, &iwork[perm + st1], &
+			work[givnum + st1], &work[c__ + st1], &work[s + st1],
+			&work[nwork], &iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+	    }
+	    st = i__ + 1;
+	}
+/* L60: */
+    }
+
+/*     Apply the singular values and treat the tiny ones as zero. */
+
+    tol = *rcond * (r__1 = d__[isamax_(n, &d__[1], &c__1)], dabs(r__1));
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+          Some of the elements in D can be negative because 1-by-1
+          subproblems were not solved explicitly.
+*/
+
+	if ((r__1 = d__[i__], dabs(r__1)) <= tol) {
+	    slaset_("A", &c__1, nrhs, &c_b320, &c_b320, &work[bx + i__ - 1],
+		    n);
+	} else {
+	    ++(*rank);
+	    slascl_("G", &c__0, &c__0, &d__[i__], &c_b1011, &c__1, nrhs, &
+		    work[bx + i__ - 1], n, info);
+	}
+	d__[i__] = (r__1 = d__[i__], dabs(r__1));
+/* L70: */
+    }
+
+/*     Now apply back the right singular vectors. */
+
+    icmpq2 = 1;
+    i__1 = nsub;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	st = iwork[i__];
+	st1 = st - 1;
+	nsize = iwork[sizei + i__ - 1];
+	bxst = bx + st1;
+	if (nsize == 1) {
+	    scopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb);
+	} else if (nsize <= *smlsiz) {
+	    sgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1011, &work[vt + st1],
+		    n, &work[bxst], n, &c_b320, &b[st + b_dim1], ldb);
+	} else {
+	    slalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st +
+		    b_dim1], ldb, &work[u + st1], n, &work[vt + st1], &iwork[
+		    k + st1], &work[difl + st1], &work[difr + st1], &work[z__
+		    + st1], &work[poles + st1], &iwork[givptr + st1], &iwork[
+		    givcol + st1], n, &iwork[perm + st1], &work[givnum + st1],
+		     &work[c__ + st1], &work[s + st1], &work[nwork], &iwork[
+		    iwk], info);
+	    if (*info != 0) {
+		return 0;
+	    }
+	}
+/* L80: */
+    }
+
+/*     Unscale and sort the singular values. */
+
+    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, n, &c__1, &d__[1], n, info);
+    slasrt_("D", n, &d__[1], info);
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, n, nrhs, &b[b_offset], ldb,
+	    info);
+
+    return 0;
+
+/*     End of SLALSD */
+
+} /* slalsd_ */
+
+doublereal slamch_(char *cmach)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+
+    /* System generated locals */
+    integer i__1;
+    real ret_val;
+
+    /* Builtin functions */
+    double pow_ri(real *, integer *);
+
+    /* Local variables */
+    static real t;
+    static integer it;
+    static real rnd, eps, base;
+    static integer beta;
+    static real emin, prec, emax;
+    static integer imin, imax;
+    static logical lrnd;
+    static real rmin, rmax, rmach;
+    extern logical lsame_(char *, char *);
+    static real small, sfmin;
+    extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real
+	    *, integer *, real *, integer *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAMCH determines single precision machine parameters.
+
+    Arguments
+    =========
+
+    CMACH   (input) CHARACTER*1
+            Specifies the value to be returned by SLAMCH:
+            = 'E' or 'e',   SLAMCH := eps
+            = 'S' or 's ,   SLAMCH := sfmin
+            = 'B' or 'b',   SLAMCH := base
+            = 'P' or 'p',   SLAMCH := eps*base
+            = 'N' or 'n',   SLAMCH := t
+            = 'R' or 'r',   SLAMCH := rnd
+            = 'M' or 'm',   SLAMCH := emin
+            = 'U' or 'u',   SLAMCH := rmin
+            = 'L' or 'l',   SLAMCH := emax
+            = 'O' or 'o',   SLAMCH := rmax
+
+            where
+
+            eps   = relative machine precision
+            sfmin = safe minimum, such that 1/sfmin does not overflow
+            base  = base of the machine
+            prec  = eps*base
+            t     = number of (base) digits in the mantissa
+            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
+            emin  = minimum exponent before (gradual) underflow
+            rmin  = underflow threshold - base**(emin-1)
+            emax  = largest exponent before overflow
+            rmax  = overflow threshold  - (base**emax)*(1-eps)
+
+   =====================================================================
+*/
+
+
+    if (first) {
+	first = FALSE_;
+	slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
+	base = (real) beta;
+	t = (real) it;
+	if (lrnd) {
+	    rnd = 1.f;
+	    i__1 = 1 - it;
+	    eps = pow_ri(&base, &i__1) / 2;
+	} else {
+	    rnd = 0.f;
+	    i__1 = 1 - it;
+	    eps = pow_ri(&base, &i__1);
+	}
+	prec = eps * base;
+	emin = (real) imin;
+	emax = (real) imax;
+	sfmin = rmin;
+	small = 1.f / rmax;
+	if (small >= sfmin) {
+
+/*
+             Use SMALL plus a bit, to avoid the possibility of rounding
+             causing overflow when computing  1/sfmin.
+*/
+
+	    sfmin = small * (eps + 1.f);
+	}
+    }
+
+    if (lsame_(cmach, "E")) {
+	rmach = eps;
+    } else if (lsame_(cmach, "S")) {
+	rmach = sfmin;
+    } else if (lsame_(cmach, "B")) {
+	rmach = base;
+    } else if (lsame_(cmach, "P")) {
+	rmach = prec;
+    } else if (lsame_(cmach, "N")) {
+	rmach = t;
+    } else if (lsame_(cmach, "R")) {
+	rmach = rnd;
+    } else if (lsame_(cmach, "M")) {
+	rmach = emin;
+    } else if (lsame_(cmach, "U")) {
+	rmach = rmin;
+    } else if (lsame_(cmach, "L")) {
+	rmach = emax;
+    } else if (lsame_(cmach, "O")) {
+	rmach = rmax;
+    }
+
+    ret_val = rmach;
+    return ret_val;
+
+/*     End of SLAMCH */
+
+} /* slamch_ */
+
+
+/* *********************************************************************** */
+
+/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical
+	*ieee1)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+
+    /* System generated locals */
+    real r__1, r__2;
+
+    /* Local variables */
+    static real a, b, c__, f, t1, t2;
+    static integer lt;
+    static real one, qtr;
+    static logical lrnd;
+    static integer lbeta;
+    static real savec;
+    static logical lieee1;
+    extern doublereal slamc3_(real *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAMC1 determines the machine parameters given by BETA, T, RND, and
+    IEEE1.
+
+    Arguments
+    =========
+
+    BETA    (output) INTEGER
+            The base of the machine.
+
+    T       (output) INTEGER
+            The number of ( BETA ) digits in the mantissa.
+
+    RND     (output) LOGICAL
+            Specifies whether proper rounding  ( RND = .TRUE. )  or
+            chopping  ( RND = .FALSE. )  occurs in addition. This may not
+            be a reliable guide to the way in which the machine performs
+            its arithmetic.
+
+    IEEE1   (output) LOGICAL
+            Specifies whether rounding appears to be done in the IEEE
+            'round to nearest' style.
+
+    Further Details
+    ===============
+
+    The routine is based on the routine  ENVRON  by Malcolm and
+    incorporates suggestions by Gentleman and Marovich. See
+
+       Malcolm M. A. (1972) Algorithms to reveal properties of
+          floating-point arithmetic. Comms. of the ACM, 15, 949-951.
+
+       Gentleman W. M. and Marovich S. B. (1974) More on algorithms
+          that reveal properties of floating point arithmetic units.
+          Comms. of the ACM, 17, 276-277.
+
+   =====================================================================
+*/
+
+
+    if (first) {
+	first = FALSE_;
+	one = 1.f;
+
+/*
+          LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BETA,
+          IEEE1, T and RND.
+
+          Throughout this routine  we use the function  SLAMC3  to ensure
+          that relevant values are  stored and not held in registers,  or
+          are not affected by optimizers.
+
+          Compute  a = 2.0**m  with the  smallest positive integer m such
+          that
+
+             fl( a + 1.0 ) = a.
+*/
+
+	a = 1.f;
+	c__ = 1.f;
+
+/* +       WHILE( C.EQ.ONE )LOOP */
+L10:
+	if (c__ == one) {
+	    a *= 2;
+	    c__ = slamc3_(&a, &one);
+	    r__1 = -a;
+	    c__ = slamc3_(&c__, &r__1);
+	    goto L10;
+	}
+/*
+   +       END WHILE
+
+          Now compute  b = 2.0**m  with the smallest positive integer m
+          such that
+
+             fl( a + b ) .gt. a.
+*/
+
+	b = 1.f;
+	c__ = slamc3_(&a, &b);
+
+/* +       WHILE( C.EQ.A )LOOP */
+L20:
+	if (c__ == a) {
+	    b *= 2;
+	    c__ = slamc3_(&a, &b);
+	    goto L20;
+	}
+/*
+   +       END WHILE
+
+          Now compute the base.  a and c  are neighbouring floating point
+          numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so
+          their difference is beta. Adding 0.25 to c is to ensure that it
+          is truncated to beta and not ( beta - 1 ).
+*/
+
+	qtr = one / 4;
+	savec = c__;
+	r__1 = -a;
+	c__ = slamc3_(&c__, &r__1);
+	lbeta = c__ + qtr;
+
+/*
+          Now determine whether rounding or chopping occurs,  by adding a
+          bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a.
+*/
+
+	b = (real) lbeta;
+	r__1 = b / 2;
+	r__2 = -b / 100;
+	f = slamc3_(&r__1, &r__2);
+	c__ = slamc3_(&f, &a);
+	if (c__ == a) {
+	    lrnd = TRUE_;
+	} else {
+	    lrnd = FALSE_;
+	}
+	r__1 = b / 2;
+	r__2 = b / 100;
+	f = slamc3_(&r__1, &r__2);
+	c__ = slamc3_(&f, &a);
+	if (lrnd && c__ == a) {
+	    lrnd = FALSE_;
+	}
+
+/*
+          Try and decide whether rounding is done in the  IEEE  'round to
+          nearest' style. B/2 is half a unit in the last place of the two
+          numbers A and SAVEC. Furthermore, A is even, i.e. has last  bit
+          zero, and SAVEC is odd. Thus adding B/2 to A should not  change
+          A, but adding B/2 to SAVEC should change SAVEC.
+*/
+
+	r__1 = b / 2;
+	t1 = slamc3_(&r__1, &a);
+	r__1 = b / 2;
+	t2 = slamc3_(&r__1, &savec);
+	lieee1 = t1 == a && t2 > savec && lrnd;
+
+/*
+          Now find  the  mantissa, t.  It should  be the  integer part of
+          log to the base beta of a,  however it is safer to determine  t
+          by powering.  So we find t as the smallest positive integer for
+          which
+
+             fl( beta**t + 1.0 ) = 1.0.
+*/
+
+	lt = 0;
+	a = 1.f;
+	c__ = 1.f;
+
+/* +       WHILE( C.EQ.ONE )LOOP */
+L30:
+	if (c__ == one) {
+	    ++lt;
+	    a *= lbeta;
+	    c__ = slamc3_(&a, &one);
+	    r__1 = -a;
+	    c__ = slamc3_(&c__, &r__1);
+	    goto L30;
+	}
+/* +       END WHILE */
+
+    }
+
+    *beta = lbeta;
+    *t = lt;
+    *rnd = lrnd;
+    *ieee1 = lieee1;
+    return 0;
+
+/*     End of SLAMC1 */
+
+} /* slamc1_ */
+
+
+/* *********************************************************************** */
+
+/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real *
+	eps, integer *emin, real *rmin, integer *emax, real *rmax)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+    static logical iwarn = FALSE_;
+
+    /* Format strings */
+    static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre"
+	    "ct:-\002,\002  EMIN = \002,i8,/\002 If, after inspection, the va"
+	    "lue EMIN looks\002,\002 acceptable please comment out \002,/\002"
+	    " the IF block as marked within the code of routine\002,\002 SLAM"
+	    "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)";
+
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2, r__3, r__4, r__5;
+
+    /* Builtin functions */
+    double pow_ri(real *, integer *);
+    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
+
+    /* Local variables */
+    static real a, b, c__;
+    static integer i__, lt;
+    static real one, two;
+    static logical ieee;
+    static real half;
+    static logical lrnd;
+    static real leps, zero;
+    static integer lbeta;
+    static real rbase;
+    static integer lemin, lemax, gnmin;
+    static real small;
+    static integer gpmin;
+    static real third, lrmin, lrmax, sixth;
+    static logical lieee1;
+    extern /* Subroutine */ int slamc1_(integer *, integer *, logical *,
+	    logical *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int slamc4_(integer *, real *, integer *),
+	    slamc5_(integer *, integer *, integer *, logical *, integer *,
+	    real *);
+    static integer ngnmin, ngpmin;
+
+    /* Fortran I/O blocks */
+    static cilist io___3081 = { 0, 6, 0, fmt_9999, 0 };
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAMC2 determines the machine parameters specified in its argument
+    list.
+
+    Arguments
+    =========
+
+    BETA    (output) INTEGER
+            The base of the machine.
+
+    T       (output) INTEGER
+            The number of ( BETA ) digits in the mantissa.
+
+    RND     (output) LOGICAL
+            Specifies whether proper rounding  ( RND = .TRUE. )  or
+            chopping  ( RND = .FALSE. )  occurs in addition. This may not
+            be a reliable guide to the way in which the machine performs
+            its arithmetic.
+
+    EPS     (output) REAL
+            The smallest positive number such that
+
+               fl( 1.0 - EPS ) .LT. 1.0,
+
+            where fl denotes the computed value.
+
+    EMIN    (output) INTEGER
+            The minimum exponent before (gradual) underflow occurs.
+
+    RMIN    (output) REAL
+            The smallest normalized number for the machine, given by
+            BASE**( EMIN - 1 ), where  BASE  is the floating point value
+            of BETA.
+
+    EMAX    (output) INTEGER
+            The maximum exponent before overflow occurs.
+
+    RMAX    (output) REAL
+            The largest positive number for the machine, given by
+            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point
+            value of BETA.
+
+    Further Details
+    ===============
+
+    The computation of  EPS  is based on a routine PARANOIA by
+    W. Kahan of the University of California at Berkeley.
+
+   =====================================================================
+*/
+
+
+    if (first) {
+	first = FALSE_;
+	zero = 0.f;
+	one = 1.f;
+	two = 2.f;
+
+/*
+          LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values of
+          BETA, T, RND, EPS, EMIN and RMIN.
+
+          Throughout this routine  we use the function  SLAMC3  to ensure
+          that relevant values are stored  and not held in registers,  or
+          are not affected by optimizers.
+
+          SLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1.
+*/
+
+	slamc1_(&lbeta, &lt, &lrnd, &lieee1);
+
+/*        Start to find EPS. */
+
+	b = (real) lbeta;
+	i__1 = -lt;
+	a = pow_ri(&b, &i__1);
+	leps = a;
+
+/*        Try some tricks to see whether or not this is the correct  EPS. */
+
+	b = two / 3;
+	half = one / 2;
+	r__1 = -half;
+	sixth = slamc3_(&b, &r__1);
+	third = slamc3_(&sixth, &sixth);
+	r__1 = -half;
+	b = slamc3_(&third, &r__1);
+	b = slamc3_(&b, &sixth);
+	b = dabs(b);
+	if (b < leps) {
+	    b = leps;
+	}
+
+	leps = 1.f;
+
+/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
+L10:
+	if (leps > b && b > zero) {
+	    leps = b;
+	    r__1 = half * leps;
+/* Computing 5th power */
+	    r__3 = two, r__4 = r__3, r__3 *= r__3;
+/* Computing 2nd power */
+	    r__5 = leps;
+	    r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
+	    c__ = slamc3_(&r__1, &r__2);
+	    r__1 = -c__;
+	    c__ = slamc3_(&half, &r__1);
+	    b = slamc3_(&half, &c__);
+	    r__1 = -b;
+	    c__ = slamc3_(&half, &r__1);
+	    b = slamc3_(&half, &c__);
+	    goto L10;
+	}
+/* +       END WHILE */
+
+	if (a < leps) {
+	    leps = a;
+	}
+
+/*
+          Computation of EPS complete.
+
+          Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
+          Keep dividing  A by BETA until (gradual) underflow occurs. This
+          is detected when we cannot recover the previous A.
+*/
+
+	rbase = one / lbeta;
+	small = one;
+	for (i__ = 1; i__ <= 3; ++i__) {
+	    r__1 = small * rbase;
+	    small = slamc3_(&r__1, &zero);
+/* L20: */
+	}
+	a = slamc3_(&one, &small);
+	slamc4_(&ngpmin, &one, &lbeta);
+	r__1 = -one;
+	slamc4_(&ngnmin, &r__1, &lbeta);
+	slamc4_(&gpmin, &a, &lbeta);
+	r__1 = -a;
+	slamc4_(&gnmin, &r__1, &lbeta);
+	ieee = FALSE_;
+
+	if (ngpmin == ngnmin && gpmin == gnmin) {
+	    if (ngpmin == gpmin) {
+		lemin = ngpmin;
+/*
+              ( Non twos-complement machines, no gradual underflow;
+                e.g.,  VAX )
+*/
+	    } else if (gpmin - ngpmin == 3) {
+		lemin = ngpmin - 1 + lt;
+		ieee = TRUE_;
+/*
+              ( Non twos-complement machines, with gradual underflow;
+                e.g., IEEE standard followers )
+*/
+	    } else {
+		lemin = min(ngpmin,gpmin);
+/*            ( A guess; no known machine ) */
+		iwarn = TRUE_;
+	    }
+
+	} else if (ngpmin == gpmin && ngnmin == gnmin) {
+	    if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
+		lemin = max(ngpmin,ngnmin);
+/*
+              ( Twos-complement machines, no gradual underflow;
+                e.g., CYBER 205 )
+*/
+	    } else {
+		lemin = min(ngpmin,ngnmin);
+/*            ( A guess; no known machine ) */
+		iwarn = TRUE_;
+	    }
+
+	} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
+		 {
+	    if (gpmin - min(ngpmin,ngnmin) == 3) {
+		lemin = max(ngpmin,ngnmin) - 1 + lt;
+/*
+              ( Twos-complement machines with gradual underflow;
+                no known machine )
+*/
+	    } else {
+		lemin = min(ngpmin,ngnmin);
+/*            ( A guess; no known machine ) */
+		iwarn = TRUE_;
+	    }
+
+	} else {
+/* Computing MIN */
+	    i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
+	    lemin = min(i__1,gnmin);
+/*         ( A guess; no known machine ) */
+	    iwarn = TRUE_;
+	}
+/*
+   **
+   Comment out this if block if EMIN is ok
+*/
+	if (iwarn) {
+	    first = TRUE_;
+	    s_wsfe(&io___3081);
+	    do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer));
+	    e_wsfe();
+	}
+/*
+   **
+
+          Assume IEEE arithmetic if we found denormalised  numbers above,
+          or if arithmetic seems to round in the  IEEE style,  determined
+          in routine SLAMC1. A true IEEE machine should have both  things
+          true; however, faulty machines may have one or the other.
+*/
+
+	ieee = (ieee) || (lieee1);
+
+/*
+          Compute  RMIN by successive division by  BETA. We could compute
+          RMIN as BASE**( EMIN - 1 ),  but some machines underflow during
+          this computation.
+*/
+
+	lrmin = 1.f;
+	i__1 = 1 - lemin;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    r__1 = lrmin * rbase;
+	    lrmin = slamc3_(&r__1, &zero);
+/* L30: */
+	}
+
+/*        Finally, call SLAMC5 to compute EMAX and RMAX. */
+
+	slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
+    }
+
+    *beta = lbeta;
+    *t = lt;
+    *rnd = lrnd;
+    *eps = leps;
+    *emin = lemin;
+    *rmin = lrmin;
+    *emax = lemax;
+    *rmax = lrmax;
+
+    return 0;
+
+
+/*     End of SLAMC2 */
+
+} /* slamc2_ */
+
+
+/* *********************************************************************** */
+
+doublereal slamc3_(real *a, real *b)
+{
+    /* System generated locals */
+    real ret_val;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAMC3  is intended to force  A  and  B  to be stored prior to doing
+    the addition of  A  and  B ,  for use in situations where optimizers
+    might hold one of these in a register.
+
+    Arguments
+    =========
+
+    A, B    (input) REAL
+            The values A and B.
+
+   =====================================================================
+*/
+
+
+    ret_val = *a + *b;
+
+    return ret_val;
+
+/*     End of SLAMC3 */
+
+} /* slamc3_ */
+
+
+/* *********************************************************************** */
+
+/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1;
+
+    /* Local variables */
+    static real a;
+    static integer i__;
+    static real b1, b2, c1, c2, d1, d2, one, zero, rbase;
+    extern doublereal slamc3_(real *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAMC4 is a service routine for SLAMC2.
+
+    Arguments
+    =========
+
+    EMIN    (output) EMIN
+            The minimum exponent before (gradual) underflow, computed by
+            setting A = START and dividing by BASE until the previous A
+            can not be recovered.
+
+    START   (input) REAL
+            The starting point for determining EMIN.
+
+    BASE    (input) INTEGER
+            The base of the machine.
+
+   =====================================================================
+*/
+
+
+    a = *start;
+    one = 1.f;
+    rbase = one / *base;
+    zero = 0.f;
+    *emin = 1;
+    r__1 = a * rbase;
+    b1 = slamc3_(&r__1, &zero);
+    c1 = a;
+    c2 = a;
+    d1 = a;
+    d2 = a;
+/*
+   +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
+      $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP
+*/
+L10:
+    if (c1 == a && c2 == a && d1 == a && d2 == a) {
+	--(*emin);
+	a = b1;
+	r__1 = a / *base;
+	b1 = slamc3_(&r__1, &zero);
+	r__1 = b1 * *base;
+	c1 = slamc3_(&r__1, &zero);
+	d1 = zero;
+	i__1 = *base;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d1 += b1;
+/* L20: */
+	}
+	r__1 = a * rbase;
+	b2 = slamc3_(&r__1, &zero);
+	r__1 = b2 / rbase;
+	c2 = slamc3_(&r__1, &zero);
+	d2 = zero;
+	i__1 = *base;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d2 += b2;
+/* L30: */
+	}
+	goto L10;
+    }
+/* +    END WHILE */
+
+    return 0;
+
+/*     End of SLAMC4 */
+
+} /* slamc4_ */
+
+
+/* *********************************************************************** */
+
+/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin,
+	logical *ieee, integer *emax, real *rmax)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1;
+
+    /* Local variables */
+    static integer i__;
+    static real y, z__;
+    static integer try__, lexp;
+    static real oldy;
+    static integer uexp, nbits;
+    extern doublereal slamc3_(real *, real *);
+    static real recbas;
+    static integer exbits, expsum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAMC5 attempts to compute RMAX, the largest machine floating-point
+    number, without overflow.  It assumes that EMAX + abs(EMIN) sum
+    approximately to a power of 2.  It will fail on machines where this
+    assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
+    EMAX = 28718).  It will also fail if the value supplied for EMIN is
+    too large (i.e. too close to zero), probably with overflow.
+
+    Arguments
+    =========
+
+    BETA    (input) INTEGER
+            The base of floating-point arithmetic.
+
+    P       (input) INTEGER
+            The number of base BETA digits in the mantissa of a
+            floating-point value.
+
+    EMIN    (input) INTEGER
+            The minimum exponent before (gradual) underflow.
+
+    IEEE    (input) LOGICAL
+            A logical flag specifying whether or not the arithmetic
+            system is thought to comply with the IEEE standard.
+
+    EMAX    (output) INTEGER
+            The largest exponent before overflow
+
+    RMAX    (output) REAL
+            The largest machine floating-point number.
+
+   =====================================================================
+
+
+       First compute LEXP and UEXP, two powers of 2 that bound
+       abs(EMIN). We then assume that EMAX + abs(EMIN) will sum
+       approximately to the bound that is closest to abs(EMIN).
+       (EMAX is the exponent of the required number RMAX).
+*/
+
+    lexp = 1;
+    exbits = 1;
+L10:
+    try__ = (lexp) << (1);
+    if (try__ <= -(*emin)) {
+	lexp = try__;
+	++exbits;
+	goto L10;
+    }
+    if (lexp == -(*emin)) {
+	uexp = lexp;
+    } else {
+	uexp = try__;
+	++exbits;
+    }
+
+/*
+       Now -LEXP is less than or equal to EMIN, and -UEXP is greater
+       than or equal to EMIN. EXBITS is the number of bits needed to
+       store the exponent.
+*/
+
+    if (uexp + *emin > -lexp - *emin) {
+	expsum = (lexp) << (1);
+    } else {
+	expsum = (uexp) << (1);
+    }
+
+/*
+       EXPSUM is the exponent range, approximately equal to
+       EMAX - EMIN + 1 .
+*/
+
+    *emax = expsum + *emin - 1;
+    nbits = exbits + 1 + *p;
+
+/*
+       NBITS is the total number of bits needed to store a
+       floating-point number.
+*/
+
+    if (nbits % 2 == 1 && *beta == 2) {
+
+/*
+          Either there are an odd number of bits used to store a
+          floating-point number, which is unlikely, or some bits are
+          not used in the representation of numbers, which is possible,
+          (e.g. Cray machines) or the mantissa has an implicit bit,
+          (e.g. IEEE machines, Dec Vax machines), which is perhaps the
+          most likely. We have to assume the last alternative.
+          If this is true, then we need to reduce EMAX by one because
+          there must be some way of representing zero in an implicit-bit
+          system. On machines like Cray, we are reducing EMAX by one
+          unnecessarily.
+*/
+
+	--(*emax);
+    }
+
+    if (*ieee) {
+
+/*
+          Assume we are on an IEEE machine which reserves one exponent
+          for infinity and NaN.
+*/
+
+	--(*emax);
+    }
+
+/*
+       Now create RMAX, the largest machine number, which should
+       be equal to (1.0 - BETA**(-P)) * BETA**EMAX .
+
+       First compute 1.0 - BETA**(-P), being careful that the
+       result is less than 1.0 .
+*/
+
+    recbas = 1.f / *beta;
+    z__ = *beta - 1.f;
+    y = 0.f;
+    i__1 = *p;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	z__ *= recbas;
+	if (y < 1.f) {
+	    oldy = y;
+	}
+	y = slamc3_(&y, &z__);
+/* L20: */
+    }
+    if (y >= 1.f) {
+	y = oldy;
+    }
+
+/*     Now multiply by BETA**EMAX to get RMAX. */
+
+    i__1 = *emax;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__1 = y * *beta;
+	y = slamc3_(&r__1, &c_b320);
+/* L30: */
+    }
+
+    *rmax = y;
+    return 0;
+
+/*     End of SLAMC5 */
+
+} /* slamc5_ */
+
+/* Subroutine */ int slamrg_(integer *n1, integer *n2, real *a, integer *
+	strd1, integer *strd2, integer *index)
+{
+    /* System generated locals */
+    integer i__1;
+
+    /* Local variables */
+    static integer i__, ind1, ind2, n1sv, n2sv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLAMRG will create a permutation list which will merge the elements
+    of A (which is composed of two independently sorted sets) into a
+    single set which is sorted in ascending order.
+
+    Arguments
+    =========
+
+    N1     (input) INTEGER
+    N2     (input) INTEGER
+           These arguements contain the respective lengths of the two
+           sorted lists to be merged.
+
+    A      (input) REAL array, dimension (N1+N2)
+           The first N1 elements of A contain a list of numbers which
+           are sorted in either ascending or descending order.  Likewise
+           for the final N2 elements.
+
+    STRD1  (input) INTEGER
+    STRD2  (input) INTEGER
+           These are the strides to be taken through the array A.
+           Allowable strides are 1 and -1.  They indicate whether a
+           subset of A is sorted in ascending (STRDx = 1) or descending
+           (STRDx = -1) order.
+
+    INDEX  (output) INTEGER array, dimension (N1+N2)
+           On exit this array will contain a permutation such that
+           if B( I ) = A( INDEX( I ) ) for I=1,N1+N2, then B will be
+           sorted in ascending order.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --index;
+    --a;
+
+    /* Function Body */
+    n1sv = *n1;
+    n2sv = *n2;
+    if (*strd1 > 0) {
+	ind1 = 1;
+    } else {
+	ind1 = *n1;
+    }
+    if (*strd2 > 0) {
+	ind2 = *n1 + 1;
+    } else {
+	ind2 = *n1 + *n2;
+    }
+    i__ = 1;
+/*     while ( (N1SV > 0) & (N2SV > 0) ) */
+L10:
+    if (n1sv > 0 && n2sv > 0) {
+	if (a[ind1] <= a[ind2]) {
+	    index[i__] = ind1;
+	    ++i__;
+	    ind1 += *strd1;
+	    --n1sv;
+	} else {
+	    index[i__] = ind2;
+	    ++i__;
+	    ind2 += *strd2;
+	    --n2sv;
+	}
+	goto L10;
+    }
+/*     end while */
+    if (n1sv == 0) {
+	i__1 = n2sv;
+	for (n1sv = 1; n1sv <= i__1; ++n1sv) {
+	    index[i__] = ind2;
+	    ++i__;
+	    ind2 += *strd2;
+/* L20: */
+	}
+    } else {
+/*     N2SV .EQ. 0 */
+	i__1 = n1sv;
+	for (n2sv = 1; n2sv <= i__1; ++n2sv) {
+	    index[i__] = ind1;
+	    ++i__;
+	    ind1 += *strd1;
+/* L30: */
+	}
+    }
+
+    return 0;
+
+/*     End of SLAMRG */
+
+} /* slamrg_ */
+
+doublereal slange_(char *norm, integer *m, integer *n, real *a, integer *lda,
+	real *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real ret_val, r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static real sum, scale;
+    extern logical lsame_(char *, char *);
+    static real value;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
+	    real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLANGE  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    real matrix A.
+
+    Description
+    ===========
+
+    SLANGE returns the value
+
+       SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in SLANGE as described
+            above.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.  When M = 0,
+            SLANGE is set to zero.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.  When N = 0,
+            SLANGE is set to zero.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The m by n matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(M,1).
+
+    WORK    (workspace) REAL array, dimension (LWORK),
+            where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (min(*m,*n) == 0) {
+	value = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+		value = dmax(r__2,r__3);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.f;
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+/* L30: */
+	    }
+	    value = dmax(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.f;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.f;
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    r__1 = value, r__2 = work[i__];
+	    value = dmax(r__1,r__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    slassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of SLANGE */
+
+} /* slange_ */
+
+doublereal slanhs_(char *norm, integer *n, real *a, integer *lda, real *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    real ret_val, r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static real sum, scale;
+    extern logical lsame_(char *, char *);
+    static real value;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
+	    real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLANHS  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    Hessenberg matrix A.
+
+    Description
+    ===========
+
+    SLANHS returns the value
+
+       SLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in SLANHS as described
+            above.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, SLANHS is
+            set to zero.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The n by n upper Hessenberg matrix A; the part of A below the
+            first sub-diagonal is not referenced.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) REAL array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+		value = dmax(r__2,r__3);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.f;
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+/* L30: */
+	    }
+	    value = dmax(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.f;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.f;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    r__1 = value, r__2 = work[i__];
+	    value = dmax(r__1,r__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of SLANHS */
+
+} /* slanhs_ */
+
+doublereal slanst_(char *norm, integer *n, real *d__, real *e)
+{
+    /* System generated locals */
+    integer i__1;
+    real ret_val, r__1, r__2, r__3, r__4, r__5;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__;
+    static real sum, scale;
+    extern logical lsame_(char *, char *);
+    static real anorm;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
+	    real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLANST  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    real symmetric tridiagonal matrix A.
+
+    Description
+    ===========
+
+    SLANST returns the value
+
+       SLANST = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in SLANST as described
+            above.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, SLANST is
+            set to zero.
+
+    D       (input) REAL array, dimension (N)
+            The diagonal elements of A.
+
+    E       (input) REAL array, dimension (N-1)
+            The (n-1) sub-diagonal or super-diagonal elements of A.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --e;
+    --d__;
+
+    /* Function Body */
+    if (*n <= 0) {
+	anorm = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	anorm = (r__1 = d__[*n], dabs(r__1));
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    r__2 = anorm, r__3 = (r__1 = d__[i__], dabs(r__1));
+	    anorm = dmax(r__2,r__3);
+/* Computing MAX */
+	    r__2 = anorm, r__3 = (r__1 = e[i__], dabs(r__1));
+	    anorm = dmax(r__2,r__3);
+/* L10: */
+	}
+    } else if (((lsame_(norm, "O")) || (*(unsigned char
+	    *)norm == '1')) || (lsame_(norm, "I"))) {
+
+/*        Find norm1(A). */
+
+	if (*n == 1) {
+	    anorm = dabs(d__[1]);
+	} else {
+/* Computing MAX */
+	    r__3 = dabs(d__[1]) + dabs(e[1]), r__4 = (r__1 = e[*n - 1], dabs(
+		    r__1)) + (r__2 = d__[*n], dabs(r__2));
+	    anorm = dmax(r__3,r__4);
+	    i__1 = *n - 1;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+/* Computing MAX */
+		r__4 = anorm, r__5 = (r__1 = d__[i__], dabs(r__1)) + (r__2 =
+			e[i__], dabs(r__2)) + (r__3 = e[i__ - 1], dabs(r__3));
+		anorm = dmax(r__4,r__5);
+/* L20: */
+	    }
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	if (*n > 1) {
+	    i__1 = *n - 1;
+	    slassq_(&i__1, &e[1], &c__1, &scale, &sum);
+	    sum *= 2;
+	}
+	slassq_(n, &d__[1], &c__1, &scale, &sum);
+	anorm = scale * sqrt(sum);
+    }
+
+    ret_val = anorm;
+    return ret_val;
+
+/*     End of SLANST */
+
+} /* slanst_ */
+
+doublereal slansy_(char *norm, char *uplo, integer *n, real *a, integer *lda,
+	real *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real ret_val, r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static real sum, absa, scale;
+    extern logical lsame_(char *, char *);
+    static real value;
+    extern /* Subroutine */ int slassq_(integer *, real *, integer *, real *,
+	    real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLANSY  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    real symmetric matrix A.
+
+    Description
+    ===========
+
+    SLANSY returns the value
+
+       SLANSY = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in SLANSY as described
+            above.
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is to be referenced.
+            = 'U':  Upper triangular part of A is referenced
+            = 'L':  Lower triangular part of A is referenced
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, SLANSY is
+            set to zero.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The symmetric matrix A.  If UPLO = 'U', the leading n by n
+            upper triangular part of A contains the upper triangular part
+            of the matrix A, and the strictly lower triangular part of A
+            is not referenced.  If UPLO = 'L', the leading n by n lower
+            triangular part of A contains the lower triangular part of
+            the matrix A, and the strictly upper triangular part of A is
+            not referenced.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) REAL array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+            WORK is not referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.f;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.f;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(
+			    r__1));
+		    value = dmax(r__2,r__3);
+/* L10: */
+		}
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n;
+		for (i__ = j; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    r__2 = value, r__3 = (r__1 = a[i__ + j * a_dim1], dabs(
+			    r__1));
+		    value = dmax(r__2,r__3);
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (((lsame_(norm, "I")) || (lsame_(norm,
+	    "O"))) || (*(unsigned char *)norm == '1')) {
+
+/*        Find normI(A) ( = norm1(A), since A is symmetric). */
+
+	value = 0.f;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		sum = 0.f;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+		    sum += absa;
+		    work[i__] += absa;
+/* L50: */
+		}
+		work[j] = sum + (r__1 = a[j + j * a_dim1], dabs(r__1));
+/* L60: */
+	    }
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+		r__1 = value, r__2 = work[i__];
+		value = dmax(r__1,r__2);
+/* L70: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] = 0.f;
+/* L80: */
+	    }
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		sum = work[j] + (r__1 = a[j + j * a_dim1], dabs(r__1));
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    absa = (r__1 = a[i__ + j * a_dim1], dabs(r__1));
+		    sum += absa;
+		    work[i__] += absa;
+/* L90: */
+		}
+		value = dmax(value,sum);
+/* L100: */
+	    }
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.f;
+	sum = 1.f;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		i__2 = j - 1;
+		slassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L110: */
+	    }
+	} else {
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n - j;
+		slassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum);
+/* L120: */
+	    }
+	}
+	sum *= 2;
+	i__1 = *lda + 1;
+	slassq_(n, &a[a_offset], &i__1, &scale, &sum);
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of SLANSY */
+
+} /* slansy_ */
+
+/* Subroutine */ int slanv2_(real *a, real *b, real *c__, real *d__, real *
+	rt1r, real *rt1i, real *rt2r, real *rt2i, real *cs, real *sn)
+{
+    /* System generated locals */
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double r_sign(real *, real *), sqrt(doublereal);
+
+    /* Local variables */
+    static real p, z__, aa, bb, cc, dd, cs1, sn1, sab, sac, eps, tau, temp,
+	    scale, bcmax, bcmis, sigma;
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLANV2 computes the Schur factorization of a real 2-by-2 nonsymmetric
+    matrix in standard form:
+
+         [ A  B ] = [ CS -SN ] [ AA  BB ] [ CS  SN ]
+         [ C  D ]   [ SN  CS ] [ CC  DD ] [-SN  CS ]
+
+    where either
+    1) CC = 0 so that AA and DD are real eigenvalues of the matrix, or
+    2) AA = DD and BB*CC < 0, so that AA + or - sqrt(BB*CC) are complex
+    conjugate eigenvalues.
+
+    Arguments
+    =========
+
+    A       (input/output) REAL
+    B       (input/output) REAL
+    C       (input/output) REAL
+    D       (input/output) REAL
+            On entry, the elements of the input matrix.
+            On exit, they are overwritten by the elements of the
+            standardised Schur form.
+
+    RT1R    (output) REAL
+    RT1I    (output) REAL
+    RT2R    (output) REAL
+    RT2I    (output) REAL
+            The real and imaginary parts of the eigenvalues. If the
+            eigenvalues are a complex conjugate pair, RT1I > 0.
+
+    CS      (output) REAL
+    SN      (output) REAL
+            Parameters of the rotation matrix.
+
+    Further Details
+    ===============
+
+    Modified by V. Sima, Research Institute for Informatics, Bucharest,
+    Romania, to reduce the risk of cancellation errors,
+    when computing real eigenvalues, and to ensure, if possible, that
+    abs(RT1R) >= abs(RT2R).
+
+    =====================================================================
+*/
+
+
+    eps = slamch_("P");
+    if (*c__ == 0.f) {
+	*cs = 1.f;
+	*sn = 0.f;
+	goto L10;
+
+    } else if (*b == 0.f) {
+
+/*        Swap rows and columns */
+
+	*cs = 0.f;
+	*sn = 1.f;
+	temp = *d__;
+	*d__ = *a;
+	*a = temp;
+	*b = -(*c__);
+	*c__ = 0.f;
+	goto L10;
+    } else if (*a - *d__ == 0.f && r_sign(&c_b1011, b) != r_sign(&c_b1011,
+	    c__)) {
+	*cs = 1.f;
+	*sn = 0.f;
+	goto L10;
+    } else {
+
+	temp = *a - *d__;
+	p = temp * .5f;
+/* Computing MAX */
+	r__1 = dabs(*b), r__2 = dabs(*c__);
+	bcmax = dmax(r__1,r__2);
+/* Computing MIN */
+	r__1 = dabs(*b), r__2 = dabs(*c__);
+	bcmis = dmin(r__1,r__2) * r_sign(&c_b1011, b) * r_sign(&c_b1011, c__);
+/* Computing MAX */
+	r__1 = dabs(p);
+	scale = dmax(r__1,bcmax);
+	z__ = p / scale * p + bcmax / scale * bcmis;
+
+/*
+          If Z is of the order of the machine accuracy, postpone the
+          decision on the nature of eigenvalues
+*/
+
+	if (z__ >= eps * 4.f) {
+
+/*           Real eigenvalues. Compute A and D. */
+
+	    r__1 = sqrt(scale) * sqrt(z__);
+	    z__ = p + r_sign(&r__1, &p);
+	    *a = *d__ + z__;
+	    *d__ -= bcmax / z__ * bcmis;
+
+/*           Compute B and the rotation matrix */
+
+	    tau = slapy2_(c__, &z__);
+	    *cs = z__ / tau;
+	    *sn = *c__ / tau;
+	    *b -= *c__;
+	    *c__ = 0.f;
+	} else {
+
+/*
+             Complex eigenvalues, or real (almost) equal eigenvalues.
+             Make diagonal elements equal.
+*/
+
+	    sigma = *b + *c__;
+	    tau = slapy2_(&sigma, &temp);
+	    *cs = sqrt((dabs(sigma) / tau + 1.f) * .5f);
+	    *sn = -(p / (tau * *cs)) * r_sign(&c_b1011, &sigma);
+
+/*
+             Compute [ AA  BB ] = [ A  B ] [ CS -SN ]
+                     [ CC  DD ]   [ C  D ] [ SN  CS ]
+*/
+
+	    aa = *a * *cs + *b * *sn;
+	    bb = -(*a) * *sn + *b * *cs;
+	    cc = *c__ * *cs + *d__ * *sn;
+	    dd = -(*c__) * *sn + *d__ * *cs;
+
+/*
+             Compute [ A  B ] = [ CS  SN ] [ AA  BB ]
+                     [ C  D ]   [-SN  CS ] [ CC  DD ]
+*/
+
+	    *a = aa * *cs + cc * *sn;
+	    *b = bb * *cs + dd * *sn;
+	    *c__ = -aa * *sn + cc * *cs;
+	    *d__ = -bb * *sn + dd * *cs;
+
+	    temp = (*a + *d__) * .5f;
+	    *a = temp;
+	    *d__ = temp;
+
+	    if (*c__ != 0.f) {
+		if (*b != 0.f) {
+		    if (r_sign(&c_b1011, b) == r_sign(&c_b1011, c__)) {
+
+/*                    Real eigenvalues: reduce to upper triangular form */
+
+			sab = sqrt((dabs(*b)));
+			sac = sqrt((dabs(*c__)));
+			r__1 = sab * sac;
+			p = r_sign(&r__1, c__);
+			tau = 1.f / sqrt((r__1 = *b + *c__, dabs(r__1)));
+			*a = temp + p;
+			*d__ = temp - p;
+			*b -= *c__;
+			*c__ = 0.f;
+			cs1 = sab * tau;
+			sn1 = sac * tau;
+			temp = *cs * cs1 - *sn * sn1;
+			*sn = *cs * sn1 + *sn * cs1;
+			*cs = temp;
+		    }
+		} else {
+		    *b = -(*c__);
+		    *c__ = 0.f;
+		    temp = *cs;
+		    *cs = -(*sn);
+		    *sn = temp;
+		}
+	    }
+	}
+
+    }
+
+L10:
+
+/*     Store eigenvalues in (RT1R,RT1I) and (RT2R,RT2I). */
+
+    *rt1r = *a;
+    *rt2r = *d__;
+    if (*c__ == 0.f) {
+	*rt1i = 0.f;
+	*rt2i = 0.f;
+    } else {
+	*rt1i = sqrt((dabs(*b))) * sqrt((dabs(*c__)));
+	*rt2i = -(*rt1i);
+    }
+    return 0;
+
+/*     End of SLANV2 */
+
+} /* slanv2_ */
+
+doublereal slapy2_(real *x, real *y)
+{
+    /* System generated locals */
+    real ret_val, r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real w, z__, xabs, yabs;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAPY2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
+    overflow.
+
+    Arguments
+    =========
+
+    X       (input) REAL
+    Y       (input) REAL
+            X and Y specify the values x and y.
+
+    =====================================================================
+*/
+
+
+    xabs = dabs(*x);
+    yabs = dabs(*y);
+    w = dmax(xabs,yabs);
+    z__ = dmin(xabs,yabs);
+    if (z__ == 0.f) {
+	ret_val = w;
+    } else {
+/* Computing 2nd power */
+	r__1 = z__ / w;
+	ret_val = w * sqrt(r__1 * r__1 + 1.f);
+    }
+    return ret_val;
+
+/*     End of SLAPY2 */
+
+} /* slapy2_ */
+
+doublereal slapy3_(real *x, real *y, real *z__)
+{
+    /* System generated locals */
+    real ret_val, r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real w, xabs, yabs, zabs;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLAPY3 returns sqrt(x**2+y**2+z**2), taking care not to cause
+    unnecessary overflow.
+
+    Arguments
+    =========
+
+    X       (input) REAL
+    Y       (input) REAL
+    Z       (input) REAL
+            X, Y and Z specify the values x, y and z.
+
+    =====================================================================
+*/
+
+
+    xabs = dabs(*x);
+    yabs = dabs(*y);
+    zabs = dabs(*z__);
+/* Computing MAX */
+    r__1 = max(xabs,yabs);
+    w = dmax(r__1,zabs);
+    if (w == 0.f) {
+	ret_val = 0.f;
+    } else {
+/* Computing 2nd power */
+	r__1 = xabs / w;
+/* Computing 2nd power */
+	r__2 = yabs / w;
+/* Computing 2nd power */
+	r__3 = zabs / w;
+	ret_val = w * sqrt(r__1 * r__1 + r__2 * r__2 + r__3 * r__3);
+    }
+    return ret_val;
+
+/*     End of SLAPY3 */
+
+} /* slapy3_ */
+
+/* Subroutine */ int slarf_(char *side, integer *m, integer *n, real *v,
+	integer *incv, real *tau, real *c__, integer *ldc, real *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset;
+    real r__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *,
+	    integer *, real *, integer *, real *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLARF applies a real elementary reflector H to a real m by n matrix
+    C, from either the left or the right. H is represented in the form
+
+          H = I - tau * v * v'
+
+    where tau is a real scalar and v is a real vector.
+
+    If tau = 0, then H is taken to be the unit matrix.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) REAL array, dimension
+                       (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+                    or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+            The vector v in the representation of H. V is not used if
+            TAU = 0.
+
+    INCV    (input) INTEGER
+            The increment between elements of v. INCV <> 0.
+
+    TAU     (input) REAL
+            The value tau in the representation of H.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) REAL array, dimension
+                           (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C */
+
+	if (*tau != 0.f) {
+
+/*           w := C' * v */
+
+	    sgemv_("Transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1],
+		    incv, &c_b320, &work[1], &c__1);
+
+/*           C := C - v * w' */
+
+	    r__1 = -(*tau);
+	    sger_(m, n, &r__1, &v[1], incv, &work[1], &c__1, &c__[c_offset],
+		    ldc);
+	}
+    } else {
+
+/*        Form  C * H */
+
+	if (*tau != 0.f) {
+
+/*           w := C * v */
+
+	    sgemv_("No transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1],
+		     incv, &c_b320, &work[1], &c__1);
+
+/*           C := C - w * v' */
+
+	    r__1 = -(*tau);
+	    sger_(m, n, &r__1, &work[1], &c__1, &v[1], incv, &c__[c_offset],
+		    ldc);
+	}
+    }
+    return 0;
+
+/*     End of SLARF */
+
+} /* slarf_ */
+
+/* Subroutine */ int slarfb_(char *side, char *trans, char *direct, char *
+	storev, integer *m, integer *n, integer *k, real *v, integer *ldv,
+	real *t, integer *ldt, real *c__, integer *ldc, real *work, integer *
+	ldwork)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
+	    work_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *), scopy_(integer *, real *,
+	    integer *, real *, integer *), strmm_(char *, char *, char *,
+	    char *, integer *, integer *, real *, real *, integer *, real *,
+	    integer *);
+    static char transt[1];
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLARFB applies a real block reflector H or its transpose H' to a
+    real m by n matrix C, from either the left or the right.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply H or H' from the Left
+            = 'R': apply H or H' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply H (No transpose)
+            = 'T': apply H' (Transpose)
+
+    DIRECT  (input) CHARACTER*1
+            Indicates how H is formed from a product of elementary
+            reflectors
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Indicates how the vectors which define the elementary
+            reflectors are stored:
+            = 'C': Columnwise
+            = 'R': Rowwise
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    K       (input) INTEGER
+            The order of the matrix T (= the number of elementary
+            reflectors whose product defines the block reflector).
+
+    V       (input) REAL array, dimension
+                                  (LDV,K) if STOREV = 'C'
+                                  (LDV,M) if STOREV = 'R' and SIDE = 'L'
+                                  (LDV,N) if STOREV = 'R' and SIDE = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+            if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+            if STOREV = 'R', LDV >= K.
+
+    T       (input) REAL array, dimension (LDT,K)
+            The triangular k by k matrix T in the representation of the
+            block reflector.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDA >= max(1,M).
+
+    WORK    (workspace) REAL array, dimension (LDWORK,K)
+
+    LDWORK  (input) INTEGER
+            The leading dimension of the array WORK.
+            If SIDE = 'L', LDWORK >= max(1,N);
+            if SIDE = 'R', LDWORK >= max(1,M).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    work_dim1 = *ldwork;
+    work_offset = 1 + work_dim1;
+    work -= work_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (lsame_(trans, "N")) {
+	*(unsigned char *)transt = 'T';
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+    if (lsame_(storev, "C")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1 )    (first K rows)
+                       ( V2 )
+             where  V1  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+/* L10: */
+		}
+
+/*              W := W * V1 */
+
+		strmm_("Right", "Lower", "No transpose", "Unit", n, k, &
+			c_b1011, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2 */
+
+		    i__1 = *m - *k;
+		    sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b1011,
+			     &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 + v_dim1],
+			    ldv, &c_b1011, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2 * W' */
+
+		    i__1 = *m - *k;
+		    sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1290,
+			     &v[*k + 1 + v_dim1], ldv, &work[work_offset],
+			    ldwork, &c_b1011, &c__[*k + 1 + c_dim1], ldc);
+		}
+
+/*              W := W * V1' */
+
+		strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b1011,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
+/* L20: */
+		    }
+/* L30: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L40: */
+		}
+
+/*              W := W * V1 */
+
+		strmm_("Right", "Lower", "No transpose", "Unit", m, k, &
+			c_b1011, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2 */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b1011, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k
+			    + 1 + v_dim1], ldv, &c_b1011, &work[work_offset],
+			    ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2' */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1290,
+			     &work[work_offset], ldwork, &v[*k + 1 + v_dim1],
+			    ldv, &c_b1011, &c__[(*k + 1) * c_dim1 + 1], ldc);
+		}
+
+/*              W := W * V1' */
+
+		strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b1011,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
+/* L50: */
+		    }
+/* L60: */
+		}
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1 )
+                       ( V2 )    (last K rows)
+             where  V2  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L70: */
+		}
+
+/*              W := W * V2 */
+
+		strmm_("Right", "Upper", "No transpose", "Unit", n, k, &
+			c_b1011, &v[*m - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1 */
+
+		    i__1 = *m - *k;
+		    sgemm_("Transpose", "No transpose", n, k, &i__1, &c_b1011,
+			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b1011,
+			     &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1 * W' */
+
+		    i__1 = *m - *k;
+		    sgemm_("No transpose", "Transpose", &i__1, n, k, &c_b1290,
+			     &v[v_offset], ldv, &work[work_offset], ldwork, &
+			    c_b1011, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b1011,
+			&v[*m - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L80: */
+		    }
+/* L90: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L100: */
+		}
+
+/*              W := W * V2 */
+
+		strmm_("Right", "Upper", "No transpose", "Unit", m, k, &
+			c_b1011, &v[*n - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1 */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b1011, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b1011, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1' */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "Transpose", m, &i__1, k, &c_b1290,
+			     &work[work_offset], ldwork, &v[v_offset], ldv, &
+			    c_b1011, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b1011,
+			&v[*n - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+
+/*              C2 := C2 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L110: */
+		    }
+/* L120: */
+		}
+	    }
+	}
+
+    } else if (lsame_(storev, "R")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1  V2 )    (V1: first K columns)
+             where  V1  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+/* L130: */
+		}
+
+/*              W := W * V1' */
+
+		strmm_("Right", "Upper", "Transpose", "Unit", n, k, &c_b1011,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2' */
+
+		    i__1 = *m - *k;
+		    sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b1011, &
+			    c__[*k + 1 + c_dim1], ldc, &v[(*k + 1) * v_dim1 +
+			    1], ldv, &c_b1011, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		strmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2' * W' */
+
+		    i__1 = *m - *k;
+		    sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1290, &
+			    v[(*k + 1) * v_dim1 + 1], ldv, &work[work_offset],
+			     ldwork, &c_b1011, &c__[*k + 1 + c_dim1], ldc);
+		}
+
+/*              W := W * V1 */
+
+		strmm_("Right", "Upper", "No transpose", "Unit", n, k, &
+			c_b1011, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[j + i__ * c_dim1] -= work[i__ + j * work_dim1];
+/* L140: */
+		    }
+/* L150: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L160: */
+		}
+
+/*              W := W * V1' */
+
+		strmm_("Right", "Upper", "Transpose", "Unit", m, k, &c_b1011,
+			&v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2' */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b1011,
+			     &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k + 1) *
+			    v_dim1 + 1], ldv, &c_b1011, &work[work_offset],
+			    ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		strmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2 */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "No transpose", m, &i__1, k, &
+			    c_b1290, &work[work_offset], ldwork, &v[(*k + 1) *
+			     v_dim1 + 1], ldv, &c_b1011, &c__[(*k + 1) *
+			    c_dim1 + 1], ldc);
+		}
+
+/*              W := W * V1 */
+
+		strmm_("Right", "Upper", "No transpose", "Unit", m, k, &
+			c_b1011, &v[v_offset], ldv, &work[work_offset],
+			ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + j * c_dim1] -= work[i__ + j * work_dim1];
+/* L170: */
+		    }
+/* L180: */
+		}
+
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1  V2 )    (V2: last K columns)
+             where  V2  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L190: */
+		}
+
+/*              W := W * V2' */
+
+		strmm_("Right", "Lower", "Transpose", "Unit", n, k, &c_b1011,
+			&v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1' */
+
+		    i__1 = *m - *k;
+		    sgemm_("Transpose", "Transpose", n, k, &i__1, &c_b1011, &
+			    c__[c_offset], ldc, &v[v_offset], ldv, &c_b1011, &
+			    work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		strmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1' * W' */
+
+		    i__1 = *m - *k;
+		    sgemm_("Transpose", "Transpose", &i__1, n, k, &c_b1290, &
+			    v[v_offset], ldv, &work[work_offset], ldwork, &
+			    c_b1011, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		strmm_("Right", "Lower", "No transpose", "Unit", n, k, &
+			c_b1011, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[*m - *k + j + i__ * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L200: */
+		    }
+/* L210: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    scopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L220: */
+		}
+
+/*              W := W * V2' */
+
+		strmm_("Right", "Lower", "Transpose", "Unit", m, k, &c_b1011,
+			&v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1' */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "Transpose", m, k, &i__1, &c_b1011,
+			     &c__[c_offset], ldc, &v[v_offset], ldv, &c_b1011,
+			     &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		strmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b1011, &
+			t[t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1 */
+
+		    i__1 = *n - *k;
+		    sgemm_("No transpose", "No transpose", m, &i__1, k, &
+			    c_b1290, &work[work_offset], ldwork, &v[v_offset],
+			     ldv, &c_b1011, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		strmm_("Right", "Lower", "No transpose", "Unit", m, k, &
+			c_b1011, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			c__[i__ + (*n - *k + j) * c_dim1] -= work[i__ + j *
+				work_dim1];
+/* L230: */
+		    }
+/* L240: */
+		}
+
+	    }
+
+	}
+    }
+
+    return 0;
+
+/*     End of SLARFB */
+
+} /* slarfb_ */
+
+/* Subroutine */ int slarfg_(integer *n, real *alpha, real *x, integer *incx,
+	real *tau)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1;
+
+    /* Builtin functions */
+    double r_sign(real *, real *);
+
+    /* Local variables */
+    static integer j, knt;
+    static real beta;
+    extern doublereal snrm2_(integer *, real *, integer *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static real xnorm;
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+    static real safmin, rsafmn;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLARFG generates a real elementary reflector H of order n, such
+    that
+
+          H * ( alpha ) = ( beta ),   H' * H = I.
+              (   x   )   (   0  )
+
+    where alpha and beta are scalars, and x is an (n-1)-element real
+    vector. H is represented in the form
+
+          H = I - tau * ( 1 ) * ( 1 v' ) ,
+                        ( v )
+
+    where tau is a real scalar and v is a real (n-1)-element
+    vector.
+
+    If the elements of x are all zero, then tau = 0 and H is taken to be
+    the unit matrix.
+
+    Otherwise  1 <= tau <= 2.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the elementary reflector.
+
+    ALPHA   (input/output) REAL
+            On entry, the value alpha.
+            On exit, it is overwritten with the value beta.
+
+    X       (input/output) REAL array, dimension
+                           (1+(N-2)*abs(INCX))
+            On entry, the vector x.
+            On exit, it is overwritten with the vector v.
+
+    INCX    (input) INTEGER
+            The increment between elements of X. INCX > 0.
+
+    TAU     (output) REAL
+            The value tau.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n <= 1) {
+	*tau = 0.f;
+	return 0;
+    }
+
+    i__1 = *n - 1;
+    xnorm = snrm2_(&i__1, &x[1], incx);
+
+    if (xnorm == 0.f) {
+
+/*        H  =  I */
+
+	*tau = 0.f;
+    } else {
+
+/*        general case */
+
+	r__1 = slapy2_(alpha, &xnorm);
+	beta = -r_sign(&r__1, alpha);
+	safmin = slamch_("S") / slamch_("E");
+	if (dabs(beta) < safmin) {
+
+/*           XNORM, BETA may be inaccurate; scale X and recompute them */
+
+	    rsafmn = 1.f / safmin;
+	    knt = 0;
+L10:
+	    ++knt;
+	    i__1 = *n - 1;
+	    sscal_(&i__1, &rsafmn, &x[1], incx);
+	    beta *= rsafmn;
+	    *alpha *= rsafmn;
+	    if (dabs(beta) < safmin) {
+		goto L10;
+	    }
+
+/*           New BETA is at most 1, at least SAFMIN */
+
+	    i__1 = *n - 1;
+	    xnorm = snrm2_(&i__1, &x[1], incx);
+	    r__1 = slapy2_(alpha, &xnorm);
+	    beta = -r_sign(&r__1, alpha);
+	    *tau = (beta - *alpha) / beta;
+	    i__1 = *n - 1;
+	    r__1 = 1.f / (*alpha - beta);
+	    sscal_(&i__1, &r__1, &x[1], incx);
+
+/*           If ALPHA is subnormal, it may lose relative accuracy */
+
+	    *alpha = beta;
+	    i__1 = knt;
+	    for (j = 1; j <= i__1; ++j) {
+		*alpha *= safmin;
+/* L20: */
+	    }
+	} else {
+	    *tau = (beta - *alpha) / beta;
+	    i__1 = *n - 1;
+	    r__1 = 1.f / (*alpha - beta);
+	    sscal_(&i__1, &r__1, &x[1], incx);
+	    *alpha = beta;
+	}
+    }
+
+    return 0;
+
+/*     End of SLARFG */
+
+} /* slarfg_ */
+
+/* Subroutine */ int slarft_(char *direct, char *storev, integer *n, integer *
+	k, real *v, integer *ldv, real *tau, real *t, integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Local variables */
+    static integer i__, j;
+    static real vii;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+	    real *, integer *, real *, integer *, real *, real *, integer *), strmv_(char *, char *, char *, integer *, real *,
+	    integer *, real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLARFT forms the triangular factor T of a real block reflector H
+    of order n, which is defined as a product of k elementary reflectors.
+
+    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+
+    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+
+    If STOREV = 'C', the vector which defines the elementary reflector
+    H(i) is stored in the i-th column of the array V, and
+
+       H  =  I - V * T * V'
+
+    If STOREV = 'R', the vector which defines the elementary reflector
+    H(i) is stored in the i-th row of the array V, and
+
+       H  =  I - V' * T * V
+
+    Arguments
+    =========
+
+    DIRECT  (input) CHARACTER*1
+            Specifies the order in which the elementary reflectors are
+            multiplied to form the block reflector:
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Specifies how the vectors which define the elementary
+            reflectors are stored (see also Further Details):
+            = 'C': columnwise
+            = 'R': rowwise
+
+    N       (input) INTEGER
+            The order of the block reflector H. N >= 0.
+
+    K       (input) INTEGER
+            The order of the triangular factor T (= the number of
+            elementary reflectors). K >= 1.
+
+    V       (input/output) REAL array, dimension
+                                 (LDV,K) if STOREV = 'C'
+                                 (LDV,N) if STOREV = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i).
+
+    T       (output) REAL array, dimension (LDT,K)
+            The k by k triangular factor T of the block reflector.
+            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+            lower triangular. The rest of the array is not used.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    Further Details
+    ===============
+
+    The shape of the matrix V and the storage of the vectors which define
+    the H(i) is best illustrated by the following example with n = 5 and
+    k = 3. The elements equal to 1 are not stored; the corresponding
+    array elements are modified but restored on exit. The rest of the
+    array is not used.
+
+    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+
+                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+                     ( v1  1    )                     (     1 v2 v2 v2 )
+                     ( v1 v2  1 )                     (        1 v3 v3 )
+                     ( v1 v2 v3 )
+                     ( v1 v2 v3 )
+
+    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+
+                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+                     (     1 v3 )
+                     (        1 )
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (tau[i__] == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    t[j + i__ * t_dim1] = 0.f;
+/* L10: */
+		}
+	    } else {
+
+/*              general case */
+
+		vii = v[i__ + i__ * v_dim1];
+		v[i__ + i__ * v_dim1] = 1.f;
+		if (lsame_(storev, "C")) {
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
+
+		    i__2 = *n - i__ + 1;
+		    i__3 = i__ - 1;
+		    r__1 = -tau[i__];
+		    sgemv_("Transpose", &i__2, &i__3, &r__1, &v[i__ + v_dim1],
+			     ldv, &v[i__ + i__ * v_dim1], &c__1, &c_b320, &t[
+			    i__ * t_dim1 + 1], &c__1);
+		} else {
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */
+
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__ + 1;
+		    r__1 = -tau[i__];
+		    sgemv_("No transpose", &i__2, &i__3, &r__1, &v[i__ *
+			    v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, &
+			    c_b320, &t[i__ * t_dim1 + 1], &c__1);
+		}
+		v[i__ + i__ * v_dim1] = vii;
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		strmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		t[i__ + i__ * t_dim1] = tau[i__];
+	    }
+/* L20: */
+	}
+    } else {
+	for (i__ = *k; i__ >= 1; --i__) {
+	    if (tau[i__] == 0.f) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    t[j + i__ * t_dim1] = 0.f;
+/* L30: */
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+			vii = v[*n - *k + i__ + i__ * v_dim1];
+			v[*n - *k + i__ + i__ * v_dim1] = 1.f;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*/
+
+			i__1 = *n - *k + i__;
+			i__2 = *k - i__;
+			r__1 = -tau[i__];
+			sgemv_("Transpose", &i__1, &i__2, &r__1, &v[(i__ + 1)
+				* v_dim1 + 1], ldv, &v[i__ * v_dim1 + 1], &
+				c__1, &c_b320, &t[i__ + 1 + i__ * t_dim1], &
+				c__1);
+			v[*n - *k + i__ + i__ * v_dim1] = vii;
+		    } else {
+			vii = v[i__ + (*n - *k + i__) * v_dim1];
+			v[i__ + (*n - *k + i__) * v_dim1] = 1.f;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*/
+
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__;
+			r__1 = -tau[i__];
+			sgemv_("No transpose", &i__1, &i__2, &r__1, &v[i__ +
+				1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, &
+				c_b320, &t[i__ + 1 + i__ * t_dim1], &c__1);
+			v[i__ + (*n - *k + i__) * v_dim1] = vii;
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    strmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		}
+		t[i__ + i__ * t_dim1] = tau[i__];
+	    }
+/* L40: */
+	}
+    }
+    return 0;
+
+/*     End of SLARFT */
+
+} /* slarft_ */
+
+/* Subroutine */ int slarfx_(char *side, integer *m, integer *n, real *v,
+	real *tau, real *c__, integer *ldc, real *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, i__1;
+    real r__1;
+
+    /* Local variables */
+    static integer j;
+    static real t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4, v5, v6,
+	    v7, v8, v9, t10, v10, sum;
+    extern /* Subroutine */ int sger_(integer *, integer *, real *, real *,
+	    integer *, real *, integer *, real *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLARFX applies a real elementary reflector H to a real m by n
+    matrix C, from either the left or the right. H is represented in the
+    form
+
+          H = I - tau * v * v'
+
+    where tau is a real scalar and v is a real vector.
+
+    If tau = 0, then H is taken to be the unit matrix
+
+    This version uses inline code if H has order < 11.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) REAL array, dimension (M) if SIDE = 'L'
+                                       or (N) if SIDE = 'R'
+            The vector v in the representation of H.
+
+    TAU     (input) REAL
+            The value tau in the representation of H.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDA >= (1,M).
+
+    WORK    (workspace) REAL array, dimension
+                        (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+            WORK is not referenced if H has order < 11.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (*tau == 0.f) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C, where H has order m. */
+
+	switch (*m) {
+	    case 1:  goto L10;
+	    case 2:  goto L30;
+	    case 3:  goto L50;
+	    case 4:  goto L70;
+	    case 5:  goto L90;
+	    case 6:  goto L110;
+	    case 7:  goto L130;
+	    case 8:  goto L150;
+	    case 9:  goto L170;
+	    case 10:  goto L190;
+	}
+
+/*
+          Code for general M
+
+          w := C'*v
+*/
+
+	sgemv_("Transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1], &c__1,
+		 &c_b320, &work[1], &c__1);
+
+/*        C := C - tau * v * w' */
+
+	r__1 = -(*tau);
+	sger_(m, n, &r__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset], ldc)
+		;
+	goto L410;
+L10:
+
+/*        Special code for 1 x 1 Householder */
+
+	t1 = 1.f - *tau * v[1] * v[1];
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    c__[j * c_dim1 + 1] = t1 * c__[j * c_dim1 + 1];
+/* L20: */
+	}
+	goto L410;
+L30:
+
+/*        Special code for 2 x 2 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+/* L40: */
+	}
+	goto L410;
+L50:
+
+/*        Special code for 3 x 3 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+/* L60: */
+	}
+	goto L410;
+L70:
+
+/*        Special code for 4 x 4 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+/* L80: */
+	}
+	goto L410;
+L90:
+
+/*        Special code for 5 x 5 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+/* L100: */
+	}
+	goto L410;
+L110:
+
+/*        Special code for 6 x 6 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+/* L120: */
+	}
+	goto L410;
+L130:
+
+/*        Special code for 7 x 7 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+/* L140: */
+	}
+	goto L410;
+L150:
+
+/*        Special code for 8 x 8 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+	    c__[j * c_dim1 + 8] -= sum * t8;
+/* L160: */
+	}
+	goto L410;
+L170:
+
+/*        Special code for 9 x 9 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j *
+		    c_dim1 + 9];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+	    c__[j * c_dim1 + 8] -= sum * t8;
+	    c__[j * c_dim1 + 9] -= sum * t9;
+/* L180: */
+	}
+	goto L410;
+L190:
+
+/*        Special code for 10 x 10 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	v10 = v[10];
+	t10 = *tau * v10;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j * c_dim1 + 1] + v2 * c__[j * c_dim1 + 2] + v3 *
+		    c__[j * c_dim1 + 3] + v4 * c__[j * c_dim1 + 4] + v5 * c__[
+		    j * c_dim1 + 5] + v6 * c__[j * c_dim1 + 6] + v7 * c__[j *
+		    c_dim1 + 7] + v8 * c__[j * c_dim1 + 8] + v9 * c__[j *
+		    c_dim1 + 9] + v10 * c__[j * c_dim1 + 10];
+	    c__[j * c_dim1 + 1] -= sum * t1;
+	    c__[j * c_dim1 + 2] -= sum * t2;
+	    c__[j * c_dim1 + 3] -= sum * t3;
+	    c__[j * c_dim1 + 4] -= sum * t4;
+	    c__[j * c_dim1 + 5] -= sum * t5;
+	    c__[j * c_dim1 + 6] -= sum * t6;
+	    c__[j * c_dim1 + 7] -= sum * t7;
+	    c__[j * c_dim1 + 8] -= sum * t8;
+	    c__[j * c_dim1 + 9] -= sum * t9;
+	    c__[j * c_dim1 + 10] -= sum * t10;
+/* L200: */
+	}
+	goto L410;
+    } else {
+
+/*        Form  C * H, where H has order n. */
+
+	switch (*n) {
+	    case 1:  goto L210;
+	    case 2:  goto L230;
+	    case 3:  goto L250;
+	    case 4:  goto L270;
+	    case 5:  goto L290;
+	    case 6:  goto L310;
+	    case 7:  goto L330;
+	    case 8:  goto L350;
+	    case 9:  goto L370;
+	    case 10:  goto L390;
+	}
+
+/*
+          Code for general N
+
+          w := C * v
+*/
+
+	sgemv_("No transpose", m, n, &c_b1011, &c__[c_offset], ldc, &v[1], &
+		c__1, &c_b320, &work[1], &c__1);
+
+/*        C := C - tau * w * v' */
+
+	r__1 = -(*tau);
+	sger_(m, n, &r__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset], ldc)
+		;
+	goto L410;
+L210:
+
+/*        Special code for 1 x 1 Householder */
+
+	t1 = 1.f - *tau * v[1] * v[1];
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    c__[j + c_dim1] = t1 * c__[j + c_dim1];
+/* L220: */
+	}
+	goto L410;
+L230:
+
+/*        Special code for 2 x 2 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+/* L240: */
+	}
+	goto L410;
+L250:
+
+/*        Special code for 3 x 3 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+/* L260: */
+	}
+	goto L410;
+L270:
+
+/*        Special code for 4 x 4 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+/* L280: */
+	}
+	goto L410;
+L290:
+
+/*        Special code for 5 x 5 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+/* L300: */
+	}
+	goto L410;
+L310:
+
+/*        Special code for 6 x 6 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+/* L320: */
+	}
+	goto L410;
+L330:
+
+/*        Special code for 7 x 7 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+/* L340: */
+	}
+	goto L410;
+L350:
+
+/*        Special code for 8 x 8 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+	    c__[j + ((c_dim1) << (3))] -= sum * t8;
+/* L360: */
+	}
+	goto L410;
+L370:
+
+/*        Special code for 9 x 9 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] +
+		    v9 * c__[j + c_dim1 * 9];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+	    c__[j + ((c_dim1) << (3))] -= sum * t8;
+	    c__[j + c_dim1 * 9] -= sum * t9;
+/* L380: */
+	}
+	goto L410;
+L390:
+
+/*        Special code for 10 x 10 Householder */
+
+	v1 = v[1];
+	t1 = *tau * v1;
+	v2 = v[2];
+	t2 = *tau * v2;
+	v3 = v[3];
+	t3 = *tau * v3;
+	v4 = v[4];
+	t4 = *tau * v4;
+	v5 = v[5];
+	t5 = *tau * v5;
+	v6 = v[6];
+	t6 = *tau * v6;
+	v7 = v[7];
+	t7 = *tau * v7;
+	v8 = v[8];
+	t8 = *tau * v8;
+	v9 = v[9];
+	t9 = *tau * v9;
+	v10 = v[10];
+	t10 = *tau * v10;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = v1 * c__[j + c_dim1] + v2 * c__[j + ((c_dim1) << (1))] + v3
+		    * c__[j + c_dim1 * 3] + v4 * c__[j + ((c_dim1) << (2))] +
+		    v5 * c__[j + c_dim1 * 5] + v6 * c__[j + c_dim1 * 6] + v7 *
+		     c__[j + c_dim1 * 7] + v8 * c__[j + ((c_dim1) << (3))] +
+		    v9 * c__[j + c_dim1 * 9] + v10 * c__[j + c_dim1 * 10];
+	    c__[j + c_dim1] -= sum * t1;
+	    c__[j + ((c_dim1) << (1))] -= sum * t2;
+	    c__[j + c_dim1 * 3] -= sum * t3;
+	    c__[j + ((c_dim1) << (2))] -= sum * t4;
+	    c__[j + c_dim1 * 5] -= sum * t5;
+	    c__[j + c_dim1 * 6] -= sum * t6;
+	    c__[j + c_dim1 * 7] -= sum * t7;
+	    c__[j + ((c_dim1) << (3))] -= sum * t8;
+	    c__[j + c_dim1 * 9] -= sum * t9;
+	    c__[j + c_dim1 * 10] -= sum * t10;
+/* L400: */
+	}
+	goto L410;
+    }
+L410:
+    return 0;
+
+/*     End of SLARFX */
+
+} /* slarfx_ */
+
+/* Subroutine */ int slartg_(real *f, real *g, real *cs, real *sn, real *r__)
+{
+    /* Initialized data */
+
+    static logical first = TRUE_;
+
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double log(doublereal), pow_ri(real *, integer *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__;
+    static real f1, g1, eps, scale;
+    static integer count;
+    static real safmn2, safmx2;
+    extern doublereal slamch_(char *);
+    static real safmin;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLARTG generate a plane rotation so that
+
+       [  CS  SN  ]  .  [ F ]  =  [ R ]   where CS**2 + SN**2 = 1.
+       [ -SN  CS  ]     [ G ]     [ 0 ]
+
+    This is a slower, more accurate version of the BLAS1 routine SROTG,
+    with the following other differences:
+       F and G are unchanged on return.
+       If G=0, then CS=1 and SN=0.
+       If F=0 and (G .ne. 0), then CS=0 and SN=1 without doing any
+          floating point operations (saves work in SBDSQR when
+          there are zeros on the diagonal).
+
+    If F exceeds G in magnitude, CS will be positive.
+
+    Arguments
+    =========
+
+    F       (input) REAL
+            The first component of vector to be rotated.
+
+    G       (input) REAL
+            The second component of vector to be rotated.
+
+    CS      (output) REAL
+            The cosine of the rotation.
+
+    SN      (output) REAL
+            The sine of the rotation.
+
+    R       (output) REAL
+            The nonzero component of the rotated vector.
+
+    =====================================================================
+*/
+
+
+    if (first) {
+	first = FALSE_;
+	safmin = slamch_("S");
+	eps = slamch_("E");
+	r__1 = slamch_("B");
+	i__1 = (integer) (log(safmin / eps) / log(slamch_("B")) /
+		2.f);
+	safmn2 = pow_ri(&r__1, &i__1);
+	safmx2 = 1.f / safmn2;
+    }
+    if (*g == 0.f) {
+	*cs = 1.f;
+	*sn = 0.f;
+	*r__ = *f;
+    } else if (*f == 0.f) {
+	*cs = 0.f;
+	*sn = 1.f;
+	*r__ = *g;
+    } else {
+	f1 = *f;
+	g1 = *g;
+/* Computing MAX */
+	r__1 = dabs(f1), r__2 = dabs(g1);
+	scale = dmax(r__1,r__2);
+	if (scale >= safmx2) {
+	    count = 0;
+L10:
+	    ++count;
+	    f1 *= safmn2;
+	    g1 *= safmn2;
+/* Computing MAX */
+	    r__1 = dabs(f1), r__2 = dabs(g1);
+	    scale = dmax(r__1,r__2);
+	    if (scale >= safmx2) {
+		goto L10;
+	    }
+/* Computing 2nd power */
+	    r__1 = f1;
+/* Computing 2nd power */
+	    r__2 = g1;
+	    *r__ = sqrt(r__1 * r__1 + r__2 * r__2);
+	    *cs = f1 / *r__;
+	    *sn = g1 / *r__;
+	    i__1 = count;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		*r__ *= safmx2;
+/* L20: */
+	    }
+	} else if (scale <= safmn2) {
+	    count = 0;
+L30:
+	    ++count;
+	    f1 *= safmx2;
+	    g1 *= safmx2;
+/* Computing MAX */
+	    r__1 = dabs(f1), r__2 = dabs(g1);
+	    scale = dmax(r__1,r__2);
+	    if (scale <= safmn2) {
+		goto L30;
+	    }
+/* Computing 2nd power */
+	    r__1 = f1;
+/* Computing 2nd power */
+	    r__2 = g1;
+	    *r__ = sqrt(r__1 * r__1 + r__2 * r__2);
+	    *cs = f1 / *r__;
+	    *sn = g1 / *r__;
+	    i__1 = count;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		*r__ *= safmn2;
+/* L40: */
+	    }
+	} else {
+/* Computing 2nd power */
+	    r__1 = f1;
+/* Computing 2nd power */
+	    r__2 = g1;
+	    *r__ = sqrt(r__1 * r__1 + r__2 * r__2);
+	    *cs = f1 / *r__;
+	    *sn = g1 / *r__;
+	}
+	if (dabs(*f) > dabs(*g) && *cs < 0.f) {
+	    *cs = -(*cs);
+	    *sn = -(*sn);
+	    *r__ = -(*r__);
+	}
+    }
+    return 0;
+
+/*     End of SLARTG */
+
+} /* slartg_ */
+
+/* Subroutine */ int slas2_(real *f, real *g, real *h__, real *ssmin, real *
+	ssmax)
+{
+    /* System generated locals */
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real c__, fa, ga, ha, as, at, au, fhmn, fhmx;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SLAS2  computes the singular values of the 2-by-2 matrix
+       [  F   G  ]
+       [  0   H  ].
+    On return, SSMIN is the smaller singular value and SSMAX is the
+    larger singular value.
+
+    Arguments
+    =========
+
+    F       (input) REAL
+            The (1,1) element of the 2-by-2 matrix.
+
+    G       (input) REAL
+            The (1,2) element of the 2-by-2 matrix.
+
+    H       (input) REAL
+            The (2,2) element of the 2-by-2 matrix.
+
+    SSMIN   (output) REAL
+            The smaller singular value.
+
+    SSMAX   (output) REAL
+            The larger singular value.
+
+    Further Details
+    ===============
+
+    Barring over/underflow, all output quantities are correct to within
+    a few units in the last place (ulps), even in the absence of a guard
+    digit in addition/subtraction.
+
+    In IEEE arithmetic, the code works correctly if one matrix element is
+    infinite.
+
+    Overflow will not occur unless the largest singular value itself
+    overflows, or is within a few ulps of overflow. (On machines with
+    partial overflow, like the Cray, overflow may occur if the largest
+    singular value is within a factor of 2 of overflow.)
+
+    Underflow is harmless if underflow is gradual. Otherwise, results
+    may correspond to a matrix modified by perturbations of size near
+    the underflow threshold.
+
+    ====================================================================
+*/
+
+
+    fa = dabs(*f);
+    ga = dabs(*g);
+    ha = dabs(*h__);
+    fhmn = dmin(fa,ha);
+    fhmx = dmax(fa,ha);
+    if (fhmn == 0.f) {
+	*ssmin = 0.f;
+	if (fhmx == 0.f) {
+	    *ssmax = ga;
+	} else {
+/* Computing 2nd power */
+	    r__1 = dmin(fhmx,ga) / dmax(fhmx,ga);
+	    *ssmax = dmax(fhmx,ga) * sqrt(r__1 * r__1 + 1.f);
+	}
+    } else {
+	if (ga < fhmx) {
+	    as = fhmn / fhmx + 1.f;
+	    at = (fhmx - fhmn) / fhmx;
+/* Computing 2nd power */
+	    r__1 = ga / fhmx;
+	    au = r__1 * r__1;
+	    c__ = 2.f / (sqrt(as * as + au) + sqrt(at * at + au));
+	    *ssmin = fhmn * c__;
+	    *ssmax = fhmx / c__;
+	} else {
+	    au = fhmx / ga;
+	    if (au == 0.f) {
+
+/*
+                Avoid possible harmful underflow if exponent range
+                asymmetric (true SSMIN may not underflow even if
+                AU underflows)
+*/
+
+		*ssmin = fhmn * fhmx / ga;
+		*ssmax = ga;
+	    } else {
+		as = fhmn / fhmx + 1.f;
+		at = (fhmx - fhmn) / fhmx;
+/* Computing 2nd power */
+		r__1 = as * au;
+/* Computing 2nd power */
+		r__2 = at * au;
+		c__ = 1.f / (sqrt(r__1 * r__1 + 1.f) + sqrt(r__2 * r__2 + 1.f)
+			);
+		*ssmin = fhmn * c__ * au;
+		*ssmin += *ssmin;
+		*ssmax = ga / (c__ + c__);
+	    }
+	}
+    }
+    return 0;
+
+/*     End of SLAS2 */
+
+} /* slas2_ */
+
+/* Subroutine */ int slascl_(char *type__, integer *kl, integer *ku, real *
+	cfrom, real *cto, integer *m, integer *n, real *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+
+    /* Local variables */
+    static integer i__, j, k1, k2, k3, k4;
+    static real mul, cto1;
+    static logical done;
+    static real ctoc;
+    extern logical lsame_(char *, char *);
+    static integer itype;
+    static real cfrom1;
+    extern doublereal slamch_(char *);
+    static real cfromc;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real bignum, smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLASCL multiplies the M by N real matrix A by the real scalar
+    CTO/CFROM.  This is done without over/underflow as long as the final
+    result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+    A may be full, upper triangular, lower triangular, upper Hessenberg,
+    or banded.
+
+    Arguments
+    =========
+
+    TYPE    (input) CHARACTER*1
+            TYPE indices the storage type of the input matrix.
+            = 'G':  A is a full matrix.
+            = 'L':  A is a lower triangular matrix.
+            = 'U':  A is an upper triangular matrix.
+            = 'H':  A is an upper Hessenberg matrix.
+            = 'B':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the lower
+                    half stored.
+            = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the upper
+                    half stored.
+            = 'Z':  A is a band matrix with lower bandwidth KL and upper
+                    bandwidth KU.
+
+    KL      (input) INTEGER
+            The lower bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    KU      (input) INTEGER
+            The upper bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    CFROM   (input) REAL
+    CTO     (input) REAL
+            The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+            without over/underflow if the final result CTO*A(I,J)/CFROM
+            can be represented without over/underflow.  CFROM must be
+            nonzero.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,M)
+            The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+            storage type.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    INFO    (output) INTEGER
+            0  - successful exit
+            <0 - if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(type__, "G")) {
+	itype = 0;
+    } else if (lsame_(type__, "L")) {
+	itype = 1;
+    } else if (lsame_(type__, "U")) {
+	itype = 2;
+    } else if (lsame_(type__, "H")) {
+	itype = 3;
+    } else if (lsame_(type__, "B")) {
+	itype = 4;
+    } else if (lsame_(type__, "Q")) {
+	itype = 5;
+    } else if (lsame_(type__, "Z")) {
+	itype = 6;
+    } else {
+	itype = -1;
+    }
+
+    if (itype == -1) {
+	*info = -1;
+    } else if (*cfrom == 0.f) {
+	*info = -4;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n !=
+	    *m)) {
+	*info = -7;
+    } else if (itype <= 3 && *lda < max(1,*m)) {
+	*info = -9;
+    } else if (itype >= 4) {
+/* Computing MAX */
+	i__1 = *m - 1;
+	if ((*kl < 0) || (*kl > max(i__1,0))) {
+	    *info = -2;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = *n - 1;
+	    if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || (
+		    itype == 5)) && *kl != *ku)) {
+		*info = -3;
+	    } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda
+		    < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku
+		    + 1)) {
+		*info = -9;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASCL", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*m == 0)) {
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = slamch_("S");
+    bignum = 1.f / smlnum;
+
+    cfromc = *cfrom;
+    ctoc = *cto;
+
+L10:
+    cfrom1 = cfromc * smlnum;
+    cto1 = ctoc / bignum;
+    if (dabs(cfrom1) > dabs(ctoc) && ctoc != 0.f) {
+	mul = smlnum;
+	done = FALSE_;
+	cfromc = cfrom1;
+    } else if (dabs(cto1) > dabs(cfromc)) {
+	mul = bignum;
+	done = FALSE_;
+	ctoc = cto1;
+    } else {
+	mul = ctoc / cfromc;
+	done = TRUE_;
+    }
+
+    if (itype == 0) {
+
+/*        Full matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L20: */
+	    }
+/* L30: */
+	}
+
+    } else if (itype == 1) {
+
+/*        Lower triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L40: */
+	    }
+/* L50: */
+	}
+
+    } else if (itype == 2) {
+
+/*        Upper triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L60: */
+	    }
+/* L70: */
+	}
+
+    } else if (itype == 3) {
+
+/*        Upper Hessenberg matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j + 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L80: */
+	    }
+/* L90: */
+	}
+
+    } else if (itype == 4) {
+
+/*        Lower half of a symmetric band matrix */
+
+	k3 = *kl + 1;
+	k4 = *n + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = k3, i__4 = k4 - j;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L100: */
+	    }
+/* L110: */
+	}
+
+    } else if (itype == 5) {
+
+/*        Upper half of a symmetric band matrix */
+
+	k1 = *ku + 2;
+	k3 = *ku + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__2 = k1 - j;
+	    i__3 = k3;
+	    for (i__ = max(i__2,1); i__ <= i__3; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L120: */
+	    }
+/* L130: */
+	}
+
+    } else if (itype == 6) {
+
+/*        Band matrix */
+
+	k1 = *kl + *ku + 2;
+	k2 = *kl + 1;
+	k3 = ((*kl) << (1)) + *ku + 1;
+	k4 = *kl + *ku + 1 + *m;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__3 = k1 - j;
+/* Computing MIN */
+	    i__4 = k3, i__5 = k4 - j;
+	    i__2 = min(i__4,i__5);
+	    for (i__ = max(i__3,k2); i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] *= mul;
+/* L140: */
+	    }
+/* L150: */
+	}
+
+    }
+
+    if (! done) {
+	goto L10;
+    }
+
+    return 0;
+
+/*     End of SLASCL */
+
+} /* slascl_ */
+
+/* Subroutine */ int slasd0_(integer *n, integer *sqre, real *d__, real *e,
+	real *u, integer *ldu, real *vt, integer *ldvt, integer *smlsiz,
+	integer *iwork, real *work, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, m, i1, ic, lf, nd, ll, nl, nr, im1, ncc, nlf, nrf,
+	    iwk, lvl, ndb1, nlp1, nrp1;
+    static real beta;
+    static integer idxq, nlvl;
+    static real alpha;
+    static integer inode, ndiml, idxqc, ndimr, itemp, sqrei;
+    extern /* Subroutine */ int slasd1_(integer *, integer *, integer *, real
+	    *, real *, real *, real *, integer *, real *, integer *, integer *
+	    , integer *, real *, integer *), xerbla_(char *, integer *), slasdq_(char *, integer *, integer *, integer *, integer
+	    *, integer *, real *, real *, real *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *), slasdt_(integer *
+	    , integer *, integer *, integer *, integer *, integer *, integer *
+	    );
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    Using a divide and conquer approach, SLASD0 computes the singular
+    value decomposition (SVD) of a real upper bidiagonal N-by-M
+    matrix B with diagonal D and offdiagonal E, where M = N + SQRE.
+    The algorithm computes orthogonal matrices U and VT such that
+    B = U * S * VT. The singular values S are overwritten on D.
+
+    A related subroutine, SLASDA, computes only the singular values,
+    and optionally, the singular vectors in compact form.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           On entry, the row dimension of the upper bidiagonal matrix.
+           This is also the dimension of the main diagonal array D.
+
+    SQRE   (input) INTEGER
+           Specifies the column dimension of the bidiagonal matrix.
+           = 0: The bidiagonal matrix has column dimension M = N;
+           = 1: The bidiagonal matrix has column dimension M = N+1;
+
+    D      (input/output) REAL array, dimension (N)
+           On entry D contains the main diagonal of the bidiagonal
+           matrix.
+           On exit D, if INFO = 0, contains its singular values.
+
+    E      (input) REAL array, dimension (M-1)
+           Contains the subdiagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    U      (output) REAL array, dimension at least (LDQ, N)
+           On exit, U contains the left singular vectors.
+
+    LDU    (input) INTEGER
+           On entry, leading dimension of U.
+
+    VT     (output) REAL array, dimension at least (LDVT, M)
+           On exit, VT' contains the right singular vectors.
+
+    LDVT   (input) INTEGER
+           On entry, leading dimension of VT.
+
+    SMLSIZ (input) INTEGER
+           On entry, maximum size of the subproblems at the
+           bottom of the computation tree.
+
+    IWORK  INTEGER work array.
+           Dimension must be at least (8 * N)
+
+    WORK   REAL work array.
+           Dimension must be at least (3 * M**2 + 2 * M)
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --iwork;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -2;
+    }
+
+    m = *n + *sqre;
+
+    if (*ldu < *n) {
+	*info = -6;
+    } else if (*ldvt < m) {
+	*info = -8;
+    } else if (*smlsiz < 3) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD0", &i__1);
+	return 0;
+    }
+
+/*     If the input matrix is too small, call SLASDQ to find the SVD. */
+
+    if (*n <= *smlsiz) {
+	slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset],
+		ldvt, &u[u_offset], ldu, &u[u_offset], ldu, &work[1], info);
+	return 0;
+    }
+
+/*     Set up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+    idxq = ndimr + *n;
+    iwk = idxq + *n;
+    slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       For the nodes on bottom level of the tree, solve
+       their subproblems by SLASDQ.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    ncc = 0;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+       IC : center row of each node
+       NL : number of rows of left  subproblem
+       NR : number of rows of right subproblem
+       NLF: starting row of the left   subproblem
+       NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nlp1 = nl + 1;
+	nr = iwork[ndimr + i1];
+	nrp1 = nr + 1;
+	nlf = ic - nl;
+	nrf = ic + 1;
+	sqrei = 1;
+	slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &vt[
+		nlf + nlf * vt_dim1], ldvt, &u[nlf + nlf * u_dim1], ldu, &u[
+		nlf + nlf * u_dim1], ldu, &work[1], info);
+	if (*info != 0) {
+	    return 0;
+	}
+	itemp = idxq + nlf - 2;
+	i__2 = nl;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[itemp + j] = j;
+/* L10: */
+	}
+	if (i__ == nd) {
+	    sqrei = *sqre;
+	} else {
+	    sqrei = 1;
+	}
+	nrp1 = nr + sqrei;
+	slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &vt[
+		nrf + nrf * vt_dim1], ldvt, &u[nrf + nrf * u_dim1], ldu, &u[
+		nrf + nrf * u_dim1], ldu, &work[1], info);
+	if (*info != 0) {
+	    return 0;
+	}
+	itemp = idxq + ic;
+	i__2 = nr;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[itemp + j - 1] = j;
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*     Now conquer each subproblem bottom-up. */
+
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+
+/*
+          Find the first node LF and last node LL on the
+          current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    if (*sqre == 0 && i__ == ll) {
+		sqrei = *sqre;
+	    } else {
+		sqrei = 1;
+	    }
+	    idxqc = idxq + nlf - 1;
+	    alpha = d__[ic];
+	    beta = e[ic];
+	    slasd1_(&nl, &nr, &sqrei, &d__[nlf], &alpha, &beta, &u[nlf + nlf *
+		     u_dim1], ldu, &vt[nlf + nlf * vt_dim1], ldvt, &iwork[
+		    idxqc], &iwork[iwk], &work[1], info);
+	    if (*info != 0) {
+		return 0;
+	    }
+/* L40: */
+	}
+/* L50: */
+    }
+
+    return 0;
+
+/*     End of SLASD0 */
+
+} /* slasd0_ */
+
+/* Subroutine */ int slasd1_(integer *nl, integer *nr, integer *sqre, real *
+	d__, real *alpha, real *beta, real *u, integer *ldu, real *vt,
+	integer *ldvt, integer *idxq, integer *iwork, real *work, integer *
+	info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, vt_dim1, vt_offset, i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    static integer i__, k, m, n, n1, n2, iq, iz, iu2, ldq, idx, ldu2, ivt2,
+	    idxc, idxp, ldvt2;
+    extern /* Subroutine */ int slasd2_(integer *, integer *, integer *,
+	    integer *, real *, real *, real *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *, real *, integer *,
+	    integer *, integer *, integer *, integer *, integer *, integer *),
+	     slasd3_(integer *, integer *, integer *, integer *, real *, real
+	    *, integer *, real *, real *, integer *, real *, integer *, real *
+	    , integer *, real *, integer *, integer *, integer *, real *,
+	    integer *);
+    static integer isigma;
+    extern /* Subroutine */ int xerbla_(char *, integer *), slascl_(
+	    char *, integer *, integer *, real *, real *, integer *, integer *
+	    , real *, integer *, integer *), slamrg_(integer *,
+	    integer *, real *, integer *, integer *, integer *);
+    static real orgnrm;
+    static integer coltyp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLASD1 computes the SVD of an upper bidiagonal N-by-M matrix B,
+    where N = NL + NR + 1 and M = N + SQRE. SLASD1 is called from SLASD0.
+
+    A related subroutine SLASD7 handles the case in which the singular
+    values (and the singular vectors in factored form) are desired.
+
+    SLASD1 computes the SVD as follows:
+
+                  ( D1(in)  0    0     0 )
+      B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+                  (   0     0   D2(in) 0 )
+
+        = U(out) * ( D(out) 0) * VT(out)
+
+    where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+    with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+    elsewhere; and the entry b is empty if SQRE = 0.
+
+    The left singular vectors of the original matrix are stored in U, and
+    the transpose of the right singular vectors are stored in VT, and the
+    singular values are in D.  The algorithm consists of three stages:
+
+       The first stage consists of deflating the size of the problem
+       when there are multiple singular values or when there are zeros in
+       the Z vector.  For each such occurence the dimension of the
+       secular equation problem is reduced by one.  This stage is
+       performed by the routine SLASD2.
+
+       The second stage consists of calculating the updated
+       singular values. This is done by finding the square roots of the
+       roots of the secular equation via the routine SLASD4 (as called
+       by SLASD3). This routine also calculates the singular vectors of
+       the current problem.
+
+       The final stage consists of computing the updated singular vectors
+       directly using the updated singular values.  The singular vectors
+       for the current problem are multiplied with the singular vectors
+       from the overall problem.
+
+    Arguments
+    =========
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    D      (input/output) REAL array,
+                          dimension (N = NL+NR+1).
+           On entry D(1:NL,1:NL) contains the singular values of the
+           upper block; and D(NL+2:N) contains the singular values of
+           the lower block. On exit D(1:N) contains the singular values
+           of the modified matrix.
+
+    ALPHA  (input) REAL
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) REAL
+           Contains the off-diagonal element associated with the added
+           row.
+
+    U      (input/output) REAL array, dimension(LDU,N)
+           On entry U(1:NL, 1:NL) contains the left singular vectors of
+           the upper block; U(NL+2:N, NL+2:N) contains the left singular
+           vectors of the lower block. On exit U contains the left
+           singular vectors of the bidiagonal matrix.
+
+    LDU    (input) INTEGER
+           The leading dimension of the array U.  LDU >= max( 1, N ).
+
+    VT     (input/output) REAL array, dimension(LDVT,M)
+           where M = N + SQRE.
+           On entry VT(1:NL+1, 1:NL+1)' contains the right singular
+           vectors of the upper block; VT(NL+2:M, NL+2:M)' contains
+           the right singular vectors of the lower block. On exit
+           VT' contains the right singular vectors of the
+           bidiagonal matrix.
+
+    LDVT   (input) INTEGER
+           The leading dimension of the array VT.  LDVT >= max( 1, M ).
+
+    IDXQ  (output) INTEGER array, dimension(N)
+           This contains the permutation which will reintegrate the
+           subproblem just solved back into sorted order, i.e.
+           D( IDXQ( I = 1, N ) ) will be in ascending order.
+
+    IWORK  (workspace) INTEGER array, dimension( 4 * N )
+
+    WORK   (workspace) REAL array, dimension( 3*M**2 + 2*M )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --idxq;
+    --iwork;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*nl < 1) {
+	*info = -1;
+    } else if (*nr < 1) {
+	*info = -2;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -3;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD1", &i__1);
+	return 0;
+    }
+
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in SLASD2 and SLASD3.
+*/
+
+    ldu2 = n;
+    ldvt2 = m;
+
+    iz = 1;
+    isigma = iz + m;
+    iu2 = isigma + n;
+    ivt2 = iu2 + ldu2 * n;
+    iq = ivt2 + ldvt2 * m;
+
+    idx = 1;
+    idxc = idx + n;
+    coltyp = idxc + n;
+    idxp = coltyp + n;
+
+/*
+       Scale.
+
+   Computing MAX
+*/
+    r__1 = dabs(*alpha), r__2 = dabs(*beta);
+    orgnrm = dmax(r__1,r__2);
+    d__[*nl + 1] = 0.f;
+    i__1 = n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) {
+	    orgnrm = (r__1 = d__[i__], dabs(r__1));
+	}
+/* L10: */
+    }
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &n, &c__1, &d__[1], &n,
+	    info);
+    *alpha /= orgnrm;
+    *beta /= orgnrm;
+
+/*     Deflate singular values. */
+
+    slasd2_(nl, nr, sqre, &k, &d__[1], &work[iz], alpha, beta, &u[u_offset],
+	    ldu, &vt[vt_offset], ldvt, &work[isigma], &work[iu2], &ldu2, &
+	    work[ivt2], &ldvt2, &iwork[idxp], &iwork[idx], &iwork[idxc], &
+	    idxq[1], &iwork[coltyp], info);
+
+/*     Solve Secular Equation and update singular vectors. */
+
+    ldq = k;
+    slasd3_(nl, nr, sqre, &k, &d__[1], &work[iq], &ldq, &work[isigma], &u[
+	    u_offset], ldu, &work[iu2], &ldu2, &vt[vt_offset], ldvt, &work[
+	    ivt2], &ldvt2, &iwork[idxc], &iwork[coltyp], &work[iz], info);
+    if (*info != 0) {
+	return 0;
+    }
+
+/*     Unscale. */
+
+    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &n, &c__1, &d__[1], &n,
+	    info);
+
+/*     Prepare the IDXQ sorting permutation. */
+
+    n1 = k;
+    n2 = n - k;
+    slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);
+
+    return 0;
+
+/*     End of SLASD1 */
+
+} /* slasd1_ */
+
+/* Subroutine */ int slasd2_(integer *nl, integer *nr, integer *sqre, integer
+	*k, real *d__, real *z__, real *alpha, real *beta, real *u, integer *
+	ldu, real *vt, integer *ldvt, real *dsigma, real *u2, integer *ldu2,
+	real *vt2, integer *ldvt2, integer *idxp, integer *idx, integer *idxc,
+	 integer *idxq, integer *coltyp, integer *info)
+{
+    /* System generated locals */
+    integer u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset,
+	    vt2_dim1, vt2_offset, i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    static real c__;
+    static integer i__, j, m, n;
+    static real s;
+    static integer k2;
+    static real z1;
+    static integer ct, jp;
+    static real eps, tau, tol;
+    static integer psm[4], nlp1, nlp2, idxi, idxj, ctot[4];
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *);
+    static integer idxjp, jprev;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
+	    integer *, integer *, real *, integer *, integer *, integer *);
+    static real hlftol;
+    extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *,
+	    integer *, real *, integer *), slaset_(char *, integer *,
+	    integer *, real *, real *, real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASD2 merges the two sets of singular values together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    singular values are close together or if there is a tiny entry in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    SLASD2 is called from SLASD1.
+
+    Arguments
+    =========
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has N = NL + NR + 1 rows and
+           M = N + SQRE >= N columns.
+
+    K      (output) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    D      (input/output) REAL array, dimension(N)
+           On entry D contains the singular values of the two submatrices
+           to be combined.  On exit D contains the trailing (N-K) updated
+           singular values (those which were deflated) sorted into
+           increasing order.
+
+    ALPHA  (input) REAL
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) REAL
+           Contains the off-diagonal element associated with the added
+           row.
+
+    U      (input/output) REAL array, dimension(LDU,N)
+           On entry U contains the left singular vectors of two
+           submatrices in the two square blocks with corners at (1,1),
+           (NL, NL), and (NL+2, NL+2), (N,N).
+           On exit U contains the trailing (N-K) updated left singular
+           vectors (those which were deflated) in its last N-K columns.
+
+    LDU    (input) INTEGER
+           The leading dimension of the array U.  LDU >= N.
+
+    Z      (output) REAL array, dimension(N)
+           On exit Z contains the updating row vector in the secular
+           equation.
+
+    DSIGMA (output) REAL array, dimension (N)
+           Contains a copy of the diagonal elements (K-1 singular values
+           and one zero) in the secular equation.
+
+    U2     (output) REAL array, dimension(LDU2,N)
+           Contains a copy of the first K-1 left singular vectors which
+           will be used by SLASD3 in a matrix multiply (SGEMM) to solve
+           for the new left singular vectors. U2 is arranged into four
+           blocks. The first block contains a column with 1 at NL+1 and
+           zero everywhere else; the second block contains non-zero
+           entries only at and above NL; the third contains non-zero
+           entries only below NL+1; and the fourth is dense.
+
+    LDU2   (input) INTEGER
+           The leading dimension of the array U2.  LDU2 >= N.
+
+    VT     (input/output) REAL array, dimension(LDVT,M)
+           On entry VT' contains the right singular vectors of two
+           submatrices in the two square blocks with corners at (1,1),
+           (NL+1, NL+1), and (NL+2, NL+2), (M,M).
+           On exit VT' contains the trailing (N-K) updated right singular
+           vectors (those which were deflated) in its last N-K columns.
+           In case SQRE =1, the last row of VT spans the right null
+           space.
+
+    LDVT   (input) INTEGER
+           The leading dimension of the array VT.  LDVT >= M.
+
+    VT2    (output) REAL array, dimension(LDVT2,N)
+           VT2' contains a copy of the first K right singular vectors
+           which will be used by SLASD3 in a matrix multiply (SGEMM) to
+           solve for the new right singular vectors. VT2 is arranged into
+           three blocks. The first block contains a row that corresponds
+           to the special 0 diagonal element in SIGMA; the second block
+           contains non-zeros only at and before NL +1; the third block
+           contains non-zeros only at and after  NL +2.
+
+    LDVT2  (input) INTEGER
+           The leading dimension of the array VT2.  LDVT2 >= M.
+
+    IDXP   (workspace) INTEGER array, dimension(N)
+           This will contain the permutation used to place deflated
+           values of D at the end of the array. On output IDXP(2:K)
+           points to the nondeflated D-values and IDXP(K+1:N)
+           points to the deflated singular values.
+
+    IDX    (workspace) INTEGER array, dimension(N)
+           This will contain the permutation used to sort the contents of
+           D into ascending order.
+
+    IDXC   (output) INTEGER array, dimension(N)
+           This will contain the permutation used to arrange the columns
+           of the deflated U matrix into three groups:  the first group
+           contains non-zero entries only at and above NL, the second
+           contains non-zero entries only below NL+2, and the third is
+           dense.
+
+    COLTYP (workspace/output) INTEGER array, dimension(N)
+           As workspace, this will contain a label which will indicate
+           which of the following types a column in the U2 matrix or a
+           row in the VT2 matrix is:
+           1 : non-zero in the upper half only
+           2 : non-zero in the lower half only
+           3 : dense
+           4 : deflated
+
+           On exit, it is an array of dimension 4, with COLTYP(I) being
+           the dimension of the I-th type columns.
+
+    IDXQ   (input) INTEGER array, dimension(N)
+           This contains the permutation which separately sorts the two
+           sub-problems in D into ascending order.  Note that entries in
+           the first hlaf of this permutation must first be moved one
+           position backward; and entries in the second half
+           must first have NL+1 added to their values.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --z__;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --dsigma;
+    u2_dim1 = *ldu2;
+    u2_offset = 1 + u2_dim1;
+    u2 -= u2_offset;
+    vt2_dim1 = *ldvt2;
+    vt2_offset = 1 + vt2_dim1;
+    vt2 -= vt2_offset;
+    --idxp;
+    --idx;
+    --idxc;
+    --idxq;
+    --coltyp;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*nl < 1) {
+	*info = -1;
+    } else if (*nr < 1) {
+	*info = -2;
+    } else if (*sqre != 1 && *sqre != 0) {
+	*info = -3;
+    }
+
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+    if (*ldu < n) {
+	*info = -10;
+    } else if (*ldvt < m) {
+	*info = -12;
+    } else if (*ldu2 < n) {
+	*info = -15;
+    } else if (*ldvt2 < m) {
+	*info = -17;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD2", &i__1);
+	return 0;
+    }
+
+    nlp1 = *nl + 1;
+    nlp2 = *nl + 2;
+
+/*
+       Generate the first part of the vector Z; and move the singular
+       values in the first part of D one position backward.
+*/
+
+    z1 = *alpha * vt[nlp1 + nlp1 * vt_dim1];
+    z__[1] = z1;
+    for (i__ = *nl; i__ >= 1; --i__) {
+	z__[i__ + 1] = *alpha * vt[i__ + nlp1 * vt_dim1];
+	d__[i__ + 1] = d__[i__];
+	idxq[i__ + 1] = idxq[i__] + 1;
+/* L10: */
+    }
+
+/*     Generate the second part of the vector Z. */
+
+    i__1 = m;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	z__[i__] = *beta * vt[i__ + nlp2 * vt_dim1];
+/* L20: */
+    }
+
+/*     Initialize some reference arrays. */
+
+    i__1 = nlp1;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	coltyp[i__] = 1;
+/* L30: */
+    }
+    i__1 = n;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	coltyp[i__] = 2;
+/* L40: */
+    }
+
+/*     Sort the singular values into increasing order */
+
+    i__1 = n;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	idxq[i__] += nlp1;
+/* L50: */
+    }
+
+/*
+       DSIGMA, IDXC, IDXC, and the first column of U2
+       are used as storage space.
+*/
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	dsigma[i__] = d__[idxq[i__]];
+	u2[i__ + u2_dim1] = z__[idxq[i__]];
+	idxc[i__] = coltyp[idxq[i__]];
+/* L60: */
+    }
+
+    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	idxi = idx[i__] + 1;
+	d__[i__] = dsigma[idxi];
+	z__[i__] = u2[idxi + u2_dim1];
+	coltyp[i__] = idxc[idxi];
+/* L70: */
+    }
+
+/*     Calculate the allowable deflation tolerance */
+
+    eps = slamch_("Epsilon");
+/* Computing MAX */
+    r__1 = dabs(*alpha), r__2 = dabs(*beta);
+    tol = dmax(r__1,r__2);
+/* Computing MAX */
+    r__2 = (r__1 = d__[n], dabs(r__1));
+    tol = eps * 8.f * dmax(r__2,tol);
+
+/*
+       There are 2 kinds of deflation -- first a value in the z-vector
+       is small, second two (or more) singular values are very close
+       together (their difference is small).
+
+       If the value in the z-vector is small, we simply permute the
+       array so that the corresponding singular value is moved to the
+       end.
+
+       If two values in the D-vector are close, we perform a two-sided
+       rotation designed to make one of the corresponding z-vector
+       entries zero, and then permute the array so that the deflated
+       singular value is moved to the end.
+
+       If there are multiple singular values then the problem deflates.
+       Here the number of equal singular values are found.  As each equal
+       singular value is found, an elementary reflector is computed to
+       rotate the corresponding singular subspace so that the
+       corresponding components of Z are zero in this new basis.
+*/
+
+    *k = 1;
+    k2 = n + 1;
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	if ((r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    idxp[k2] = j;
+	    coltyp[j] = 4;
+	    if (j == n) {
+		goto L120;
+	    }
+	} else {
+	    jprev = j;
+	    goto L90;
+	}
+/* L80: */
+    }
+L90:
+    j = jprev;
+L100:
+    ++j;
+    if (j > n) {
+	goto L110;
+    }
+    if ((r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	idxp[k2] = j;
+	coltyp[j] = 4;
+    } else {
+
+/*        Check if singular values are close enough to allow deflation. */
+
+	if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    s = z__[jprev];
+	    c__ = z__[j];
+
+/*
+             Find sqrt(a**2+b**2) without overflow or
+             destructive underflow.
+*/
+
+	    tau = slapy2_(&c__, &s);
+	    c__ /= tau;
+	    s = -s / tau;
+	    z__[j] = tau;
+	    z__[jprev] = 0.f;
+
+/*
+             Apply back the Givens rotation to the left and right
+             singular vector matrices.
+*/
+
+	    idxjp = idxq[idx[jprev] + 1];
+	    idxj = idxq[idx[j] + 1];
+	    if (idxjp <= nlp1) {
+		--idxjp;
+	    }
+	    if (idxj <= nlp1) {
+		--idxj;
+	    }
+	    srot_(&n, &u[idxjp * u_dim1 + 1], &c__1, &u[idxj * u_dim1 + 1], &
+		    c__1, &c__, &s);
+	    srot_(&m, &vt[idxjp + vt_dim1], ldvt, &vt[idxj + vt_dim1], ldvt, &
+		    c__, &s);
+	    if (coltyp[j] != coltyp[jprev]) {
+		coltyp[j] = 3;
+	    }
+	    coltyp[jprev] = 4;
+	    --k2;
+	    idxp[k2] = jprev;
+	    jprev = j;
+	} else {
+	    ++(*k);
+	    u2[*k + u2_dim1] = z__[jprev];
+	    dsigma[*k] = d__[jprev];
+	    idxp[*k] = jprev;
+	    jprev = j;
+	}
+    }
+    goto L100;
+L110:
+
+/*     Record the last singular value. */
+
+    ++(*k);
+    u2[*k + u2_dim1] = z__[jprev];
+    dsigma[*k] = d__[jprev];
+    idxp[*k] = jprev;
+
+L120:
+
+/*
+       Count up the total number of the various types of columns, then
+       form a permutation which positions the four column types into
+       four groups of uniform structure (although one or more of these
+       groups may be empty).
+*/
+
+    for (j = 1; j <= 4; ++j) {
+	ctot[j - 1] = 0;
+/* L130: */
+    }
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	ct = coltyp[j];
+	++ctot[ct - 1];
+/* L140: */
+    }
+
+/*     PSM(*) = Position in SubMatrix (of types 1 through 4) */
+
+    psm[0] = 2;
+    psm[1] = ctot[0] + 2;
+    psm[2] = psm[1] + ctot[1];
+    psm[3] = psm[2] + ctot[2];
+
+/*
+       Fill out the IDXC array so that the permutation which it induces
+       will place all type-1 columns first, all type-2 columns next,
+       then all type-3's, and finally all type-4's, starting from the
+       second column. This applies similarly to the rows of VT.
+*/
+
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	jp = idxp[j];
+	ct = coltyp[jp];
+	idxc[psm[ct - 1]] = j;
+	++psm[ct - 1];
+/* L150: */
+    }
+
+/*
+       Sort the singular values and corresponding singular vectors into
+       DSIGMA, U2, and VT2 respectively.  The singular values/vectors
+       which were not deflated go into the first K slots of DSIGMA, U2,
+       and VT2 respectively, while those which were deflated go into the
+       last N - K slots, except that the first column/row will be treated
+       separately.
+*/
+
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	jp = idxp[j];
+	dsigma[j] = d__[jp];
+	idxj = idxq[idx[idxp[idxc[j]]] + 1];
+	if (idxj <= nlp1) {
+	    --idxj;
+	}
+	scopy_(&n, &u[idxj * u_dim1 + 1], &c__1, &u2[j * u2_dim1 + 1], &c__1);
+	scopy_(&m, &vt[idxj + vt_dim1], ldvt, &vt2[j + vt2_dim1], ldvt2);
+/* L160: */
+    }
+
+/*     Determine DSIGMA(1), DSIGMA(2) and Z(1) */
+
+    dsigma[1] = 0.f;
+    hlftol = tol / 2.f;
+    if (dabs(dsigma[2]) <= hlftol) {
+	dsigma[2] = hlftol;
+    }
+    if (m > n) {
+	z__[1] = slapy2_(&z1, &z__[m]);
+	if (z__[1] <= tol) {
+	    c__ = 1.f;
+	    s = 0.f;
+	    z__[1] = tol;
+	} else {
+	    c__ = z1 / z__[1];
+	    s = z__[m] / z__[1];
+	}
+    } else {
+	if (dabs(z1) <= tol) {
+	    z__[1] = tol;
+	} else {
+	    z__[1] = z1;
+	}
+    }
+
+/*     Move the rest of the updating row to Z. */
+
+    i__1 = *k - 1;
+    scopy_(&i__1, &u2[u2_dim1 + 2], &c__1, &z__[2], &c__1);
+
+/*
+       Determine the first column of U2, the first row of VT2 and the
+       last row of VT.
+*/
+
+    slaset_("A", &n, &c__1, &c_b320, &c_b320, &u2[u2_offset], ldu2)
+	    ;
+    u2[nlp1 + u2_dim1] = 1.f;
+    if (m > n) {
+	i__1 = nlp1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    vt[m + i__ * vt_dim1] = -s * vt[nlp1 + i__ * vt_dim1];
+	    vt2[i__ * vt2_dim1 + 1] = c__ * vt[nlp1 + i__ * vt_dim1];
+/* L170: */
+	}
+	i__1 = m;
+	for (i__ = nlp2; i__ <= i__1; ++i__) {
+	    vt2[i__ * vt2_dim1 + 1] = s * vt[m + i__ * vt_dim1];
+	    vt[m + i__ * vt_dim1] = c__ * vt[m + i__ * vt_dim1];
+/* L180: */
+	}
+    } else {
+	scopy_(&m, &vt[nlp1 + vt_dim1], ldvt, &vt2[vt2_dim1 + 1], ldvt2);
+    }
+    if (m > n) {
+	scopy_(&m, &vt[m + vt_dim1], ldvt, &vt2[m + vt2_dim1], ldvt2);
+    }
+
+/*
+       The deflated singular values and their corresponding vectors go
+       into the back of D, U, and V respectively.
+*/
+
+    if (n > *k) {
+	i__1 = n - *k;
+	scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	i__1 = n - *k;
+	slacpy_("A", &n, &i__1, &u2[(*k + 1) * u2_dim1 + 1], ldu2, &u[(*k + 1)
+		 * u_dim1 + 1], ldu);
+	i__1 = n - *k;
+	slacpy_("A", &i__1, &m, &vt2[*k + 1 + vt2_dim1], ldvt2, &vt[*k + 1 +
+		vt_dim1], ldvt);
+    }
+
+/*     Copy CTOT into COLTYP for referencing in SLASD3. */
+
+    for (j = 1; j <= 4; ++j) {
+	coltyp[j] = ctot[j - 1];
+/* L190: */
+    }
+
+    return 0;
+
+/*     End of SLASD2 */
+
+} /* slasd2_ */
+
+/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer
+	*k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer *
+	ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2,
+	integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer *
+	info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
+	    vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer i__, j, m, n, jc;
+    static real rho;
+    static integer nlp1, nlp2, nrp1;
+    static real temp;
+    extern doublereal snrm2_(integer *, real *, integer *);
+    static integer ctemp;
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer ktemp;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *,
+	    real *, real *, real *, real *, integer *), xerbla_(char *,
+	    integer *), slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *,
+	    real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASD3 finds all the square roots of the roots of the secular
+    equation, as defined by the values in D and Z.  It makes the
+    appropriate calls to SLASD4 and then updates the singular
+    vectors by matrix multiplication.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    SLASD3 is called from SLASD1.
+
+    Arguments
+    =========
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has N = NL + NR + 1 rows and
+           M = N + SQRE >= N columns.
+
+    K      (input) INTEGER
+           The size of the secular equation, 1 =< K = < N.
+
+    D      (output) REAL array, dimension(K)
+           On exit the square roots of the roots of the secular equation,
+           in ascending order.
+
+    Q      (workspace) REAL array,
+                       dimension at least (LDQ,K).
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= K.
+
+    DSIGMA (input) REAL array, dimension(K)
+           The first K elements of this array contain the old roots
+           of the deflated updating problem.  These are the poles
+           of the secular equation.
+
+    U      (input) REAL array, dimension (LDU, N)
+           The last N - K columns of this matrix contain the deflated
+           left singular vectors.
+
+    LDU    (input) INTEGER
+           The leading dimension of the array U.  LDU >= N.
+
+    U2     (input) REAL array, dimension (LDU2, N)
+           The first K columns of this matrix contain the non-deflated
+           left singular vectors for the split problem.
+
+    LDU2   (input) INTEGER
+           The leading dimension of the array U2.  LDU2 >= N.
+
+    VT     (input) REAL array, dimension (LDVT, M)
+           The last M - K columns of VT' contain the deflated
+           right singular vectors.
+
+    LDVT   (input) INTEGER
+           The leading dimension of the array VT.  LDVT >= N.
+
+    VT2    (input) REAL array, dimension (LDVT2, N)
+           The first K columns of VT2' contain the non-deflated
+           right singular vectors for the split problem.
+
+    LDVT2  (input) INTEGER
+           The leading dimension of the array VT2.  LDVT2 >= N.
+
+    IDXC   (input) INTEGER array, dimension ( N )
+           The permutation used to arrange the columns of U (and rows of
+           VT) into three groups:  the first group contains non-zero
+           entries only at and above (or before) NL +1; the second
+           contains non-zero entries only at and below (or after) NL+2;
+           and the third is dense. The first column of U and the row of
+           VT are treated separately, however.
+
+           The rows of the singular vectors found by SLASD4
+           must be likewise permuted before the matrix multiplies can
+           take place.
+
+    CTOT   (input) INTEGER array, dimension ( 4 )
+           A count of the total number of the various types of columns
+           in U (or rows in VT), as described in IDXC. The fourth column
+           type is any column which has been deflated.
+
+    Z      (input) REAL array, dimension (K)
+           The first K elements of this array contain the components
+           of the deflation-adjusted updating row vector.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+           > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --dsigma;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    u2_dim1 = *ldu2;
+    u2_offset = 1 + u2_dim1;
+    u2 -= u2_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    vt2_dim1 = *ldvt2;
+    vt2_offset = 1 + vt2_dim1;
+    vt2 -= vt2_offset;
+    --idxc;
+    --ctot;
+    --z__;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*nl < 1) {
+	*info = -1;
+    } else if (*nr < 1) {
+	*info = -2;
+    } else if (*sqre != 1 && *sqre != 0) {
+	*info = -3;
+    }
+
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+    nlp1 = *nl + 1;
+    nlp2 = *nl + 2;
+
+    if ((*k < 1) || (*k > n)) {
+	*info = -4;
+    } else if (*ldq < *k) {
+	*info = -7;
+    } else if (*ldu < n) {
+	*info = -10;
+    } else if (*ldu2 < n) {
+	*info = -12;
+    } else if (*ldvt < m) {
+	*info = -14;
+    } else if (*ldvt2 < m) {
+	*info = -16;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD3", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 1) {
+	d__[1] = dabs(z__[1]);
+	scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
+	if (z__[1] > 0.f) {
+	    scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
+	} else {
+	    i__1 = n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		u[i__ + u_dim1] = -u2[i__ + u2_dim1];
+/* L10: */
+	    }
+	}
+	return 0;
+    }
+
+/*
+       Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DSIGMA(I) if it is 1; this makes the subsequent
+       subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DSIGMA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DSIGMA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
+/* L20: */
+    }
+
+/*     Keep a copy of Z. */
+
+    scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);
+
+/*     Normalize Z. */
+
+    rho = snrm2_(k, &z__[1], &c__1);
+    slascl_("G", &c__0, &c__0, &rho, &c_b1011, k, &c__1, &z__[1], k, info);
+    rho *= rho;
+
+/*     Find the new singular values. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
+		 &vt[j * vt_dim1 + 1], info);
+
+/*        If the zero finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    return 0;
+	}
+/* L30: */
+    }
+
+/*     Compute updated Z. */
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
+	i__2 = i__ - 1;
+	for (j = 1; j <= i__2; ++j) {
+	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
+		    i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
+/* L40: */
+	}
+	i__2 = *k - 1;
+	for (j = i__; j <= i__2; ++j) {
+	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
+		    i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
+/* L50: */
+	}
+	r__2 = sqrt((r__1 = z__[i__], dabs(r__1)));
+	z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]);
+/* L60: */
+    }
+
+/*
+       Compute left singular vectors of the modified diagonal matrix,
+       and store related information for the right singular vectors.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
+		vt_dim1 + 1];
+	u[i__ * u_dim1 + 1] = -1.f;
+	i__2 = *k;
+	for (j = 2; j <= i__2; ++j) {
+	    vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
+		    * vt_dim1];
+	    u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
+/* L70: */
+	}
+	temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1);
+	q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
+	i__2 = *k;
+	for (j = 2; j <= i__2; ++j) {
+	    jc = idxc[j];
+	    q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
+/* L80: */
+	}
+/* L90: */
+    }
+
+/*     Update the left singular vector matrix. */
+
+    if (*k == 2) {
+	sgemm_("N", "N", &n, k, k, &c_b1011, &u2[u2_offset], ldu2, &q[
+		q_offset], ldq, &c_b320, &u[u_offset], ldu);
+	goto L100;
+    }
+    if (ctot[1] > 0) {
+	sgemm_("N", "N", nl, k, &ctot[1], &c_b1011, &u2[((u2_dim1) << (1)) +
+		1], ldu2, &q[q_dim1 + 2], ldq, &c_b320, &u[u_dim1 + 1], ldu);
+	if (ctot[3] > 0) {
+	    ktemp = ctot[1] + 2 + ctot[2];
+	    sgemm_("N", "N", nl, k, &ctot[3], &c_b1011, &u2[ktemp * u2_dim1 +
+		    1], ldu2, &q[ktemp + q_dim1], ldq, &c_b1011, &u[u_dim1 +
+		    1], ldu);
+	}
+    } else if (ctot[3] > 0) {
+	ktemp = ctot[1] + 2 + ctot[2];
+	sgemm_("N", "N", nl, k, &ctot[3], &c_b1011, &u2[ktemp * u2_dim1 + 1],
+		ldu2, &q[ktemp + q_dim1], ldq, &c_b320, &u[u_dim1 + 1], ldu);
+    } else {
+	slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
+    }
+    scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
+    ktemp = ctot[1] + 2;
+    ctemp = ctot[2] + ctot[3];
+    sgemm_("N", "N", nr, k, &ctemp, &c_b1011, &u2[nlp2 + ktemp * u2_dim1],
+	    ldu2, &q[ktemp + q_dim1], ldq, &c_b320, &u[nlp2 + u_dim1], ldu);
+
+/*     Generate the right singular vectors. */
+
+L100:
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1);
+	q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
+	i__2 = *k;
+	for (j = 2; j <= i__2; ++j) {
+	    jc = idxc[j];
+	    q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
+/* L110: */
+	}
+/* L120: */
+    }
+
+/*     Update the right singular vector matrix. */
+
+    if (*k == 2) {
+	sgemm_("N", "N", k, &m, k, &c_b1011, &q[q_offset], ldq, &vt2[
+		vt2_offset], ldvt2, &c_b320, &vt[vt_offset], ldvt);
+	return 0;
+    }
+    ktemp = ctot[1] + 1;
+    sgemm_("N", "N", k, &nlp1, &ktemp, &c_b1011, &q[q_dim1 + 1], ldq, &vt2[
+	    vt2_dim1 + 1], ldvt2, &c_b320, &vt[vt_dim1 + 1], ldvt);
+    ktemp = ctot[1] + 2 + ctot[2];
+    if (ktemp <= *ldvt2) {
+	sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b1011, &q[ktemp * q_dim1 + 1],
+		 ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b1011, &vt[vt_dim1 +
+		1], ldvt);
+    }
+
+    ktemp = ctot[1] + 1;
+    nrp1 = *nr + *sqre;
+    if (ktemp > 1) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
+/* L130: */
+	}
+	i__1 = m;
+	for (i__ = nlp2; i__ <= i__1; ++i__) {
+	    vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
+/* L140: */
+	}
+    }
+    ctemp = ctot[2] + 1 + ctot[3];
+    sgemm_("N", "N", k, &nrp1, &ctemp, &c_b1011, &q[ktemp * q_dim1 + 1], ldq,
+	    &vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b320, &vt[nlp2 * vt_dim1
+	    + 1], ldvt);
+
+    return 0;
+
+/*     End of SLASD3 */
+
+} /* slasd3_ */
+
+/* Subroutine */ int slasd4_(integer *n, integer *i__, real *d__, real *z__,
+	real *delta, real *rho, real *sigma, real *work, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real a, b, c__;
+    static integer j;
+    static real w, dd[3];
+    static integer ii;
+    static real dw, zz[3];
+    static integer ip1;
+    static real eta, phi, eps, tau, psi;
+    static integer iim1, iip1;
+    static real dphi, dpsi;
+    static integer iter;
+    static real temp, prew, sg2lb, sg2ub, temp1, temp2, dtiim, delsq, dtiip;
+    static integer niter;
+    static real dtisq;
+    static logical swtch;
+    static real dtnsq;
+    extern /* Subroutine */ int slaed6_(integer *, logical *, real *, real *,
+	    real *, real *, real *, integer *);
+    static real delsq2;
+    extern /* Subroutine */ int slasd5_(integer *, real *, real *, real *,
+	    real *, real *, real *);
+    static real dtnsq1;
+    static logical swtch3;
+    extern doublereal slamch_(char *);
+    static logical orgati;
+    static real erretm, dtipsq, rhoinv;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    This subroutine computes the square root of the I-th updated
+    eigenvalue of a positive symmetric rank-one modification to
+    a positive diagonal matrix whose entries are given as the squares
+    of the corresponding entries in the array d, and that
+
+           0 <= D(i) < D(j)  for  i < j
+
+    and that RHO > 0. This is arranged by the calling routine, and is
+    no loss in generality.  The rank-one modified system is thus
+
+           diag( D ) * diag( D ) +  RHO *  Z * Z_transpose.
+
+    where we assume the Euclidean norm of Z is 1.
+
+    The method consists of approximating the rational functions in the
+    secular equation by simpler interpolating rational functions.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The length of all arrays.
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  1 <= I <= N.
+
+    D      (input) REAL array, dimension ( N )
+           The original eigenvalues.  It is assumed that they are in
+           order, 0 <= D(I) < D(J)  for I < J.
+
+    Z      (input) REAL array, dimension ( N )
+           The components of the updating vector.
+
+    DELTA  (output) REAL array, dimension ( N )
+           If N .ne. 1, DELTA contains (D(j) - sigma_I) in its  j-th
+           component.  If N = 1, then DELTA(1) = 1.  The vector DELTA
+           contains the information necessary to construct the
+           (singular) eigenvectors.
+
+    RHO    (input) REAL
+           The scalar in the symmetric updating formula.
+
+    SIGMA  (output) REAL
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    WORK   (workspace) REAL array, dimension ( N )
+           If N .ne. 1, WORK contains (D(j) + sigma_I) in its  j-th
+           component.  If N = 1, then WORK( 1 ) = 1.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit
+           > 0:  if INFO = 1, the updating process failed.
+
+    Internal Parameters
+    ===================
+
+    Logical variable ORGATI (origin-at-i?) is used for distinguishing
+    whether D(i) or D(i+1) is treated as the origin.
+
+              ORGATI = .true.    origin at i
+              ORGATI = .false.   origin at i+1
+
+    Logical variable SWTCH3 (switch-for-3-poles?) is for noting
+    if we are working with THREE poles!
+
+    MAXIT is the maximum number of iterations allowed for each
+    eigenvalue.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Since this routine is called in an inner loop, we do no argument
+       checking.
+
+       Quick return for N=1 and 2.
+*/
+
+    /* Parameter adjustments */
+    --work;
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    if (*n == 1) {
+
+/*        Presumably, I=1 upon entry */
+
+	*sigma = sqrt(d__[1] * d__[1] + *rho * z__[1] * z__[1]);
+	delta[1] = 1.f;
+	work[1] = 1.f;
+	return 0;
+    }
+    if (*n == 2) {
+	slasd5_(i__, &d__[1], &z__[1], &delta[1], rho, sigma, &work[1]);
+	return 0;
+    }
+
+/*     Compute machine epsilon */
+
+    eps = slamch_("Epsilon");
+    rhoinv = 1.f / *rho;
+
+/*     The case I = N */
+
+    if (*i__ == *n) {
+
+/*        Initialize some basic variables */
+
+	ii = *n - 1;
+	niter = 1;
+
+/*        Calculate initial guess */
+
+	temp = *rho / 2.f;
+
+/*
+          If ||Z||_2 is not one, then TEMP should be set to
+          RHO * ||Z||_2^2 / TWO
+*/
+
+	temp1 = temp / (d__[*n] + sqrt(d__[*n] * d__[*n] + temp));
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    work[j] = d__[j] + d__[*n] + temp1;
+	    delta[j] = d__[j] - d__[*n] - temp1;
+/* L10: */
+	}
+
+	psi = 0.f;
+	i__1 = *n - 2;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / (delta[j] * work[j]);
+/* L20: */
+	}
+
+	c__ = rhoinv + psi;
+	w = c__ + z__[ii] * z__[ii] / (delta[ii] * work[ii]) + z__[*n] * z__[*
+		n] / (delta[*n] * work[*n]);
+
+	if (w <= 0.f) {
+	    temp1 = sqrt(d__[*n] * d__[*n] + *rho);
+	    temp = z__[*n - 1] * z__[*n - 1] / ((d__[*n - 1] + temp1) * (d__[*
+		    n] - d__[*n - 1] + *rho / (d__[*n] + temp1))) + z__[*n] *
+		    z__[*n] / *rho;
+
+/*
+             The following TAU is to approximate
+             SIGMA_n^2 - D( N )*D( N )
+*/
+
+	    if (c__ <= temp) {
+		tau = *rho;
+	    } else {
+		delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
+		a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*
+			n];
+		b = z__[*n] * z__[*n] * delsq;
+		if (a < 0.f) {
+		    tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
+		} else {
+		    tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
+		}
+	    }
+
+/*
+             It can be proved that
+                 D(N)^2+RHO/2 <= SIGMA_n^2 < D(N)^2+TAU <= D(N)^2+RHO
+*/
+
+	} else {
+	    delsq = (d__[*n] - d__[*n - 1]) * (d__[*n] + d__[*n - 1]);
+	    a = -c__ * delsq + z__[*n - 1] * z__[*n - 1] + z__[*n] * z__[*n];
+	    b = z__[*n] * z__[*n] * delsq;
+
+/*
+             The following TAU is to approximate
+             SIGMA_n^2 - D( N )*D( N )
+*/
+
+	    if (a < 0.f) {
+		tau = b * 2.f / (sqrt(a * a + b * 4.f * c__) - a);
+	    } else {
+		tau = (a + sqrt(a * a + b * 4.f * c__)) / (c__ * 2.f);
+	    }
+
+/*
+             It can be proved that
+             D(N)^2 < D(N)^2+TAU < SIGMA(N)^2 < D(N)^2+RHO/2
+*/
+
+	}
+
+/*        The following ETA is to approximate SIGMA_n - D( N ) */
+
+	eta = tau / (d__[*n] + sqrt(d__[*n] * d__[*n] + tau));
+
+	*sigma = d__[*n] + eta;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] = d__[j] - d__[*i__] - eta;
+	    work[j] = d__[j] + d__[*i__] + eta;
+/* L30: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (delta[j] * work[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L40: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / (delta[*n] * work[*n]);
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
+		dpsi + dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Test for convergence */
+
+	if (dabs(w) <= eps * erretm) {
+	    goto L240;
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	dtnsq1 = work[*n - 1] * delta[*n - 1];
+	dtnsq = work[*n] * delta[*n];
+	c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
+	a = (dtnsq + dtnsq1) * w - dtnsq * dtnsq1 * (dpsi + dphi);
+	b = dtnsq * dtnsq1 * w;
+	if (c__ < 0.f) {
+	    c__ = dabs(c__);
+	}
+	if (c__ == 0.f) {
+	    eta = *rho - *sigma * *sigma;
+	} else if (a >= 0.f) {
+	    eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
+		    c__ * 2.f);
+	} else {
+	    eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+		    r__1))));
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta > 0.f) {
+	    eta = -w / (dpsi + dphi);
+	}
+	temp = eta - dtnsq;
+	if (temp > *rho) {
+	    eta = *rho + dtnsq;
+	}
+
+	tau += eta;
+	eta /= *sigma + sqrt(eta + *sigma * *sigma);
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    delta[j] -= eta;
+	    work[j] += eta;
+/* L50: */
+	}
+
+	*sigma += eta;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = ii;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L60: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	temp = z__[*n] / (work[*n] * delta[*n]);
+	phi = z__[*n] * temp;
+	dphi = temp * temp;
+	erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) * (
+		dpsi + dphi);
+
+	w = rhoinv + phi + psi;
+
+/*        Main loop to update the values of the array   DELTA */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 20; ++niter) {
+
+/*           Test for convergence */
+
+	    if (dabs(w) <= eps * erretm) {
+		goto L240;
+	    }
+
+/*           Calculate the new step */
+
+	    dtnsq1 = work[*n - 1] * delta[*n - 1];
+	    dtnsq = work[*n] * delta[*n];
+	    c__ = w - dtnsq1 * dpsi - dtnsq * dphi;
+	    a = (dtnsq + dtnsq1) * w - dtnsq1 * dtnsq * (dpsi + dphi);
+	    b = dtnsq1 * dtnsq * w;
+	    if (a >= 0.f) {
+		eta = (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
+			 (c__ * 2.f);
+	    } else {
+		eta = b * 2.f / (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+			r__1))));
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta > 0.f) {
+		eta = -w / (dpsi + dphi);
+	    }
+	    temp = eta - dtnsq;
+	    if (temp <= 0.f) {
+		eta /= 2.f;
+	    }
+
+	    tau += eta;
+	    eta /= *sigma + sqrt(eta + *sigma * *sigma);
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		delta[j] -= eta;
+		work[j] += eta;
+/* L70: */
+	    }
+
+	    *sigma += eta;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.f;
+	    psi = 0.f;
+	    erretm = 0.f;
+	    i__1 = ii;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / (work[j] * delta[j]);
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L80: */
+	    }
+	    erretm = dabs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    temp = z__[*n] / (work[*n] * delta[*n]);
+	    phi = z__[*n] * temp;
+	    dphi = temp * temp;
+	    erretm = (-phi - psi) * 8.f + erretm - phi + rhoinv + dabs(tau) *
+		    (dpsi + dphi);
+
+	    w = rhoinv + phi + psi;
+/* L90: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+	goto L240;
+
+/*        End for the case I = N */
+
+    } else {
+
+/*        The case for I < N */
+
+	niter = 1;
+	ip1 = *i__ + 1;
+
+/*        Calculate initial guess */
+
+	delsq = (d__[ip1] - d__[*i__]) * (d__[ip1] + d__[*i__]);
+	delsq2 = delsq / 2.f;
+	temp = delsq2 / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + delsq2));
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    work[j] = d__[j] + d__[*i__] + temp;
+	    delta[j] = d__[j] - d__[*i__] - temp;
+/* L100: */
+	}
+
+	psi = 0.f;
+	i__1 = *i__ - 1;
+	for (j = 1; j <= i__1; ++j) {
+	    psi += z__[j] * z__[j] / (work[j] * delta[j]);
+/* L110: */
+	}
+
+	phi = 0.f;
+	i__1 = *i__ + 2;
+	for (j = *n; j >= i__1; --j) {
+	    phi += z__[j] * z__[j] / (work[j] * delta[j]);
+/* L120: */
+	}
+	c__ = rhoinv + psi + phi;
+	w = c__ + z__[*i__] * z__[*i__] / (work[*i__] * delta[*i__]) + z__[
+		ip1] * z__[ip1] / (work[ip1] * delta[ip1]);
+
+	if (w > 0.f) {
+
+/*
+             d(i)^2 < the ith sigma^2 < (d(i)^2+d(i+1)^2)/2
+
+             We choose d(i) as origin.
+*/
+
+	    orgati = TRUE_;
+	    sg2lb = 0.f;
+	    sg2ub = delsq2;
+	    a = c__ * delsq + z__[*i__] * z__[*i__] + z__[ip1] * z__[ip1];
+	    b = z__[*i__] * z__[*i__] * delsq;
+	    if (a > 0.f) {
+		tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+			r__1))));
+	    } else {
+		tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
+			 (c__ * 2.f);
+	    }
+
+/*
+             TAU now is an estimation of SIGMA^2 - D( I )^2. The
+             following, however, is the corresponding estimation of
+             SIGMA - D( I ).
+*/
+
+	    eta = tau / (d__[*i__] + sqrt(d__[*i__] * d__[*i__] + tau));
+	} else {
+
+/*
+             (d(i)^2+d(i+1)^2)/2 <= the ith sigma^2 < d(i+1)^2/2
+
+             We choose d(i+1) as origin.
+*/
+
+	    orgati = FALSE_;
+	    sg2lb = -delsq2;
+	    sg2ub = 0.f;
+	    a = c__ * delsq - z__[*i__] * z__[*i__] - z__[ip1] * z__[ip1];
+	    b = z__[ip1] * z__[ip1] * delsq;
+	    if (a < 0.f) {
+		tau = b * 2.f / (a - sqrt((r__1 = a * a + b * 4.f * c__, dabs(
+			r__1))));
+	    } else {
+		tau = -(a + sqrt((r__1 = a * a + b * 4.f * c__, dabs(r__1))))
+			/ (c__ * 2.f);
+	    }
+
+/*
+             TAU now is an estimation of SIGMA^2 - D( IP1 )^2. The
+             following, however, is the corresponding estimation of
+             SIGMA - D( IP1 ).
+*/
+
+	    eta = tau / (d__[ip1] + sqrt((r__1 = d__[ip1] * d__[ip1] + tau,
+		    dabs(r__1))));
+	}
+
+	if (orgati) {
+	    ii = *i__;
+	    *sigma = d__[*i__] + eta;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		work[j] = d__[j] + d__[*i__] + eta;
+		delta[j] = d__[j] - d__[*i__] - eta;
+/* L130: */
+	    }
+	} else {
+	    ii = *i__ + 1;
+	    *sigma = d__[ip1] + eta;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		work[j] = d__[j] + d__[ip1] + eta;
+		delta[j] = d__[j] - d__[ip1] - eta;
+/* L140: */
+	    }
+	}
+	iim1 = ii - 1;
+	iip1 = ii + 1;
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L150: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.f;
+	phi = 0.f;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L160: */
+	}
+
+	w = rhoinv + phi + psi;
+
+/*
+          W is the value of the secular function with
+          its ii-th element removed.
+*/
+
+	swtch3 = FALSE_;
+	if (orgati) {
+	    if (w < 0.f) {
+		swtch3 = TRUE_;
+	    }
+	} else {
+	    if (w > 0.f) {
+		swtch3 = TRUE_;
+	    }
+	}
+	if ((ii == 1) || (ii == *n)) {
+	    swtch3 = FALSE_;
+	}
+
+	temp = z__[ii] / (work[ii] * delta[ii]);
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w += temp;
+	erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f
+		+ dabs(tau) * dw;
+
+/*        Test for convergence */
+
+	if (dabs(w) <= eps * erretm) {
+	    goto L240;
+	}
+
+	if (w <= 0.f) {
+	    sg2lb = dmax(sg2lb,tau);
+	} else {
+	    sg2ub = dmin(sg2ub,tau);
+	}
+
+/*        Calculate the new step */
+
+	++niter;
+	if (! swtch3) {
+	    dtipsq = work[ip1] * delta[ip1];
+	    dtisq = work[*i__] * delta[*i__];
+	    if (orgati) {
+/* Computing 2nd power */
+		r__1 = z__[*i__] / dtisq;
+		c__ = w - dtipsq * dw + delsq * (r__1 * r__1);
+	    } else {
+/* Computing 2nd power */
+		r__1 = z__[ip1] / dtipsq;
+		c__ = w - dtisq * dw - delsq * (r__1 * r__1);
+	    }
+	    a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
+	    b = dtipsq * dtisq * w;
+	    if (c__ == 0.f) {
+		if (a == 0.f) {
+		    if (orgati) {
+			a = z__[*i__] * z__[*i__] + dtipsq * dtipsq * (dpsi +
+				dphi);
+		    } else {
+			a = z__[ip1] * z__[ip1] + dtisq * dtisq * (dpsi +
+				dphi);
+		    }
+		}
+		eta = b / a;
+	    } else if (a <= 0.f) {
+		eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) /
+			 (c__ * 2.f);
+	    } else {
+		eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
+			r__1))));
+	    }
+	} else {
+
+/*           Interpolation using THREE most relevant poles */
+
+	    dtiim = work[iim1] * delta[iim1];
+	    dtiip = work[iip1] * delta[iip1];
+	    temp = rhoinv + psi + phi;
+	    if (orgati) {
+		temp1 = z__[iim1] / dtiim;
+		temp1 *= temp1;
+		c__ = temp - dtiip * (dpsi + dphi) - (d__[iim1] - d__[iip1]) *
+			 (d__[iim1] + d__[iip1]) * temp1;
+		zz[0] = z__[iim1] * z__[iim1];
+		if (dpsi < temp1) {
+		    zz[2] = dtiip * dtiip * dphi;
+		} else {
+		    zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
+		}
+	    } else {
+		temp1 = z__[iip1] / dtiip;
+		temp1 *= temp1;
+		c__ = temp - dtiim * (dpsi + dphi) - (d__[iip1] - d__[iim1]) *
+			 (d__[iim1] + d__[iip1]) * temp1;
+		if (dphi < temp1) {
+		    zz[0] = dtiim * dtiim * dpsi;
+		} else {
+		    zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
+		}
+		zz[2] = z__[iip1] * z__[iip1];
+	    }
+	    zz[1] = z__[ii] * z__[ii];
+	    dd[0] = dtiim;
+	    dd[1] = delta[ii] * work[ii];
+	    dd[2] = dtiip;
+	    slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
+	    if (*info != 0) {
+		goto L240;
+	    }
+	}
+
+/*
+          Note, eta should be positive if w is negative, and
+          eta should be negative otherwise. However,
+          if for some reason caused by roundoff, eta*w > 0,
+          we simply use one Newton step instead. This way
+          will guarantee eta*w < 0.
+*/
+
+	if (w * eta >= 0.f) {
+	    eta = -w / dw;
+	}
+	if (orgati) {
+	    temp1 = work[*i__] * delta[*i__];
+	    temp = eta - temp1;
+	} else {
+	    temp1 = work[ip1] * delta[ip1];
+	    temp = eta - temp1;
+	}
+	if ((temp > sg2ub) || (temp < sg2lb)) {
+	    if (w < 0.f) {
+		eta = (sg2ub - tau) / 2.f;
+	    } else {
+		eta = (sg2lb - tau) / 2.f;
+	    }
+	}
+
+	tau += eta;
+	eta /= *sigma + sqrt(*sigma * *sigma + eta);
+
+	prew = w;
+
+	*sigma += eta;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    work[j] += eta;
+	    delta[j] -= eta;
+/* L170: */
+	}
+
+/*        Evaluate PSI and the derivative DPSI */
+
+	dpsi = 0.f;
+	psi = 0.f;
+	erretm = 0.f;
+	i__1 = iim1;
+	for (j = 1; j <= i__1; ++j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    psi += z__[j] * temp;
+	    dpsi += temp * temp;
+	    erretm += psi;
+/* L180: */
+	}
+	erretm = dabs(erretm);
+
+/*        Evaluate PHI and the derivative DPHI */
+
+	dphi = 0.f;
+	phi = 0.f;
+	i__1 = iip1;
+	for (j = *n; j >= i__1; --j) {
+	    temp = z__[j] / (work[j] * delta[j]);
+	    phi += z__[j] * temp;
+	    dphi += temp * temp;
+	    erretm += phi;
+/* L190: */
+	}
+
+	temp = z__[ii] / (work[ii] * delta[ii]);
+	dw = dpsi + dphi + temp * temp;
+	temp = z__[ii] * temp;
+	w = rhoinv + phi + psi + temp;
+	erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) * 3.f
+		+ dabs(tau) * dw;
+
+	if (w <= 0.f) {
+	    sg2lb = dmax(sg2lb,tau);
+	} else {
+	    sg2ub = dmin(sg2ub,tau);
+	}
+
+	swtch = FALSE_;
+	if (orgati) {
+	    if (-w > dabs(prew) / 10.f) {
+		swtch = TRUE_;
+	    }
+	} else {
+	    if (w > dabs(prew) / 10.f) {
+		swtch = TRUE_;
+	    }
+	}
+
+/*        Main loop to update the values of the array   DELTA and WORK */
+
+	iter = niter + 1;
+
+	for (niter = iter; niter <= 20; ++niter) {
+
+/*           Test for convergence */
+
+	    if (dabs(w) <= eps * erretm) {
+		goto L240;
+	    }
+
+/*           Calculate the new step */
+
+	    if (! swtch3) {
+		dtipsq = work[ip1] * delta[ip1];
+		dtisq = work[*i__] * delta[*i__];
+		if (! swtch) {
+		    if (orgati) {
+/* Computing 2nd power */
+			r__1 = z__[*i__] / dtisq;
+			c__ = w - dtipsq * dw + delsq * (r__1 * r__1);
+		    } else {
+/* Computing 2nd power */
+			r__1 = z__[ip1] / dtipsq;
+			c__ = w - dtisq * dw - delsq * (r__1 * r__1);
+		    }
+		} else {
+		    temp = z__[ii] / (work[ii] * delta[ii]);
+		    if (orgati) {
+			dpsi += temp * temp;
+		    } else {
+			dphi += temp * temp;
+		    }
+		    c__ = w - dtisq * dpsi - dtipsq * dphi;
+		}
+		a = (dtipsq + dtisq) * w - dtipsq * dtisq * dw;
+		b = dtipsq * dtisq * w;
+		if (c__ == 0.f) {
+		    if (a == 0.f) {
+			if (! swtch) {
+			    if (orgati) {
+				a = z__[*i__] * z__[*i__] + dtipsq * dtipsq *
+					(dpsi + dphi);
+			    } else {
+				a = z__[ip1] * z__[ip1] + dtisq * dtisq * (
+					dpsi + dphi);
+			    }
+			} else {
+			    a = dtisq * dtisq * dpsi + dtipsq * dtipsq * dphi;
+			}
+		    }
+		    eta = b / a;
+		} else if (a <= 0.f) {
+		    eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1))
+			    )) / (c__ * 2.f);
+		} else {
+		    eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__,
+			    dabs(r__1))));
+		}
+	    } else {
+
+/*              Interpolation using THREE most relevant poles */
+
+		dtiim = work[iim1] * delta[iim1];
+		dtiip = work[iip1] * delta[iip1];
+		temp = rhoinv + psi + phi;
+		if (swtch) {
+		    c__ = temp - dtiim * dpsi - dtiip * dphi;
+		    zz[0] = dtiim * dtiim * dpsi;
+		    zz[2] = dtiip * dtiip * dphi;
+		} else {
+		    if (orgati) {
+			temp1 = z__[iim1] / dtiim;
+			temp1 *= temp1;
+			temp2 = (d__[iim1] - d__[iip1]) * (d__[iim1] + d__[
+				iip1]) * temp1;
+			c__ = temp - dtiip * (dpsi + dphi) - temp2;
+			zz[0] = z__[iim1] * z__[iim1];
+			if (dpsi < temp1) {
+			    zz[2] = dtiip * dtiip * dphi;
+			} else {
+			    zz[2] = dtiip * dtiip * (dpsi - temp1 + dphi);
+			}
+		    } else {
+			temp1 = z__[iip1] / dtiip;
+			temp1 *= temp1;
+			temp2 = (d__[iip1] - d__[iim1]) * (d__[iim1] + d__[
+				iip1]) * temp1;
+			c__ = temp - dtiim * (dpsi + dphi) - temp2;
+			if (dphi < temp1) {
+			    zz[0] = dtiim * dtiim * dpsi;
+			} else {
+			    zz[0] = dtiim * dtiim * (dpsi + (dphi - temp1));
+			}
+			zz[2] = z__[iip1] * z__[iip1];
+		    }
+		}
+		dd[0] = dtiim;
+		dd[1] = delta[ii] * work[ii];
+		dd[2] = dtiip;
+		slaed6_(&niter, &orgati, &c__, dd, zz, &w, &eta, info);
+		if (*info != 0) {
+		    goto L240;
+		}
+	    }
+
+/*
+             Note, eta should be positive if w is negative, and
+             eta should be negative otherwise. However,
+             if for some reason caused by roundoff, eta*w > 0,
+             we simply use one Newton step instead. This way
+             will guarantee eta*w < 0.
+*/
+
+	    if (w * eta >= 0.f) {
+		eta = -w / dw;
+	    }
+	    if (orgati) {
+		temp1 = work[*i__] * delta[*i__];
+		temp = eta - temp1;
+	    } else {
+		temp1 = work[ip1] * delta[ip1];
+		temp = eta - temp1;
+	    }
+	    if ((temp > sg2ub) || (temp < sg2lb)) {
+		if (w < 0.f) {
+		    eta = (sg2ub - tau) / 2.f;
+		} else {
+		    eta = (sg2lb - tau) / 2.f;
+		}
+	    }
+
+	    tau += eta;
+	    eta /= *sigma + sqrt(*sigma * *sigma + eta);
+
+	    *sigma += eta;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		work[j] += eta;
+		delta[j] -= eta;
+/* L200: */
+	    }
+
+	    prew = w;
+
+/*           Evaluate PSI and the derivative DPSI */
+
+	    dpsi = 0.f;
+	    psi = 0.f;
+	    erretm = 0.f;
+	    i__1 = iim1;
+	    for (j = 1; j <= i__1; ++j) {
+		temp = z__[j] / (work[j] * delta[j]);
+		psi += z__[j] * temp;
+		dpsi += temp * temp;
+		erretm += psi;
+/* L210: */
+	    }
+	    erretm = dabs(erretm);
+
+/*           Evaluate PHI and the derivative DPHI */
+
+	    dphi = 0.f;
+	    phi = 0.f;
+	    i__1 = iip1;
+	    for (j = *n; j >= i__1; --j) {
+		temp = z__[j] / (work[j] * delta[j]);
+		phi += z__[j] * temp;
+		dphi += temp * temp;
+		erretm += phi;
+/* L220: */
+	    }
+
+	    temp = z__[ii] / (work[ii] * delta[ii]);
+	    dw = dpsi + dphi + temp * temp;
+	    temp = z__[ii] * temp;
+	    w = rhoinv + phi + psi + temp;
+	    erretm = (phi - psi) * 8.f + erretm + rhoinv * 2.f + dabs(temp) *
+		    3.f + dabs(tau) * dw;
+	    if (w * prew > 0.f && dabs(w) > dabs(prew) / 10.f) {
+		swtch = ! swtch;
+	    }
+
+	    if (w <= 0.f) {
+		sg2lb = dmax(sg2lb,tau);
+	    } else {
+		sg2ub = dmin(sg2ub,tau);
+	    }
+
+/* L230: */
+	}
+
+/*        Return with INFO = 1, NITER = MAXIT and not converged */
+
+	*info = 1;
+
+    }
+
+L240:
+    return 0;
+
+/*     End of SLASD4 */
+
+} /* slasd4_ */
+
+/* Subroutine */ int slasd5_(integer *i__, real *d__, real *z__, real *delta,
+	real *rho, real *dsigma, real *work)
+{
+    /* System generated locals */
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real b, c__, w, del, tau, delsq;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    This subroutine computes the square root of the I-th eigenvalue
+    of a positive symmetric rank-one modification of a 2-by-2 diagonal
+    matrix
+
+               diag( D ) * diag( D ) +  RHO *  Z * transpose(Z) .
+
+    The diagonal entries in the array D are assumed to satisfy
+
+               0 <= D(i) < D(j)  for  i < j .
+
+    We also assume RHO > 0 and that the Euclidean norm of the vector
+    Z is one.
+
+    Arguments
+    =========
+
+    I      (input) INTEGER
+           The index of the eigenvalue to be computed.  I = 1 or I = 2.
+
+    D      (input) REAL array, dimension ( 2 )
+           The original eigenvalues.  We assume 0 <= D(1) < D(2).
+
+    Z      (input) REAL array, dimension ( 2 )
+           The components of the updating vector.
+
+    DELTA  (output) REAL array, dimension ( 2 )
+           Contains (D(j) - lambda_I) in its  j-th component.
+           The vector DELTA contains the information necessary
+           to construct the eigenvectors.
+
+    RHO    (input) REAL
+           The scalar in the symmetric updating formula.
+
+    DSIGMA (output) REAL
+           The computed lambda_I, the I-th updated eigenvalue.
+
+    WORK   (workspace) REAL array, dimension ( 2 )
+           WORK contains (D(j) + sigma_I) in its  j-th component.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ren-Cang Li, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --work;
+    --delta;
+    --z__;
+    --d__;
+
+    /* Function Body */
+    del = d__[2] - d__[1];
+    delsq = del * (d__[2] + d__[1]);
+    if (*i__ == 1) {
+	w = *rho * 4.f * (z__[2] * z__[2] / (d__[1] + d__[2] * 3.f) - z__[1] *
+		 z__[1] / (d__[1] * 3.f + d__[2])) / del + 1.f;
+	if (w > 0.f) {
+	    b = delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[1] * z__[1] * delsq;
+
+/*
+             B > ZERO, always
+
+             The following TAU is DSIGMA * DSIGMA - D( 1 ) * D( 1 )
+*/
+
+	    tau = c__ * 2.f / (b + sqrt((r__1 = b * b - c__ * 4.f, dabs(r__1))
+		    ));
+
+/*           The following TAU is DSIGMA - D( 1 ) */
+
+	    tau /= d__[1] + sqrt(d__[1] * d__[1] + tau);
+	    *dsigma = d__[1] + tau;
+	    delta[1] = -tau;
+	    delta[2] = del - tau;
+	    work[1] = d__[1] * 2.f + tau;
+	    work[2] = d__[1] + tau + d__[2];
+/*
+             DELTA( 1 ) = -Z( 1 ) / TAU
+             DELTA( 2 ) = Z( 2 ) / ( DEL-TAU )
+*/
+	} else {
+	    b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	    c__ = *rho * z__[2] * z__[2] * delsq;
+
+/*           The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */
+
+	    if (b > 0.f) {
+		tau = c__ * -2.f / (b + sqrt(b * b + c__ * 4.f));
+	    } else {
+		tau = (b - sqrt(b * b + c__ * 4.f)) / 2.f;
+	    }
+
+/*           The following TAU is DSIGMA - D( 2 ) */
+
+	    tau /= d__[2] + sqrt((r__1 = d__[2] * d__[2] + tau, dabs(r__1)));
+	    *dsigma = d__[2] + tau;
+	    delta[1] = -(del + tau);
+	    delta[2] = -tau;
+	    work[1] = d__[1] + tau + d__[2];
+	    work[2] = d__[2] * 2.f + tau;
+/*
+             DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+             DELTA( 2 ) = -Z( 2 ) / TAU
+*/
+	}
+/*
+          TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+          DELTA( 1 ) = DELTA( 1 ) / TEMP
+          DELTA( 2 ) = DELTA( 2 ) / TEMP
+*/
+    } else {
+
+/*        Now I=2 */
+
+	b = -delsq + *rho * (z__[1] * z__[1] + z__[2] * z__[2]);
+	c__ = *rho * z__[2] * z__[2] * delsq;
+
+/*        The following TAU is DSIGMA * DSIGMA - D( 2 ) * D( 2 ) */
+
+	if (b > 0.f) {
+	    tau = (b + sqrt(b * b + c__ * 4.f)) / 2.f;
+	} else {
+	    tau = c__ * 2.f / (-b + sqrt(b * b + c__ * 4.f));
+	}
+
+/*        The following TAU is DSIGMA - D( 2 ) */
+
+	tau /= d__[2] + sqrt(d__[2] * d__[2] + tau);
+	*dsigma = d__[2] + tau;
+	delta[1] = -(del + tau);
+	delta[2] = -tau;
+	work[1] = d__[1] + tau + d__[2];
+	work[2] = d__[2] * 2.f + tau;
+/*
+          DELTA( 1 ) = -Z( 1 ) / ( DEL+TAU )
+          DELTA( 2 ) = -Z( 2 ) / TAU
+          TEMP = SQRT( DELTA( 1 )*DELTA( 1 )+DELTA( 2 )*DELTA( 2 ) )
+          DELTA( 1 ) = DELTA( 1 ) / TEMP
+          DELTA( 2 ) = DELTA( 2 ) / TEMP
+*/
+    }
+    return 0;
+
+/*     End of SLASD5 */
+
+} /* slasd5_ */
+
+/* Subroutine */ int slasd6_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, real *d__, real *vf, real *vl, real *alpha, real *beta,
+	 integer *idxq, integer *perm, integer *givptr, integer *givcol,
+	integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real *
+	difl, real *difr, real *z__, integer *k, real *c__, real *s, real *
+	work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset,
+	    poles_dim1, poles_offset, i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    static integer i__, m, n, n1, n2, iw, idx, idxc, idxp, ivfw, ivlw;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), slasd7_(integer *, integer *, integer *, integer *,
+	    integer *, real *, real *, real *, real *, real *, real *, real *,
+	     real *, real *, real *, integer *, integer *, integer *, integer
+	    *, integer *, integer *, integer *, real *, integer *, real *,
+	    real *, integer *), slasd8_(integer *, integer *, real *, real *,
+	    real *, real *, real *, real *, integer *, real *, real *,
+	    integer *);
+    static integer isigma;
+    extern /* Subroutine */ int xerbla_(char *, integer *), slascl_(
+	    char *, integer *, integer *, real *, real *, integer *, integer *
+	    , real *, integer *, integer *), slamrg_(integer *,
+	    integer *, real *, integer *, integer *, integer *);
+    static real orgnrm;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLASD6 computes the SVD of an updated upper bidiagonal matrix B
+    obtained by merging two smaller ones by appending a row. This
+    routine is used only for the problem which requires all singular
+    values and optionally singular vector matrices in factored form.
+    B is an N-by-M matrix with N = NL + NR + 1 and M = N + SQRE.
+    A related subroutine, SLASD1, handles the case in which all singular
+    values and singular vectors of the bidiagonal matrix are desired.
+
+    SLASD6 computes the SVD as follows:
+
+                  ( D1(in)  0    0     0 )
+      B = U(in) * (   Z1'   a   Z2'    b ) * VT(in)
+                  (   0     0   D2(in) 0 )
+
+        = U(out) * ( D(out) 0) * VT(out)
+
+    where Z' = (Z1' a Z2' b) = u' VT', and u is a vector of dimension M
+    with ALPHA and BETA in the NL+1 and NL+2 th entries and zeros
+    elsewhere; and the entry b is empty if SQRE = 0.
+
+    The singular values of B can be computed using D1, D2, the first
+    components of all the right singular vectors of the lower block, and
+    the last components of all the right singular vectors of the upper
+    block. These components are stored and updated in VF and VL,
+    respectively, in SLASD6. Hence U and VT are not explicitly
+    referenced.
+
+    The singular values are stored in D. The algorithm consists of two
+    stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple singular values or if there is a zero
+          in the Z vector. For each such occurence the dimension of the
+          secular equation problem is reduced by one. This stage is
+          performed by the routine SLASD7.
+
+          The second stage consists of calculating the updated
+          singular values. This is done by finding the roots of the
+          secular equation via the routine SLASD4 (as called by SLASD8).
+          This routine also updates VF and VL and computes the distances
+          between the updated singular values and the old singular
+          values.
+
+    SLASD6 is called from SLASDA.
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed in
+           factored form:
+           = 0: Compute singular values only.
+           = 1: Compute singular vectors in factored form as well.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block.  NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block.  NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    D      (input/output) REAL array, dimension ( NL+NR+1 ).
+           On entry D(1:NL,1:NL) contains the singular values of the
+           upper block, and D(NL+2:N) contains the singular values
+           of the lower block. On exit D(1:N) contains the singular
+           values of the modified matrix.
+
+    VF     (input/output) REAL array, dimension ( M )
+           On entry, VF(1:NL+1) contains the first components of all
+           right singular vectors of the upper block; and VF(NL+2:M)
+           contains the first components of all right singular vectors
+           of the lower block. On exit, VF contains the first components
+           of all right singular vectors of the bidiagonal matrix.
+
+    VL     (input/output) REAL array, dimension ( M )
+           On entry, VL(1:NL+1) contains the  last components of all
+           right singular vectors of the upper block; and VL(NL+2:M)
+           contains the last components of all right singular vectors of
+           the lower block. On exit, VL contains the last components of
+           all right singular vectors of the bidiagonal matrix.
+
+    ALPHA  (input) REAL
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) REAL
+           Contains the off-diagonal element associated with the added
+           row.
+
+    IDXQ   (output) INTEGER array, dimension ( N )
+           This contains the permutation which will reintegrate the
+           subproblem just solved back into sorted order, i.e.
+           D( IDXQ( I = 1, N ) ) will be in ascending order.
+
+    PERM   (output) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) to be applied
+           to each block. Not referenced if ICOMPQ = 0.
+
+    GIVPTR (output) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem. Not referenced if ICOMPQ = 0.
+
+    GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGCOL (input) INTEGER
+           leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (output) REAL array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value to be used in the
+           corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of GIVNUM and POLES, must be at least N.
+
+    POLES  (output) REAL array, dimension ( LDGNUM, 2 )
+           On exit, POLES(1,*) is an array containing the new singular
+           values obtained from solving the secular equation, and
+           POLES(2,*) is an array containing the poles in the secular
+           equation. Not referenced if ICOMPQ = 0.
+
+    DIFL   (output) REAL array, dimension ( N )
+           On exit, DIFL(I) is the distance between I-th updated
+           (undeflated) singular value and the I-th (undeflated) old
+           singular value.
+
+    DIFR   (output) REAL array,
+                    dimension ( LDGNUM, 2 ) if ICOMPQ = 1 and
+                    dimension ( N ) if ICOMPQ = 0.
+           On exit, DIFR(I, 1) is the distance between I-th updated
+           (undeflated) singular value and the I+1-th (undeflated) old
+           singular value.
+
+           If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+           normalizing factors for the right singular vector matrix.
+
+           See SLASD8 for details on DIFL and DIFR.
+
+    Z      (output) REAL array, dimension ( M )
+           The first elements of this array contain the components
+           of the deflation-adjusted updating row vector.
+
+    K      (output) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    C      (output) REAL
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (output) REAL
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    WORK   (workspace) REAL array, dimension ( 4 * M )
+
+    IWORK  (workspace) INTEGER array, dimension ( 3 * N )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --vf;
+    --vl;
+    --idxq;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    poles_dim1 = *ldgnum;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    --difl;
+    --difr;
+    --z__;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    } else if (*ldgcol < n) {
+	*info = -14;
+    } else if (*ldgnum < n) {
+	*info = -16;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD6", &i__1);
+	return 0;
+    }
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in SLASD7 and SLASD8.
+*/
+
+    isigma = 1;
+    iw = isigma + n;
+    ivfw = iw + m;
+    ivlw = ivfw + m;
+
+    idx = 1;
+    idxc = idx + n;
+    idxp = idxc + n;
+
+/*
+       Scale.
+
+   Computing MAX
+*/
+    r__1 = dabs(*alpha), r__2 = dabs(*beta);
+    orgnrm = dmax(r__1,r__2);
+    d__[*nl + 1] = 0.f;
+    i__1 = n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((r__1 = d__[i__], dabs(r__1)) > orgnrm) {
+	    orgnrm = (r__1 = d__[i__], dabs(r__1));
+	}
+/* L10: */
+    }
+    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &n, &c__1, &d__[1], &n,
+	    info);
+    *alpha /= orgnrm;
+    *beta /= orgnrm;
+
+/*     Sort and Deflate singular values. */
+
+    slasd7_(icompq, nl, nr, sqre, k, &d__[1], &z__[1], &work[iw], &vf[1], &
+	    work[ivfw], &vl[1], &work[ivlw], alpha, beta, &work[isigma], &
+	    iwork[idx], &iwork[idxp], &idxq[1], &perm[1], givptr, &givcol[
+	    givcol_offset], ldgcol, &givnum[givnum_offset], ldgnum, c__, s,
+	    info);
+
+/*     Solve Secular Equation, compute DIFL, DIFR, and update VF, VL. */
+
+    slasd8_(icompq, k, &d__[1], &z__[1], &vf[1], &vl[1], &difl[1], &difr[1],
+	    ldgnum, &work[isigma], &work[iw], info);
+
+/*     Save the poles if ICOMPQ = 1. */
+
+    if (*icompq == 1) {
+	scopy_(k, &d__[1], &c__1, &poles[poles_dim1 + 1], &c__1);
+	scopy_(k, &work[isigma], &c__1, &poles[((poles_dim1) << (1)) + 1], &
+		c__1);
+    }
+
+/*     Unscale. */
+
+    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &n, &c__1, &d__[1], &n,
+	    info);
+
+/*     Prepare the IDXQ sorting permutation. */
+
+    n1 = *k;
+    n2 = n - *k;
+    slamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &idxq[1]);
+
+    return 0;
+
+/*     End of SLASD6 */
+
+} /* slasd6_ */
+
+/* Subroutine */ int slasd7_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, integer *k, real *d__, real *z__, real *zw, real *vf,
+	real *vfw, real *vl, real *vlw, real *alpha, real *beta, real *dsigma,
+	 integer *idx, integer *idxp, integer *idxq, integer *perm, integer *
+	givptr, integer *givcol, integer *ldgcol, real *givnum, integer *
+	ldgnum, real *c__, real *s, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, givnum_dim1, givnum_offset, i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    static integer i__, j, m, n, k2;
+    static real z1;
+    static integer jp;
+    static real eps, tau, tol;
+    static integer nlp1, nlp2, idxi, idxj;
+    extern /* Subroutine */ int srot_(integer *, real *, integer *, real *,
+	    integer *, real *, real *);
+    static integer idxjp, jprev;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    extern doublereal slapy2_(real *, real *), slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), slamrg_(
+	    integer *, integer *, real *, integer *, integer *, integer *);
+    static real hlftol;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLASD7 merges the two sets of singular values together into a single
+    sorted set. Then it tries to deflate the size of the problem. There
+    are two ways in which deflation can occur:  when two or more singular
+    values are close together or if there is a tiny entry in the Z
+    vector. For each such occurrence the order of the related
+    secular equation problem is reduced by one.
+
+    SLASD7 is called from SLASD6.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            Specifies whether singular vectors are to be computed
+            in compact form, as follows:
+            = 0: Compute singular values only.
+            = 1: Compute singular vectors of upper
+                 bidiagonal matrix in compact form.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block. NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block. NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has
+           N = NL + NR + 1 rows and
+           M = N + SQRE >= N columns.
+
+    K      (output) INTEGER
+           Contains the dimension of the non-deflated matrix, this is
+           the order of the related secular equation. 1 <= K <=N.
+
+    D      (input/output) REAL array, dimension ( N )
+           On entry D contains the singular values of the two submatrices
+           to be combined. On exit D contains the trailing (N-K) updated
+           singular values (those which were deflated) sorted into
+           increasing order.
+
+    Z      (output) REAL array, dimension ( M )
+           On exit Z contains the updating row vector in the secular
+           equation.
+
+    ZW     (workspace) REAL array, dimension ( M )
+           Workspace for Z.
+
+    VF     (input/output) REAL array, dimension ( M )
+           On entry, VF(1:NL+1) contains the first components of all
+           right singular vectors of the upper block; and VF(NL+2:M)
+           contains the first components of all right singular vectors
+           of the lower block. On exit, VF contains the first components
+           of all right singular vectors of the bidiagonal matrix.
+
+    VFW    (workspace) REAL array, dimension ( M )
+           Workspace for VF.
+
+    VL     (input/output) REAL array, dimension ( M )
+           On entry, VL(1:NL+1) contains the  last components of all
+           right singular vectors of the upper block; and VL(NL+2:M)
+           contains the last components of all right singular vectors
+           of the lower block. On exit, VL contains the last components
+           of all right singular vectors of the bidiagonal matrix.
+
+    VLW    (workspace) REAL array, dimension ( M )
+           Workspace for VL.
+
+    ALPHA  (input) REAL
+           Contains the diagonal element associated with the added row.
+
+    BETA   (input) REAL
+           Contains the off-diagonal element associated with the added
+           row.
+
+    DSIGMA (output) REAL array, dimension ( N )
+           Contains a copy of the diagonal elements (K-1 singular values
+           and one zero) in the secular equation.
+
+    IDX    (workspace) INTEGER array, dimension ( N )
+           This will contain the permutation used to sort the contents of
+           D into ascending order.
+
+    IDXP   (workspace) INTEGER array, dimension ( N )
+           This will contain the permutation used to place deflated
+           values of D at the end of the array. On output IDXP(2:K)
+           points to the nondeflated D-values and IDXP(K+1:N)
+           points to the deflated singular values.
+
+    IDXQ   (input) INTEGER array, dimension ( N )
+           This contains the permutation which separately sorts the two
+           sub-problems in D into ascending order.  Note that entries in
+           the first half of this permutation must first be moved one
+           position backward; and entries in the second half
+           must first have NL+1 added to their values.
+
+    PERM   (output) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) to be applied
+           to each singular block. Not referenced if ICOMPQ = 0.
+
+    GIVPTR (output) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem. Not referenced if ICOMPQ = 0.
+
+    GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGCOL (input) INTEGER
+           The leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (output) REAL array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value to be used in the
+           corresponding Givens rotation. Not referenced if ICOMPQ = 0.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of GIVNUM, must be at least N.
+
+    C      (output) REAL
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (output) REAL
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --z__;
+    --zw;
+    --vf;
+    --vfw;
+    --vl;
+    --vlw;
+    --dsigma;
+    --idx;
+    --idxp;
+    --idxq;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+
+    /* Function Body */
+    *info = 0;
+    n = *nl + *nr + 1;
+    m = n + *sqre;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    } else if (*ldgcol < n) {
+	*info = -22;
+    } else if (*ldgnum < n) {
+	*info = -24;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD7", &i__1);
+	return 0;
+    }
+
+    nlp1 = *nl + 1;
+    nlp2 = *nl + 2;
+    if (*icompq == 1) {
+	*givptr = 0;
+    }
+
+/*
+       Generate the first part of the vector Z and move the singular
+       values in the first part of D one position backward.
+*/
+
+    z1 = *alpha * vl[nlp1];
+    vl[nlp1] = 0.f;
+    tau = vf[nlp1];
+    for (i__ = *nl; i__ >= 1; --i__) {
+	z__[i__ + 1] = *alpha * vl[i__];
+	vl[i__] = 0.f;
+	vf[i__ + 1] = vf[i__];
+	d__[i__ + 1] = d__[i__];
+	idxq[i__ + 1] = idxq[i__] + 1;
+/* L10: */
+    }
+    vf[1] = tau;
+
+/*     Generate the second part of the vector Z. */
+
+    i__1 = m;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	z__[i__] = *beta * vf[i__];
+	vf[i__] = 0.f;
+/* L20: */
+    }
+
+/*     Sort the singular values into increasing order */
+
+    i__1 = n;
+    for (i__ = nlp2; i__ <= i__1; ++i__) {
+	idxq[i__] += nlp1;
+/* L30: */
+    }
+
+/*     DSIGMA, IDXC, IDXC, and ZW are used as storage space. */
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	dsigma[i__] = d__[idxq[i__]];
+	zw[i__] = z__[idxq[i__]];
+	vfw[i__] = vf[idxq[i__]];
+	vlw[i__] = vl[idxq[i__]];
+/* L40: */
+    }
+
+    slamrg_(nl, nr, &dsigma[2], &c__1, &c__1, &idx[2]);
+
+    i__1 = n;
+    for (i__ = 2; i__ <= i__1; ++i__) {
+	idxi = idx[i__] + 1;
+	d__[i__] = dsigma[idxi];
+	z__[i__] = zw[idxi];
+	vf[i__] = vfw[idxi];
+	vl[i__] = vlw[idxi];
+/* L50: */
+    }
+
+/*     Calculate the allowable deflation tolerence */
+
+    eps = slamch_("Epsilon");
+/* Computing MAX */
+    r__1 = dabs(*alpha), r__2 = dabs(*beta);
+    tol = dmax(r__1,r__2);
+/* Computing MAX */
+    r__2 = (r__1 = d__[n], dabs(r__1));
+    tol = eps * 64.f * dmax(r__2,tol);
+
+/*
+       There are 2 kinds of deflation -- first a value in the z-vector
+       is small, second two (or more) singular values are very close
+       together (their difference is small).
+
+       If the value in the z-vector is small, we simply permute the
+       array so that the corresponding singular value is moved to the
+       end.
+
+       If two values in the D-vector are close, we perform a two-sided
+       rotation designed to make one of the corresponding z-vector
+       entries zero, and then permute the array so that the deflated
+       singular value is moved to the end.
+
+       If there are multiple singular values then the problem deflates.
+       Here the number of equal singular values are found.  As each equal
+       singular value is found, an elementary reflector is computed to
+       rotate the corresponding singular subspace so that the
+       corresponding components of Z are zero in this new basis.
+*/
+
+    *k = 1;
+    k2 = n + 1;
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	if ((r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    idxp[k2] = j;
+	    if (j == n) {
+		goto L100;
+	    }
+	} else {
+	    jprev = j;
+	    goto L70;
+	}
+/* L60: */
+    }
+L70:
+    j = jprev;
+L80:
+    ++j;
+    if (j > n) {
+	goto L90;
+    }
+    if ((r__1 = z__[j], dabs(r__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	idxp[k2] = j;
+    } else {
+
+/*        Check if singular values are close enough to allow deflation. */
+
+	if ((r__1 = d__[j] - d__[jprev], dabs(r__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    *s = z__[jprev];
+	    *c__ = z__[j];
+
+/*
+             Find sqrt(a**2+b**2) without overflow or
+             destructive underflow.
+*/
+
+	    tau = slapy2_(c__, s);
+	    z__[j] = tau;
+	    z__[jprev] = 0.f;
+	    *c__ /= tau;
+	    *s = -(*s) / tau;
+
+/*           Record the appropriate Givens rotation */
+
+	    if (*icompq == 1) {
+		++(*givptr);
+		idxjp = idxq[idx[jprev] + 1];
+		idxj = idxq[idx[j] + 1];
+		if (idxjp <= nlp1) {
+		    --idxjp;
+		}
+		if (idxj <= nlp1) {
+		    --idxj;
+		}
+		givcol[*givptr + ((givcol_dim1) << (1))] = idxjp;
+		givcol[*givptr + givcol_dim1] = idxj;
+		givnum[*givptr + ((givnum_dim1) << (1))] = *c__;
+		givnum[*givptr + givnum_dim1] = *s;
+	    }
+	    srot_(&c__1, &vf[jprev], &c__1, &vf[j], &c__1, c__, s);
+	    srot_(&c__1, &vl[jprev], &c__1, &vl[j], &c__1, c__, s);
+	    --k2;
+	    idxp[k2] = jprev;
+	    jprev = j;
+	} else {
+	    ++(*k);
+	    zw[*k] = z__[jprev];
+	    dsigma[*k] = d__[jprev];
+	    idxp[*k] = jprev;
+	    jprev = j;
+	}
+    }
+    goto L80;
+L90:
+
+/*     Record the last singular value. */
+
+    ++(*k);
+    zw[*k] = z__[jprev];
+    dsigma[*k] = d__[jprev];
+    idxp[*k] = jprev;
+
+L100:
+
+/*
+       Sort the singular values into DSIGMA. The singular values which
+       were not deflated go into the first K slots of DSIGMA, except
+       that DSIGMA(1) is treated separately.
+*/
+
+    i__1 = n;
+    for (j = 2; j <= i__1; ++j) {
+	jp = idxp[j];
+	dsigma[j] = d__[jp];
+	vfw[j] = vf[jp];
+	vlw[j] = vl[jp];
+/* L110: */
+    }
+    if (*icompq == 1) {
+	i__1 = n;
+	for (j = 2; j <= i__1; ++j) {
+	    jp = idxp[j];
+	    perm[j] = idxq[idx[jp] + 1];
+	    if (perm[j] <= nlp1) {
+		--perm[j];
+	    }
+/* L120: */
+	}
+    }
+
+/*
+       The deflated singular values go back into the last N - K slots of
+       D.
+*/
+
+    i__1 = n - *k;
+    scopy_(&i__1, &dsigma[*k + 1], &c__1, &d__[*k + 1], &c__1);
+
+/*
+       Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
+       VL(M).
+*/
+
+    dsigma[1] = 0.f;
+    hlftol = tol / 2.f;
+    if (dabs(dsigma[2]) <= hlftol) {
+	dsigma[2] = hlftol;
+    }
+    if (m > n) {
+	z__[1] = slapy2_(&z1, &z__[m]);
+	if (z__[1] <= tol) {
+	    *c__ = 1.f;
+	    *s = 0.f;
+	    z__[1] = tol;
+	} else {
+	    *c__ = z1 / z__[1];
+	    *s = -z__[m] / z__[1];
+	}
+	srot_(&c__1, &vf[m], &c__1, &vf[1], &c__1, c__, s);
+	srot_(&c__1, &vl[m], &c__1, &vl[1], &c__1, c__, s);
+    } else {
+	if (dabs(z1) <= tol) {
+	    z__[1] = tol;
+	} else {
+	    z__[1] = z1;
+	}
+    }
+
+/*     Restore Z, VF, and VL. */
+
+    i__1 = *k - 1;
+    scopy_(&i__1, &zw[2], &c__1, &z__[2], &c__1);
+    i__1 = n - 1;
+    scopy_(&i__1, &vfw[2], &c__1, &vf[2], &c__1);
+    i__1 = n - 1;
+    scopy_(&i__1, &vlw[2], &c__1, &vl[2], &c__1);
+
+    return 0;
+
+/*     End of SLASD7 */
+
+} /* slasd7_ */
+
+/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real *
+	z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr,
+	real *dsigma, real *work, integer *info)
+{
+    /* System generated locals */
+    integer difr_dim1, difr_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static integer i__, j;
+    static real dj, rho;
+    static integer iwk1, iwk2, iwk3;
+    static real temp;
+    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
+    static integer iwk2i, iwk3i;
+    extern doublereal snrm2_(integer *, real *, integer *);
+    static real diflj, difrj, dsigj;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    extern doublereal slamc3_(real *, real *);
+    extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *,
+	    real *, real *, real *, real *, integer *), xerbla_(char *,
+	    integer *);
+    static real dsigjp;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *,
+	    real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLASD8 finds the square roots of the roots of the secular equation,
+    as defined by the values in DSIGMA and Z. It makes the appropriate
+    calls to SLASD4, and stores, for each  element in D, the distance
+    to its two nearest poles (elements in DSIGMA). It also updates
+    the arrays VF and VL, the first and last components of all the
+    right singular vectors of the original bidiagonal matrix.
+
+    SLASD8 is called from SLASD6.
+
+    Arguments
+    =========
+
+    ICOMPQ  (input) INTEGER
+            Specifies whether singular vectors are to be computed in
+            factored form in the calling routine:
+            = 0: Compute singular values only.
+            = 1: Compute singular vectors in factored form as well.
+
+    K       (input) INTEGER
+            The number of terms in the rational function to be solved
+            by SLASD4.  K >= 1.
+
+    D       (output) REAL array, dimension ( K )
+            On output, D contains the updated singular values.
+
+    Z       (input) REAL array, dimension ( K )
+            The first K elements of this array contain the components
+            of the deflation-adjusted updating row vector.
+
+    VF      (input/output) REAL array, dimension ( K )
+            On entry, VF contains  information passed through DBEDE8.
+            On exit, VF contains the first K components of the first
+            components of all right singular vectors of the bidiagonal
+            matrix.
+
+    VL      (input/output) REAL array, dimension ( K )
+            On entry, VL contains  information passed through DBEDE8.
+            On exit, VL contains the first K components of the last
+            components of all right singular vectors of the bidiagonal
+            matrix.
+
+    DIFL    (output) REAL array, dimension ( K )
+            On exit, DIFL(I) = D(I) - DSIGMA(I).
+
+    DIFR    (output) REAL array,
+                     dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
+                     dimension ( K ) if ICOMPQ = 0.
+            On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
+            defined and will not be referenced.
+
+            If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
+            normalizing factors for the right singular vector matrix.
+
+    LDDIFR  (input) INTEGER
+            The leading dimension of DIFR, must be at least K.
+
+    DSIGMA  (input) REAL array, dimension ( K )
+            The first K elements of this array contain the old roots
+            of the deflated updating problem.  These are the poles
+            of the secular equation.
+
+    WORK    (workspace) REAL array, dimension at least 3 * K
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --z__;
+    --vf;
+    --vl;
+    --difl;
+    difr_dim1 = *lddifr;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    --dsigma;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*k < 1) {
+	*info = -2;
+    } else if (*lddifr < *k) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASD8", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*k == 1) {
+	d__[1] = dabs(z__[1]);
+	difl[1] = d__[1];
+	if (*icompq == 1) {
+	    difl[2] = 1.f;
+	    difr[((difr_dim1) << (1)) + 1] = 1.f;
+	}
+	return 0;
+    }
+
+/*
+       Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
+       be computed with high relative accuracy (barring over/underflow).
+       This is a problem on machines without a guard digit in
+       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
+       The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
+       which on any of these machines zeros out the bottommost
+       bit of DSIGMA(I) if it is 1; this makes the subsequent
+       subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation
+       occurs. On binary machines with a guard digit (almost all
+       machines) it does not change DSIGMA(I) at all. On hexadecimal
+       and decimal machines with a guard digit, it slightly
+       changes the bottommost bits of DSIGMA(I). It does not account
+       for hexadecimal or decimal machines without guard digits
+       (we know of none). We use a subroutine call to compute
+       2*DLAMBDA(I) to prevent optimizing compilers from eliminating
+       this code.
+*/
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
+/* L10: */
+    }
+
+/*     Book keeping. */
+
+    iwk1 = 1;
+    iwk2 = iwk1 + *k;
+    iwk3 = iwk2 + *k;
+    iwk2i = iwk2 - 1;
+    iwk3i = iwk3 - 1;
+
+/*     Normalize Z. */
+
+    rho = snrm2_(k, &z__[1], &c__1);
+    slascl_("G", &c__0, &c__0, &rho, &c_b1011, k, &c__1, &z__[1], k, info);
+    rho *= rho;
+
+/*     Initialize WORK(IWK3). */
+
+    slaset_("A", k, &c__1, &c_b1011, &c_b1011, &work[iwk3], k);
+
+/*
+       Compute the updated singular values, the arrays DIFL, DIFR,
+       and the updated Z.
+*/
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[
+		iwk2], info);
+
+/*        If the root finder fails, the computation is terminated. */
+
+	if (*info != 0) {
+	    return 0;
+	}
+	work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j];
+	difl[j] = -work[j];
+	difr[j + difr_dim1] = -work[j + 1];
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i +
+		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
+		    j]);
+/* L20: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i +
+		    i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[
+		    j]);
+/* L30: */
+	}
+/* L40: */
+    }
+
+/*     Compute updated Z. */
+
+    i__1 = *k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1)));
+	z__[i__] = r_sign(&r__2, &z__[i__]);
+/* L50: */
+    }
+
+/*     Update VF and VL. */
+
+    i__1 = *k;
+    for (j = 1; j <= i__1; ++j) {
+	diflj = difl[j];
+	dj = d__[j];
+	dsigj = -dsigma[j];
+	if (j < *k) {
+	    difrj = -difr[j + difr_dim1];
+	    dsigjp = -dsigma[j + 1];
+	}
+	work[j] = -z__[j] / diflj / (dsigma[j] + dj);
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / (
+		    dsigma[i__] + dj);
+/* L60: */
+	}
+	i__2 = *k;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) /
+		    (dsigma[i__] + dj);
+/* L70: */
+	}
+	temp = snrm2_(k, &work[1], &c__1);
+	work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp;
+	work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp;
+	if (*icompq == 1) {
+	    difr[j + ((difr_dim1) << (1))] = temp;
+	}
+/* L80: */
+    }
+
+    scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1);
+    scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1);
+
+    return 0;
+
+/*     End of SLASD8 */
+
+} /* slasd8_ */
+
+/* Subroutine */ int slasda_(integer *icompq, integer *smlsiz, integer *n,
+	integer *sqre, real *d__, real *e, real *u, integer *ldu, real *vt,
+	integer *k, real *difl, real *difr, real *z__, real *poles, integer *
+	givptr, integer *givcol, integer *ldgcol, integer *perm, real *givnum,
+	 real *c__, real *s, real *work, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1,
+	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset,
+	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset,
+	    z_dim1, z_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, m, i1, ic, lf, nd, ll, nl, vf, nr, vl, im1, ncc,
+	    nlf, nrf, vfi, iwk, vli, lvl, nru, ndb1, nlp1, lvl2, nrp1;
+    static real beta;
+    static integer idxq, nlvl;
+    static real alpha;
+    static integer inode, ndiml, ndimr, idxqi, itemp, sqrei;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), slasd6_(integer *, integer *, integer *, integer *,
+	    real *, real *, real *, real *, real *, integer *, integer *,
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , real *, real *, integer *, real *, real *, real *, integer *,
+	    integer *);
+    static integer nwork1, nwork2;
+    extern /* Subroutine */ int xerbla_(char *, integer *), slasdq_(
+	    char *, integer *, integer *, integer *, integer *, integer *,
+	    real *, real *, real *, integer *, real *, integer *, real *,
+	    integer *, real *, integer *), slasdt_(integer *, integer
+	    *, integer *, integer *, integer *, integer *, integer *),
+	    slaset_(char *, integer *, integer *, real *, real *, real *,
+	    integer *);
+    static integer smlszp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    Using a divide and conquer approach, SLASDA computes the singular
+    value decomposition (SVD) of a real upper bidiagonal N-by-M matrix
+    B with diagonal D and offdiagonal E, where M = N + SQRE. The
+    algorithm computes the singular values in the SVD B = U * S * VT.
+    The orthogonal matrices U and VT are optionally computed in
+    compact form.
+
+    A related subroutine, SLASD0, computes the singular values and
+    the singular vectors in explicit form.
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed
+           in compact form, as follows
+           = 0: Compute singular values only.
+           = 1: Compute singular vectors of upper bidiagonal
+                matrix in compact form.
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The row dimension of the upper bidiagonal matrix. This is
+           also the dimension of the main diagonal array D.
+
+    SQRE   (input) INTEGER
+           Specifies the column dimension of the bidiagonal matrix.
+           = 0: The bidiagonal matrix has column dimension M = N;
+           = 1: The bidiagonal matrix has column dimension M = N + 1.
+
+    D      (input/output) REAL array, dimension ( N )
+           On entry D contains the main diagonal of the bidiagonal
+           matrix. On exit D, if INFO = 0, contains its singular values.
+
+    E      (input) REAL array, dimension ( M-1 )
+           Contains the subdiagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    U      (output) REAL array,
+           dimension ( LDU, SMLSIZ ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, U contains the left
+           singular vector matrices of all subproblems at the bottom
+           level.
+
+    LDU    (input) INTEGER, LDU = > N.
+           The leading dimension of arrays U, VT, DIFL, DIFR, POLES,
+           GIVNUM, and Z.
+
+    VT     (output) REAL array,
+           dimension ( LDU, SMLSIZ+1 ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, VT' contains the right
+           singular vector matrices of all subproblems at the bottom
+           level.
+
+    K      (output) INTEGER array,
+           dimension ( N ) if ICOMPQ = 1 and dimension 1 if ICOMPQ = 0.
+           If ICOMPQ = 1, on exit, K(I) is the dimension of the I-th
+           secular equation on the computation tree.
+
+    DIFL   (output) REAL array, dimension ( LDU, NLVL ),
+           where NLVL = floor(log_2 (N/SMLSIZ))).
+
+    DIFR   (output) REAL array,
+                    dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1 and
+                    dimension ( N ) if ICOMPQ = 0.
+           If ICOMPQ = 1, on exit, DIFL(1:N, I) and DIFR(1:N, 2 * I - 1)
+           record distances between singular values on the I-th
+           level and singular values on the (I -1)-th level, and
+           DIFR(1:N, 2 * I ) contains the normalizing factors for
+           the right singular vector matrix. See SLASD8 for details.
+
+    Z      (output) REAL array,
+                    dimension ( LDU, NLVL ) if ICOMPQ = 1 and
+                    dimension ( N ) if ICOMPQ = 0.
+           The first K elements of Z(1, I) contain the components of
+           the deflation-adjusted updating row vector for subproblems
+           on the I-th level.
+
+    POLES  (output) REAL array,
+           dimension ( LDU, 2 * NLVL ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, POLES(1, 2*I - 1) and
+           POLES(1, 2*I) contain  the new and old singular values
+           involved in the secular equations on the I-th level.
+
+    GIVPTR (output) INTEGER array,
+           dimension ( N ) if ICOMPQ = 1, and not referenced if
+           ICOMPQ = 0. If ICOMPQ = 1, on exit, GIVPTR( I ) records
+           the number of Givens rotations performed on the I-th
+           problem on the computation tree.
+
+    GIVCOL (output) INTEGER array,
+           dimension ( LDGCOL, 2 * NLVL ) if ICOMPQ = 1, and not
+           referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+           GIVCOL(1, 2 *I - 1) and GIVCOL(1, 2 *I) record the locations
+           of Givens rotations performed on the I-th level on the
+           computation tree.
+
+    LDGCOL (input) INTEGER, LDGCOL = > N.
+           The leading dimension of arrays GIVCOL and PERM.
+
+    PERM   (output) INTEGER array,
+           dimension ( LDGCOL, NLVL ) if ICOMPQ = 1, and not referenced
+           if ICOMPQ = 0. If ICOMPQ = 1, on exit, PERM(1, I) records
+           permutations done on the I-th level of the computation tree.
+
+    GIVNUM (output) REAL array,
+           dimension ( LDU,  2 * NLVL ) if ICOMPQ = 1, and not
+           referenced if ICOMPQ = 0. If ICOMPQ = 1, on exit, for each I,
+           GIVNUM(1, 2 *I - 1) and GIVNUM(1, 2 *I) record the C- and S-
+           values of Givens rotations performed on the I-th level on
+           the computation tree.
+
+    C      (output) REAL array,
+           dimension ( N ) if ICOMPQ = 1, and dimension 1 if ICOMPQ = 0.
+           If ICOMPQ = 1 and the I-th subproblem is not square, on exit,
+           C( I ) contains the C-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    S      (output) REAL array, dimension ( N ) if
+           ICOMPQ = 1, and dimension 1 if ICOMPQ = 0. If ICOMPQ = 1
+           and the I-th subproblem is not square, on exit, S( I )
+           contains the S-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    WORK   (workspace) REAL array, dimension
+           (6 * N + (SMLSIZ + 1)*(SMLSIZ + 1)).
+
+    IWORK  (workspace) INTEGER array.
+           Dimension must be at least (7 * N).
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an singular value did not converge
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    givnum_dim1 = *ldu;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    poles_dim1 = *ldu;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    z_dim1 = *ldu;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    difr_dim1 = *ldu;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    difl_dim1 = *ldu;
+    difl_offset = 1 + difl_dim1;
+    difl -= difl_offset;
+    vt_dim1 = *ldu;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    --k;
+    --givptr;
+    perm_dim1 = *ldgcol;
+    perm_offset = 1 + perm_dim1;
+    perm -= perm_offset;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    --c__;
+    --s;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*smlsiz < 3) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    } else if (*ldu < *n + *sqre) {
+	*info = -8;
+    } else if (*ldgcol < *n) {
+	*info = -17;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASDA", &i__1);
+	return 0;
+    }
+
+    m = *n + *sqre;
+
+/*     If the input matrix is too small, call SLASDQ to find the SVD. */
+
+    if (*n <= *smlsiz) {
+	if (*icompq == 0) {
+	    slasdq_("U", sqre, n, &c__0, &c__0, &c__0, &d__[1], &e[1], &vt[
+		    vt_offset], ldu, &u[u_offset], ldu, &u[u_offset], ldu, &
+		    work[1], info);
+	} else {
+	    slasdq_("U", sqre, n, &m, n, &c__0, &d__[1], &e[1], &vt[vt_offset]
+		    , ldu, &u[u_offset], ldu, &u[u_offset], ldu, &work[1],
+		    info);
+	}
+	return 0;
+    }
+
+/*     Book-keeping and  set up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+    idxq = ndimr + *n;
+    iwk = idxq + *n;
+
+    ncc = 0;
+    nru = 0;
+
+    smlszp = *smlsiz + 1;
+    vf = 1;
+    vl = vf + m;
+    nwork1 = vl + m;
+    nwork2 = nwork1 + smlszp * smlszp;
+
+    slasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       for the nodes on bottom level of the tree, solve
+       their subproblems by SLASDQ.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+          IC : center row of each node
+          NL : number of rows of left  subproblem
+          NR : number of rows of right subproblem
+          NLF: starting row of the left   subproblem
+          NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nlp1 = nl + 1;
+	nr = iwork[ndimr + i1];
+	nlf = ic - nl;
+	nrf = ic + 1;
+	idxqi = idxq + nlf - 2;
+	vfi = vf + nlf - 1;
+	vli = vl + nlf - 1;
+	sqrei = 1;
+	if (*icompq == 0) {
+	    slaset_("A", &nlp1, &nlp1, &c_b320, &c_b1011, &work[nwork1], &
+		    smlszp);
+	    slasdq_("U", &sqrei, &nl, &nlp1, &nru, &ncc, &d__[nlf], &e[nlf], &
+		    work[nwork1], &smlszp, &work[nwork2], &nl, &work[nwork2],
+		    &nl, &work[nwork2], info);
+	    itemp = nwork1 + nl * smlszp;
+	    scopy_(&nlp1, &work[nwork1], &c__1, &work[vfi], &c__1);
+	    scopy_(&nlp1, &work[itemp], &c__1, &work[vli], &c__1);
+	} else {
+	    slaset_("A", &nl, &nl, &c_b320, &c_b1011, &u[nlf + u_dim1], ldu);
+	    slaset_("A", &nlp1, &nlp1, &c_b320, &c_b1011, &vt[nlf + vt_dim1],
+		    ldu);
+	    slasdq_("U", &sqrei, &nl, &nlp1, &nl, &ncc, &d__[nlf], &e[nlf], &
+		    vt[nlf + vt_dim1], ldu, &u[nlf + u_dim1], ldu, &u[nlf +
+		    u_dim1], ldu, &work[nwork1], info);
+	    scopy_(&nlp1, &vt[nlf + vt_dim1], &c__1, &work[vfi], &c__1);
+	    scopy_(&nlp1, &vt[nlf + nlp1 * vt_dim1], &c__1, &work[vli], &c__1)
+		    ;
+	}
+	if (*info != 0) {
+	    return 0;
+	}
+	i__2 = nl;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[idxqi + j] = j;
+/* L10: */
+	}
+	if (i__ == nd && *sqre == 0) {
+	    sqrei = 0;
+	} else {
+	    sqrei = 1;
+	}
+	idxqi += nlp1;
+	vfi += nlp1;
+	vli += nlp1;
+	nrp1 = nr + sqrei;
+	if (*icompq == 0) {
+	    slaset_("A", &nrp1, &nrp1, &c_b320, &c_b1011, &work[nwork1], &
+		    smlszp);
+	    slasdq_("U", &sqrei, &nr, &nrp1, &nru, &ncc, &d__[nrf], &e[nrf], &
+		    work[nwork1], &smlszp, &work[nwork2], &nr, &work[nwork2],
+		    &nr, &work[nwork2], info);
+	    itemp = nwork1 + (nrp1 - 1) * smlszp;
+	    scopy_(&nrp1, &work[nwork1], &c__1, &work[vfi], &c__1);
+	    scopy_(&nrp1, &work[itemp], &c__1, &work[vli], &c__1);
+	} else {
+	    slaset_("A", &nr, &nr, &c_b320, &c_b1011, &u[nrf + u_dim1], ldu);
+	    slaset_("A", &nrp1, &nrp1, &c_b320, &c_b1011, &vt[nrf + vt_dim1],
+		    ldu);
+	    slasdq_("U", &sqrei, &nr, &nrp1, &nr, &ncc, &d__[nrf], &e[nrf], &
+		    vt[nrf + vt_dim1], ldu, &u[nrf + u_dim1], ldu, &u[nrf +
+		    u_dim1], ldu, &work[nwork1], info);
+	    scopy_(&nrp1, &vt[nrf + vt_dim1], &c__1, &work[vfi], &c__1);
+	    scopy_(&nrp1, &vt[nrf + nrp1 * vt_dim1], &c__1, &work[vli], &c__1)
+		    ;
+	}
+	if (*info != 0) {
+	    return 0;
+	}
+	i__2 = nr;
+	for (j = 1; j <= i__2; ++j) {
+	    iwork[idxqi + j] = j;
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*     Now conquer each subproblem bottom-up. */
+
+    j = pow_ii(&c__2, &nlvl);
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          Find the first node LF and last node LL on
+          the current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    if (i__ == ll) {
+		sqrei = *sqre;
+	    } else {
+		sqrei = 1;
+	    }
+	    vfi = vf + nlf - 1;
+	    vli = vl + nlf - 1;
+	    idxqi = idxq + nlf - 1;
+	    alpha = d__[ic];
+	    beta = e[ic];
+	    if (*icompq == 0) {
+		slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
+			work[vli], &alpha, &beta, &iwork[idxqi], &perm[
+			perm_offset], &givptr[1], &givcol[givcol_offset],
+			ldgcol, &givnum[givnum_offset], ldu, &poles[
+			poles_offset], &difl[difl_offset], &difr[difr_offset],
+			 &z__[z_offset], &k[1], &c__[1], &s[1], &work[nwork1],
+			 &iwork[iwk], info);
+	    } else {
+		--j;
+		slasd6_(icompq, &nl, &nr, &sqrei, &d__[nlf], &work[vfi], &
+			work[vli], &alpha, &beta, &iwork[idxqi], &perm[nlf +
+			lvl * perm_dim1], &givptr[j], &givcol[nlf + lvl2 *
+			givcol_dim1], ldgcol, &givnum[nlf + lvl2 *
+			givnum_dim1], ldu, &poles[nlf + lvl2 * poles_dim1], &
+			difl[nlf + lvl * difl_dim1], &difr[nlf + lvl2 *
+			difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[j],
+			&s[j], &work[nwork1], &iwork[iwk], info);
+	    }
+	    if (*info != 0) {
+		return 0;
+	    }
+/* L40: */
+	}
+/* L50: */
+    }
+
+    return 0;
+
+/*     End of SLASDA */
+
+} /* slasda_ */
+
+/* Subroutine */ int slasdq_(char *uplo, integer *sqre, integer *n, integer *
+	ncvt, integer *nru, integer *ncc, real *d__, real *e, real *vt,
+	integer *ldvt, real *u, integer *ldu, real *c__, integer *ldc, real *
+	work, integer *info)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    static real r__, cs, sn;
+    static integer np1, isub;
+    static real smin;
+    static integer sqre1;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int slasr_(char *, char *, char *, integer *,
+	    integer *, real *, real *, real *, integer *);
+    static integer iuplo;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
+	    integer *), xerbla_(char *, integer *), slartg_(real *,
+	    real *, real *, real *, real *);
+    static logical rotate;
+    extern /* Subroutine */ int sbdsqr_(char *, integer *, integer *, integer
+	    *, integer *, real *, real *, real *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASDQ computes the singular value decomposition (SVD) of a real
+    (upper or lower) bidiagonal matrix with diagonal D and offdiagonal
+    E, accumulating the transformations if desired. Letting B denote
+    the input bidiagonal matrix, the algorithm computes orthogonal
+    matrices Q and P such that B = Q * S * P' (P' denotes the transpose
+    of P). The singular values S are overwritten on D.
+
+    The input matrix U  is changed to U  * Q  if desired.
+    The input matrix VT is changed to P' * VT if desired.
+    The input matrix C  is changed to Q' * C  if desired.
+
+    See "Computing  Small Singular Values of Bidiagonal Matrices With
+    Guaranteed High Relative Accuracy," by J. Demmel and W. Kahan,
+    LAPACK Working Note #3, for a detailed description of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO  (input) CHARACTER*1
+          On entry, UPLO specifies whether the input bidiagonal matrix
+          is upper or lower bidiagonal, and wether it is square are
+          not.
+             UPLO = 'U' or 'u'   B is upper bidiagonal.
+             UPLO = 'L' or 'l'   B is lower bidiagonal.
+
+    SQRE  (input) INTEGER
+          = 0: then the input matrix is N-by-N.
+          = 1: then the input matrix is N-by-(N+1) if UPLU = 'U' and
+               (N+1)-by-N if UPLU = 'L'.
+
+          The bidiagonal matrix has
+          N = NL + NR + 1 rows and
+          M = N + SQRE >= N columns.
+
+    N     (input) INTEGER
+          On entry, N specifies the number of rows and columns
+          in the matrix. N must be at least 0.
+
+    NCVT  (input) INTEGER
+          On entry, NCVT specifies the number of columns of
+          the matrix VT. NCVT must be at least 0.
+
+    NRU   (input) INTEGER
+          On entry, NRU specifies the number of rows of
+          the matrix U. NRU must be at least 0.
+
+    NCC   (input) INTEGER
+          On entry, NCC specifies the number of columns of
+          the matrix C. NCC must be at least 0.
+
+    D     (input/output) REAL array, dimension (N)
+          On entry, D contains the diagonal entries of the
+          bidiagonal matrix whose SVD is desired. On normal exit,
+          D contains the singular values in ascending order.
+
+    E     (input/output) REAL array.
+          dimension is (N-1) if SQRE = 0 and N if SQRE = 1.
+          On entry, the entries of E contain the offdiagonal entries
+          of the bidiagonal matrix whose SVD is desired. On normal
+          exit, E will contain 0. If the algorithm does not converge,
+          D and E will contain the diagonal and superdiagonal entries
+          of a bidiagonal matrix orthogonally equivalent to the one
+          given as input.
+
+    VT    (input/output) REAL array, dimension (LDVT, NCVT)
+          On entry, contains a matrix which on exit has been
+          premultiplied by P', dimension N-by-NCVT if SQRE = 0
+          and (N+1)-by-NCVT if SQRE = 1 (not referenced if NCVT=0).
+
+    LDVT  (input) INTEGER
+          On entry, LDVT specifies the leading dimension of VT as
+          declared in the calling (sub) program. LDVT must be at
+          least 1. If NCVT is nonzero LDVT must also be at least N.
+
+    U     (input/output) REAL array, dimension (LDU, N)
+          On entry, contains a  matrix which on exit has been
+          postmultiplied by Q, dimension NRU-by-N if SQRE = 0
+          and NRU-by-(N+1) if SQRE = 1 (not referenced if NRU=0).
+
+    LDU   (input) INTEGER
+          On entry, LDU  specifies the leading dimension of U as
+          declared in the calling (sub) program. LDU must be at
+          least max( 1, NRU ) .
+
+    C     (input/output) REAL array, dimension (LDC, NCC)
+          On entry, contains an N-by-NCC matrix which on exit
+          has been premultiplied by Q'  dimension N-by-NCC if SQRE = 0
+          and (N+1)-by-NCC if SQRE = 1 (not referenced if NCC=0).
+
+    LDC   (input) INTEGER
+          On entry, LDC  specifies the leading dimension of C as
+          declared in the calling (sub) program. LDC must be at
+          least 1. If NCC is nonzero, LDC must also be at least N.
+
+    WORK  (workspace) REAL array, dimension (4*N)
+          Workspace. Only referenced if one of NCVT, NRU, or NCC is
+          nonzero, and if N is at least 2.
+
+    INFO  (output) INTEGER
+          On exit, a value of 0 indicates a successful exit.
+          If INFO < 0, argument number -INFO is illegal.
+          If INFO > 0, the algorithm did not converge, and INFO
+          specifies how many superdiagonals did not converge.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    iuplo = 0;
+    if (lsame_(uplo, "U")) {
+	iuplo = 1;
+    }
+    if (lsame_(uplo, "L")) {
+	iuplo = 2;
+    }
+    if (iuplo == 0) {
+	*info = -1;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*ncvt < 0) {
+	*info = -4;
+    } else if (*nru < 0) {
+	*info = -5;
+    } else if (*ncc < 0) {
+	*info = -6;
+    } else if ((*ncvt == 0 && *ldvt < 1) || (*ncvt > 0 && *ldvt < max(1,*n)))
+	    {
+	*info = -10;
+    } else if (*ldu < max(1,*nru)) {
+	*info = -12;
+    } else if ((*ncc == 0 && *ldc < 1) || (*ncc > 0 && *ldc < max(1,*n))) {
+	*info = -14;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASDQ", &i__1);
+	return 0;
+    }
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     ROTATE is true if any singular vectors desired, false otherwise */
+
+    rotate = ((*ncvt > 0) || (*nru > 0)) || (*ncc > 0);
+    np1 = *n + 1;
+    sqre1 = *sqre;
+
+/*
+       If matrix non-square upper bidiagonal, rotate to be lower
+       bidiagonal.  The rotations are on the right.
+*/
+
+    if (iuplo == 1 && sqre1 == 1) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (rotate) {
+		work[i__] = cs;
+		work[*n + i__] = sn;
+	    }
+/* L10: */
+	}
+	slartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
+	d__[*n] = r__;
+	e[*n] = 0.f;
+	if (rotate) {
+	    work[*n] = cs;
+	    work[*n + *n] = sn;
+	}
+	iuplo = 2;
+	sqre1 = 0;
+
+/*        Update singular vectors if desired. */
+
+	if (*ncvt > 0) {
+	    slasr_("L", "V", "F", &np1, ncvt, &work[1], &work[np1], &vt[
+		    vt_offset], ldvt);
+	}
+    }
+
+/*
+       If matrix lower bidiagonal, rotate to be upper bidiagonal
+       by applying Givens rotations on the left.
+*/
+
+    if (iuplo == 2) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    slartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (rotate) {
+		work[i__] = cs;
+		work[*n + i__] = sn;
+	    }
+/* L20: */
+	}
+
+/*
+          If matrix (N+1)-by-N lower bidiagonal, one additional
+          rotation is needed.
+*/
+
+	if (sqre1 == 1) {
+	    slartg_(&d__[*n], &e[*n], &cs, &sn, &r__);
+	    d__[*n] = r__;
+	    if (rotate) {
+		work[*n] = cs;
+		work[*n + *n] = sn;
+	    }
+	}
+
+/*        Update singular vectors if desired. */
+
+	if (*nru > 0) {
+	    if (sqre1 == 0) {
+		slasr_("R", "V", "F", nru, n, &work[1], &work[np1], &u[
+			u_offset], ldu);
+	    } else {
+		slasr_("R", "V", "F", nru, &np1, &work[1], &work[np1], &u[
+			u_offset], ldu);
+	    }
+	}
+	if (*ncc > 0) {
+	    if (sqre1 == 0) {
+		slasr_("L", "V", "F", n, ncc, &work[1], &work[np1], &c__[
+			c_offset], ldc);
+	    } else {
+		slasr_("L", "V", "F", &np1, ncc, &work[1], &work[np1], &c__[
+			c_offset], ldc);
+	    }
+	}
+    }
+
+/*
+       Call SBDSQR to compute the SVD of the reduced real
+       N-by-N upper bidiagonal matrix.
+*/
+
+    sbdsqr_("U", n, ncvt, nru, ncc, &d__[1], &e[1], &vt[vt_offset], ldvt, &u[
+	    u_offset], ldu, &c__[c_offset], ldc, &work[1], info);
+
+/*
+       Sort the singular values into ascending order (insertion sort on
+       singular values, but only one transposition per singular vector)
+*/
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Scan for smallest D(I). */
+
+	isub = i__;
+	smin = d__[i__];
+	i__2 = *n;
+	for (j = i__ + 1; j <= i__2; ++j) {
+	    if (d__[j] < smin) {
+		isub = j;
+		smin = d__[j];
+	    }
+/* L30: */
+	}
+	if (isub != i__) {
+
+/*           Swap singular values and vectors. */
+
+	    d__[isub] = d__[i__];
+	    d__[i__] = smin;
+	    if (*ncvt > 0) {
+		sswap_(ncvt, &vt[isub + vt_dim1], ldvt, &vt[i__ + vt_dim1],
+			ldvt);
+	    }
+	    if (*nru > 0) {
+		sswap_(nru, &u[isub * u_dim1 + 1], &c__1, &u[i__ * u_dim1 + 1]
+			, &c__1);
+	    }
+	    if (*ncc > 0) {
+		sswap_(ncc, &c__[isub + c_dim1], ldc, &c__[i__ + c_dim1], ldc)
+			;
+	    }
+	}
+/* L40: */
+    }
+
+    return 0;
+
+/*     End of SLASDQ */
+
+} /* slasdq_ */
+
+/* Subroutine */ int slasdt_(integer *n, integer *lvl, integer *nd, integer *
+	inode, integer *ndiml, integer *ndimr, integer *msub)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Builtin functions */
+    double log(doublereal);
+
+    /* Local variables */
+    static integer i__, il, ir, maxn;
+    static real temp;
+    static integer nlvl, llst, ncrnt;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASDT creates a tree of subproblems for bidiagonal divide and
+    conquer.
+
+    Arguments
+    =========
+
+     N      (input) INTEGER
+            On entry, the number of diagonal elements of the
+            bidiagonal matrix.
+
+     LVL    (output) INTEGER
+            On exit, the number of levels on the computation tree.
+
+     ND     (output) INTEGER
+            On exit, the number of nodes on the tree.
+
+     INODE  (output) INTEGER array, dimension ( N )
+            On exit, centers of subproblems.
+
+     NDIML  (output) INTEGER array, dimension ( N )
+            On exit, row dimensions of left children.
+
+     NDIMR  (output) INTEGER array, dimension ( N )
+            On exit, row dimensions of right children.
+
+     MSUB   (input) INTEGER.
+            On entry, the maximum row dimension each subproblem at the
+            bottom of the tree can be of.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Find the number of levels on the tree.
+*/
+
+    /* Parameter adjustments */
+    --ndimr;
+    --ndiml;
+    --inode;
+
+    /* Function Body */
+    maxn = max(1,*n);
+    temp = log((real) maxn / (real) (*msub + 1)) / log(2.f);
+    *lvl = (integer) temp + 1;
+
+    i__ = *n / 2;
+    inode[1] = i__ + 1;
+    ndiml[1] = i__;
+    ndimr[1] = *n - i__ - 1;
+    il = 0;
+    ir = 1;
+    llst = 1;
+    i__1 = *lvl - 1;
+    for (nlvl = 1; nlvl <= i__1; ++nlvl) {
+
+/*
+          Constructing the tree at (NLVL+1)-st level. The number of
+          nodes created on this level is LLST * 2.
+*/
+
+	i__2 = llst - 1;
+	for (i__ = 0; i__ <= i__2; ++i__) {
+	    il += 2;
+	    ir += 2;
+	    ncrnt = llst + i__;
+	    ndiml[il] = ndiml[ncrnt] / 2;
+	    ndimr[il] = ndiml[ncrnt] - ndiml[il] - 1;
+	    inode[il] = inode[ncrnt] - ndimr[il] - 1;
+	    ndiml[ir] = ndimr[ncrnt] / 2;
+	    ndimr[ir] = ndimr[ncrnt] - ndiml[ir] - 1;
+	    inode[ir] = inode[ncrnt] + ndiml[ir] + 1;
+/* L10: */
+	}
+	llst <<= 1;
+/* L20: */
+    }
+    *nd = ((llst) << (1)) - 1;
+
+    return 0;
+
+/*     End of SLASDT */
+
+} /* slasdt_ */
+
+/* Subroutine */ int slaset_(char *uplo, integer *m, integer *n, real *alpha,
+	real *beta, real *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLASET initializes an m-by-n matrix A to BETA on the diagonal and
+    ALPHA on the offdiagonals.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be set.
+            = 'U':      Upper triangular part is set; the strictly lower
+                        triangular part of A is not changed.
+            = 'L':      Lower triangular part is set; the strictly upper
+                        triangular part of A is not changed.
+            Otherwise:  All of the matrix A is set.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    ALPHA   (input) REAL
+            The constant to which the offdiagonal elements are to be set.
+
+    BETA    (input) REAL
+            The constant to which the diagonal elements are to be set.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On exit, the leading m-by-n submatrix of A is set as follows:
+
+            if UPLO = 'U', A(i,j) = ALPHA, 1<=i<=j-1, 1<=j<=n,
+            if UPLO = 'L', A(i,j) = ALPHA, j+1<=i<=m, 1<=j<=n,
+            otherwise,     A(i,j) = ALPHA, 1<=i<=m, 1<=j<=n, i.ne.j,
+
+            and, for all UPLO, A(i,i) = BETA, 1<=i<=min(m,n).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+
+/*
+          Set the strictly upper triangular or trapezoidal part of the
+          array to ALPHA.
+*/
+
+	i__1 = *n;
+	for (j = 2; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j - 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = *alpha;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+
+/*
+          Set the strictly lower triangular or trapezoidal part of the
+          array to ALPHA.
+*/
+
+	i__1 = min(*m,*n);
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = *alpha;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+    } else {
+
+/*        Set the leading m-by-n submatrix to ALPHA. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = *alpha;
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+/*     Set the first min(M,N) diagonal elements to BETA. */
+
+    i__1 = min(*m,*n);
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	a[i__ + i__ * a_dim1] = *beta;
+/* L70: */
+    }
+
+    return 0;
+
+/*     End of SLASET */
+
+} /* slaset_ */
+
+/* Subroutine */ int slasq1_(integer *n, real *d__, real *e, real *work,
+	integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    real r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__;
+    static real eps;
+    extern /* Subroutine */ int slas2_(real *, real *, real *, real *, real *)
+	    ;
+    static real scale;
+    static integer iinfo;
+    static real sigmn, sigmx;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *), slasq2_(integer *, real *, integer *);
+    extern doublereal slamch_(char *);
+    static real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *), slascl_(
+	    char *, integer *, integer *, real *, real *, integer *, integer *
+	    , real *, integer *, integer *), slasrt_(char *, integer *
+	    , real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASQ1 computes the singular values of a real N-by-N bidiagonal
+    matrix with diagonal D and off-diagonal E. The singular values
+    are computed to high relative accuracy, in the absence of
+    denormalization, underflow and overflow. The algorithm was first
+    presented in
+
+    "Accurate singular values and differential qd algorithms" by K. V.
+    Fernando and B. N. Parlett, Numer. Math., Vol-67, No. 2, pp. 191-230,
+    1994,
+
+    and the present implementation is described in "An implementation of
+    the dqds Algorithm (Positive Case)", LAPACK Working Note.
+
+    Arguments
+    =========
+
+    N     (input) INTEGER
+          The number of rows and columns in the matrix. N >= 0.
+
+    D     (input/output) REAL array, dimension (N)
+          On entry, D contains the diagonal elements of the
+          bidiagonal matrix whose SVD is desired. On normal exit,
+          D contains the singular values in decreasing order.
+
+    E     (input/output) REAL array, dimension (N)
+          On entry, elements E(1:N-1) contain the off-diagonal elements
+          of the bidiagonal matrix whose SVD is desired.
+          On exit, E is overwritten.
+
+    WORK  (workspace) REAL array, dimension (4*N)
+
+    INFO  (output) INTEGER
+          = 0: successful exit
+          < 0: if INFO = -i, the i-th argument had an illegal value
+          > 0: the algorithm failed
+               = 1, a split was marked by a positive value in E
+               = 2, current block of Z not diagonalized after 30*N
+                    iterations (in inner while loop)
+               = 3, termination criterion of outer while loop not met
+                    (program created more than N unreduced blocks)
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --work;
+    --e;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -2;
+	i__1 = -(*info);
+	xerbla_("SLASQ1", &i__1);
+	return 0;
+    } else if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+	d__[1] = dabs(d__[1]);
+	return 0;
+    } else if (*n == 2) {
+	slas2_(&d__[1], &e[1], &d__[2], &sigmn, &sigmx);
+	d__[1] = sigmx;
+	d__[2] = sigmn;
+	return 0;
+    }
+
+/*     Estimate the largest singular value. */
+
+    sigmx = 0.f;
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__[i__] = (r__1 = d__[i__], dabs(r__1));
+/* Computing MAX */
+	r__2 = sigmx, r__3 = (r__1 = e[i__], dabs(r__1));
+	sigmx = dmax(r__2,r__3);
+/* L10: */
+    }
+    d__[*n] = (r__1 = d__[*n], dabs(r__1));
+
+/*     Early return if SIGMX is zero (matrix is already diagonal). */
+
+    if (sigmx == 0.f) {
+	slasrt_("D", n, &d__[1], &iinfo);
+	return 0;
+    }
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	r__1 = sigmx, r__2 = d__[i__];
+	sigmx = dmax(r__1,r__2);
+/* L20: */
+    }
+
+/*
+       Copy D and E into WORK (in the Z format) and scale (squaring the
+       input data makes scaling by a power of the radix pointless).
+*/
+
+    eps = slamch_("Precision");
+    safmin = slamch_("Safe minimum");
+    scale = sqrt(eps / safmin);
+    scopy_(n, &d__[1], &c__1, &work[1], &c__2);
+    i__1 = *n - 1;
+    scopy_(&i__1, &e[1], &c__1, &work[2], &c__2);
+    i__1 = ((*n) << (1)) - 1;
+    i__2 = ((*n) << (1)) - 1;
+    slascl_("G", &c__0, &c__0, &sigmx, &scale, &i__1, &c__1, &work[1], &i__2,
+	    &iinfo);
+
+/*     Compute the q's and e's. */
+
+    i__1 = ((*n) << (1)) - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing 2nd power */
+	r__1 = work[i__];
+	work[i__] = r__1 * r__1;
+/* L30: */
+    }
+    work[*n * 2] = 0.f;
+
+    slasq2_(n, &work[1], info);
+
+    if (*info == 0) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    d__[i__] = sqrt(work[i__]);
+/* L40: */
+	}
+	slascl_("G", &c__0, &c__0, &scale, &sigmx, n, &c__1, &d__[1], n, &
+		iinfo);
+    }
+
+    return 0;
+
+/*     End of SLASQ1 */
+
+} /* slasq1_ */
+
+/* Subroutine */ int slasq2_(integer *n, real *z__, integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real d__, e;
+    static integer k;
+    static real s, t;
+    static integer i0, i4, n0, pp;
+    static real eps, tol;
+    static integer ipn4;
+    static real tol2;
+    static logical ieee;
+    static integer nbig;
+    static real dmin__, emin, emax;
+    static integer ndiv, iter;
+    static real qmin, temp, qmax, zmax;
+    static integer splt, nfail;
+    static real desig, trace, sigma;
+    static integer iinfo;
+    extern /* Subroutine */ int slasq3_(integer *, integer *, real *, integer
+	    *, real *, real *, real *, real *, integer *, integer *, integer *
+	    , logical *);
+    extern doublereal slamch_(char *);
+    static integer iwhila, iwhilb;
+    static real oldemn, safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASQ2 computes all the eigenvalues of the symmetric positive
+    definite tridiagonal matrix associated with the qd array Z to high
+    relative accuracy are computed to high relative accuracy, in the
+    absence of denormalization, underflow and overflow.
+
+    To see the relation of Z to the tridiagonal matrix, let L be a
+    unit lower bidiagonal matrix with subdiagonals Z(2,4,6,,..) and
+    let U be an upper bidiagonal matrix with 1's above and diagonal
+    Z(1,3,5,,..). The tridiagonal is L*U or, if you prefer, the
+    symmetric tridiagonal to which it is similar.
+
+    Note : SLASQ2 defines a logical variable, IEEE, which is true
+    on machines which follow ieee-754 floating-point standard in their
+    handling of infinities and NaNs, and false otherwise. This variable
+    is passed to SLASQ3.
+
+    Arguments
+    =========
+
+    N     (input) INTEGER
+          The number of rows and columns in the matrix. N >= 0.
+
+    Z     (workspace) REAL array, dimension ( 4*N )
+          On entry Z holds the qd array. On exit, entries 1 to N hold
+          the eigenvalues in decreasing order, Z( 2*N+1 ) holds the
+          trace, and Z( 2*N+2 ) holds the sum of the eigenvalues. If
+          N > 2, then Z( 2*N+3 ) holds the iteration count, Z( 2*N+4 )
+          holds NDIVS/NIN^2, and Z( 2*N+5 ) holds the percentage of
+          shifts that failed.
+
+    INFO  (output) INTEGER
+          = 0: successful exit
+          < 0: if the i-th argument is a scalar and had an illegal
+               value, then INFO = -i, if the i-th argument is an
+               array and the j-entry had an illegal value, then
+               INFO = -(i*100+j)
+          > 0: the algorithm failed
+                = 1, a split was marked by a positive value in E
+                = 2, current block of Z not diagonalized after 30*N
+                     iterations (in inner while loop)
+                = 3, termination criterion of outer while loop not met
+                     (program created more than N unreduced blocks)
+
+    Further Details
+    ===============
+    Local Variables: I0:N0 defines a current unreduced segment of Z.
+    The shifts are accumulated in SIGMA. Iteration count is in ITER.
+    Ping-pong is controlled by PP (alternates between 0 and 1).
+
+    =====================================================================
+
+
+       Test the input arguments.
+       (in case SLASQ2 is not called by SLASQ1)
+*/
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+    *info = 0;
+    eps = slamch_("Precision");
+    safmin = slamch_("Safe minimum");
+    tol = eps * 100.f;
+/* Computing 2nd power */
+    r__1 = tol;
+    tol2 = r__1 * r__1;
+
+    if (*n < 0) {
+	*info = -1;
+	xerbla_("SLASQ2", &c__1);
+	return 0;
+    } else if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+
+/*        1-by-1 case. */
+
+	if (z__[1] < 0.f) {
+	    *info = -201;
+	    xerbla_("SLASQ2", &c__2);
+	}
+	return 0;
+    } else if (*n == 2) {
+
+/*        2-by-2 case. */
+
+	if ((z__[2] < 0.f) || (z__[3] < 0.f)) {
+	    *info = -2;
+	    xerbla_("SLASQ2", &c__2);
+	    return 0;
+	} else if (z__[3] > z__[1]) {
+	    d__ = z__[3];
+	    z__[3] = z__[1];
+	    z__[1] = d__;
+	}
+	z__[5] = z__[1] + z__[2] + z__[3];
+	if (z__[2] > z__[3] * tol2) {
+	    t = (z__[1] - z__[3] + z__[2]) * .5f;
+	    s = z__[3] * (z__[2] / t);
+	    if (s <= t) {
+		s = z__[3] * (z__[2] / (t * (sqrt(s / t + 1.f) + 1.f)));
+	    } else {
+		s = z__[3] * (z__[2] / (t + sqrt(t) * sqrt(t + s)));
+	    }
+	    t = z__[1] + (s + z__[2]);
+	    z__[3] *= z__[1] / t;
+	    z__[1] = t;
+	}
+	z__[2] = z__[3];
+	z__[6] = z__[2] + z__[1];
+	return 0;
+    }
+
+/*     Check for negative data and compute sums of q's and e's. */
+
+    z__[*n * 2] = 0.f;
+    emin = z__[2];
+    qmax = 0.f;
+    zmax = 0.f;
+    d__ = 0.f;
+    e = 0.f;
+
+    i__1 = (*n - 1) << (1);
+    for (k = 1; k <= i__1; k += 2) {
+	if (z__[k] < 0.f) {
+	    *info = -(k + 200);
+	    xerbla_("SLASQ2", &c__2);
+	    return 0;
+	} else if (z__[k + 1] < 0.f) {
+	    *info = -(k + 201);
+	    xerbla_("SLASQ2", &c__2);
+	    return 0;
+	}
+	d__ += z__[k];
+	e += z__[k + 1];
+/* Computing MAX */
+	r__1 = qmax, r__2 = z__[k];
+	qmax = dmax(r__1,r__2);
+/* Computing MIN */
+	r__1 = emin, r__2 = z__[k + 1];
+	emin = dmin(r__1,r__2);
+/* Computing MAX */
+	r__1 = max(qmax,zmax), r__2 = z__[k + 1];
+	zmax = dmax(r__1,r__2);
+/* L10: */
+    }
+    if (z__[((*n) << (1)) - 1] < 0.f) {
+	*info = -(((*n) << (1)) + 199);
+	xerbla_("SLASQ2", &c__2);
+	return 0;
+    }
+    d__ += z__[((*n) << (1)) - 1];
+/* Computing MAX */
+    r__1 = qmax, r__2 = z__[((*n) << (1)) - 1];
+    qmax = dmax(r__1,r__2);
+    zmax = dmax(qmax,zmax);
+
+/*     Check for diagonality. */
+
+    if (e == 0.f) {
+	i__1 = *n;
+	for (k = 2; k <= i__1; ++k) {
+	    z__[k] = z__[((k) << (1)) - 1];
+/* L20: */
+	}
+	slasrt_("D", n, &z__[1], &iinfo);
+	z__[((*n) << (1)) - 1] = d__;
+	return 0;
+    }
+
+    trace = d__ + e;
+
+/*     Check for zero data. */
+
+    if (trace == 0.f) {
+	z__[((*n) << (1)) - 1] = 0.f;
+	return 0;
+    }
+
+/*     Check whether the machine is IEEE conformable. */
+
+    ieee = ilaenv_(&c__10, "SLASQ2", "N", &c__1, &c__2, &c__3, &c__4, (ftnlen)
+	    6, (ftnlen)1) == 1 && ilaenv_(&c__11, "SLASQ2", "N", &c__1, &c__2,
+	     &c__3, &c__4, (ftnlen)6, (ftnlen)1) == 1;
+
+/*     Rearrange data for locality: Z=(q1,qq1,e1,ee1,q2,qq2,e2,ee2,...). */
+
+    for (k = (*n) << (1); k >= 2; k += -2) {
+	z__[k * 2] = 0.f;
+	z__[((k) << (1)) - 1] = z__[k];
+	z__[((k) << (1)) - 2] = 0.f;
+	z__[((k) << (1)) - 3] = z__[k - 1];
+/* L30: */
+    }
+
+    i0 = 1;
+    n0 = *n;
+
+/*     Reverse the qd-array, if warranted. */
+
+    if (z__[((i0) << (2)) - 3] * 1.5f < z__[((n0) << (2)) - 3]) {
+	ipn4 = (i0 + n0) << (2);
+	i__1 = (i0 + n0 - 1) << (1);
+	for (i4 = (i0) << (2); i4 <= i__1; i4 += 4) {
+	    temp = z__[i4 - 3];
+	    z__[i4 - 3] = z__[ipn4 - i4 - 3];
+	    z__[ipn4 - i4 - 3] = temp;
+	    temp = z__[i4 - 1];
+	    z__[i4 - 1] = z__[ipn4 - i4 - 5];
+	    z__[ipn4 - i4 - 5] = temp;
+/* L40: */
+	}
+    }
+
+/*     Initial split checking via dqd and Li's test. */
+
+    pp = 0;
+
+    for (k = 1; k <= 2; ++k) {
+
+	d__ = z__[((n0) << (2)) + pp - 3];
+	i__1 = ((i0) << (2)) + pp;
+	for (i4 = ((n0 - 1) << (2)) + pp; i4 >= i__1; i4 += -4) {
+	    if (z__[i4 - 1] <= tol2 * d__) {
+		z__[i4 - 1] = -0.f;
+		d__ = z__[i4 - 3];
+	    } else {
+		d__ = z__[i4 - 3] * (d__ / (d__ + z__[i4 - 1]));
+	    }
+/* L50: */
+	}
+
+/*        dqd maps Z to ZZ plus Li's test. */
+
+	emin = z__[((i0) << (2)) + pp + 1];
+	d__ = z__[((i0) << (2)) + pp - 3];
+	i__1 = ((n0 - 1) << (2)) + pp;
+	for (i4 = ((i0) << (2)) + pp; i4 <= i__1; i4 += 4) {
+	    z__[i4 - ((pp) << (1)) - 2] = d__ + z__[i4 - 1];
+	    if (z__[i4 - 1] <= tol2 * d__) {
+		z__[i4 - 1] = -0.f;
+		z__[i4 - ((pp) << (1)) - 2] = d__;
+		z__[i4 - ((pp) << (1))] = 0.f;
+		d__ = z__[i4 + 1];
+	    } else if (safmin * z__[i4 + 1] < z__[i4 - ((pp) << (1)) - 2] &&
+		    safmin * z__[i4 - ((pp) << (1)) - 2] < z__[i4 + 1]) {
+		temp = z__[i4 + 1] / z__[i4 - ((pp) << (1)) - 2];
+		z__[i4 - ((pp) << (1))] = z__[i4 - 1] * temp;
+		d__ *= temp;
+	    } else {
+		z__[i4 - ((pp) << (1))] = z__[i4 + 1] * (z__[i4 - 1] / z__[i4
+			- ((pp) << (1)) - 2]);
+		d__ = z__[i4 + 1] * (d__ / z__[i4 - ((pp) << (1)) - 2]);
+	    }
+/* Computing MIN */
+	    r__1 = emin, r__2 = z__[i4 - ((pp) << (1))];
+	    emin = dmin(r__1,r__2);
+/* L60: */
+	}
+	z__[((n0) << (2)) - pp - 2] = d__;
+
+/*        Now find qmax. */
+
+	qmax = z__[((i0) << (2)) - pp - 2];
+	i__1 = ((n0) << (2)) - pp - 2;
+	for (i4 = ((i0) << (2)) - pp + 2; i4 <= i__1; i4 += 4) {
+/* Computing MAX */
+	    r__1 = qmax, r__2 = z__[i4];
+	    qmax = dmax(r__1,r__2);
+/* L70: */
+	}
+
+/*        Prepare for the next iteration on K. */
+
+	pp = 1 - pp;
+/* L80: */
+    }
+
+    iter = 2;
+    nfail = 0;
+    ndiv = (n0 - i0) << (1);
+
+    i__1 = *n + 1;
+    for (iwhila = 1; iwhila <= i__1; ++iwhila) {
+	if (n0 < 1) {
+	    goto L150;
+	}
+
+/*
+          While array unfinished do
+
+          E(N0) holds the value of SIGMA when submatrix in I0:N0
+          splits from the rest of the array, but is negated.
+*/
+
+	desig = 0.f;
+	if (n0 == *n) {
+	    sigma = 0.f;
+	} else {
+	    sigma = -z__[((n0) << (2)) - 1];
+	}
+	if (sigma < 0.f) {
+	    *info = 1;
+	    return 0;
+	}
+
+/*
+          Find last unreduced submatrix's top index I0, find QMAX and
+          EMIN. Find Gershgorin-type bound if Q's much greater than E's.
+*/
+
+	emax = 0.f;
+	if (n0 > i0) {
+	    emin = (r__1 = z__[((n0) << (2)) - 5], dabs(r__1));
+	} else {
+	    emin = 0.f;
+	}
+	qmin = z__[((n0) << (2)) - 3];
+	qmax = qmin;
+	for (i4 = (n0) << (2); i4 >= 8; i4 += -4) {
+	    if (z__[i4 - 5] <= 0.f) {
+		goto L100;
+	    }
+	    if (qmin >= emax * 4.f) {
+/* Computing MIN */
+		r__1 = qmin, r__2 = z__[i4 - 3];
+		qmin = dmin(r__1,r__2);
+/* Computing MAX */
+		r__1 = emax, r__2 = z__[i4 - 5];
+		emax = dmax(r__1,r__2);
+	    }
+/* Computing MAX */
+	    r__1 = qmax, r__2 = z__[i4 - 7] + z__[i4 - 5];
+	    qmax = dmax(r__1,r__2);
+/* Computing MIN */
+	    r__1 = emin, r__2 = z__[i4 - 5];
+	    emin = dmin(r__1,r__2);
+/* L90: */
+	}
+	i4 = 4;
+
+L100:
+	i0 = i4 / 4;
+
+/*        Store EMIN for passing to SLASQ3. */
+
+	z__[((n0) << (2)) - 1] = emin;
+
+/*
+          Put -(initial shift) into DMIN.
+
+   Computing MAX
+*/
+	r__1 = 0.f, r__2 = qmin - sqrt(qmin) * 2.f * sqrt(emax);
+	dmin__ = -dmax(r__1,r__2);
+
+/*        Now I0:N0 is unreduced. PP = 0 for ping, PP = 1 for pong. */
+
+	pp = 0;
+
+	nbig = (n0 - i0 + 1) * 30;
+	i__2 = nbig;
+	for (iwhilb = 1; iwhilb <= i__2; ++iwhilb) {
+	    if (i0 > n0) {
+		goto L130;
+	    }
+
+/*           While submatrix unfinished take a good dqds step. */
+
+	    slasq3_(&i0, &n0, &z__[1], &pp, &dmin__, &sigma, &desig, &qmax, &
+		    nfail, &iter, &ndiv, &ieee);
+
+	    pp = 1 - pp;
+
+/*           When EMIN is very small check for splits. */
+
+	    if (pp == 0 && n0 - i0 >= 3) {
+		if ((z__[n0 * 4] <= tol2 * qmax) || (z__[((n0) << (2)) - 1] <=
+			 tol2 * sigma)) {
+		    splt = i0 - 1;
+		    qmax = z__[((i0) << (2)) - 3];
+		    emin = z__[((i0) << (2)) - 1];
+		    oldemn = z__[i0 * 4];
+		    i__3 = (n0 - 3) << (2);
+		    for (i4 = (i0) << (2); i4 <= i__3; i4 += 4) {
+			if ((z__[i4] <= tol2 * z__[i4 - 3]) || (z__[i4 - 1] <=
+				 tol2 * sigma)) {
+			    z__[i4 - 1] = -sigma;
+			    splt = i4 / 4;
+			    qmax = 0.f;
+			    emin = z__[i4 + 3];
+			    oldemn = z__[i4 + 4];
+			} else {
+/* Computing MAX */
+			    r__1 = qmax, r__2 = z__[i4 + 1];
+			    qmax = dmax(r__1,r__2);
+/* Computing MIN */
+			    r__1 = emin, r__2 = z__[i4 - 1];
+			    emin = dmin(r__1,r__2);
+/* Computing MIN */
+			    r__1 = oldemn, r__2 = z__[i4];
+			    oldemn = dmin(r__1,r__2);
+			}
+/* L110: */
+		    }
+		    z__[((n0) << (2)) - 1] = emin;
+		    z__[n0 * 4] = oldemn;
+		    i0 = splt + 1;
+		}
+	    }
+
+/* L120: */
+	}
+
+	*info = 2;
+	return 0;
+
+/*        end IWHILB */
+
+L130:
+
+/* L140: */
+	;
+    }
+
+    *info = 3;
+    return 0;
+
+/*     end IWHILA */
+
+L150:
+
+/*     Move q's to the front. */
+
+    i__1 = *n;
+    for (k = 2; k <= i__1; ++k) {
+	z__[k] = z__[((k) << (2)) - 3];
+/* L160: */
+    }
+
+/*     Sort and compute sum of eigenvalues. */
+
+    slasrt_("D", n, &z__[1], &iinfo);
+
+    e = 0.f;
+    for (k = *n; k >= 1; --k) {
+	e += z__[k];
+/* L170: */
+    }
+
+/*     Store trace, sum(eigenvalues) and information on performance. */
+
+    z__[((*n) << (1)) + 1] = trace;
+    z__[((*n) << (1)) + 2] = e;
+    z__[((*n) << (1)) + 3] = (real) iter;
+/* Computing 2nd power */
+    i__1 = *n;
+    z__[((*n) << (1)) + 4] = (real) ndiv / (real) (i__1 * i__1);
+    z__[((*n) << (1)) + 5] = nfail * 100.f / (real) iter;
+    return 0;
+
+/*     End of SLASQ2 */
+
+} /* slasq2_ */
+
+/* Subroutine */ int slasq3_(integer *i0, integer *n0, real *z__, integer *pp,
+	 real *dmin__, real *sigma, real *desig, real *qmax, integer *nfail,
+	integer *iter, integer *ndiv, logical *ieee)
+{
+    /* Initialized data */
+
+    static integer ttype = 0;
+    static real dmin1 = 0.f;
+    static real dmin2 = 0.f;
+    static real dn = 0.f;
+    static real dn1 = 0.f;
+    static real dn2 = 0.f;
+    static real tau = 0.f;
+
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real s, t;
+    static integer j4, nn;
+    static real eps, tol;
+    static integer n0in, ipn4;
+    static real tol2, temp;
+    extern /* Subroutine */ int slasq4_(integer *, integer *, real *, integer
+	    *, integer *, real *, real *, real *, real *, real *, real *,
+	    real *, integer *), slasq5_(integer *, integer *, real *, integer
+	    *, real *, real *, real *, real *, real *, real *, real *,
+	    logical *), slasq6_(integer *, integer *, real *, integer *, real
+	    *, real *, real *, real *, real *, real *);
+    extern doublereal slamch_(char *);
+    static real safmin;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       May 17, 2000
+
+
+    Purpose
+    =======
+
+    SLASQ3 checks for deflation, computes a shift (TAU) and calls dqds.
+    In case of failure it changes shifts, and tries again until output
+    is positive.
+
+    Arguments
+    =========
+
+    I0     (input) INTEGER
+           First index.
+
+    N0     (input) INTEGER
+           Last index.
+
+    Z      (input) REAL array, dimension ( 4*N )
+           Z holds the qd array.
+
+    PP     (input) INTEGER
+           PP=0 for ping, PP=1 for pong.
+
+    DMIN   (output) REAL
+           Minimum value of d.
+
+    SIGMA  (output) REAL
+           Sum of shifts used in current segment.
+
+    DESIG  (input/output) REAL
+           Lower order part of SIGMA
+
+    QMAX   (input) REAL
+           Maximum value of q.
+
+    NFAIL  (output) INTEGER
+           Number of times shift was too big.
+
+    ITER   (output) INTEGER
+           Number of iterations.
+
+    NDIV   (output) INTEGER
+           Number of divisions.
+
+    TTYPE  (output) INTEGER
+           Shift type.
+
+    IEEE   (input) LOGICAL
+           Flag for IEEE or non IEEE arithmetic (passed to SLASQ5).
+
+    =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+
+    n0in = *n0;
+    eps = slamch_("Precision");
+    safmin = slamch_("Safe minimum");
+    tol = eps * 100.f;
+/* Computing 2nd power */
+    r__1 = tol;
+    tol2 = r__1 * r__1;
+
+/*     Check for deflation. */
+
+L10:
+
+    if (*n0 < *i0) {
+	return 0;
+    }
+    if (*n0 == *i0) {
+	goto L20;
+    }
+    nn = ((*n0) << (2)) + *pp;
+    if (*n0 == *i0 + 1) {
+	goto L40;
+    }
+
+/*     Check whether E(N0-1) is negligible, 1 eigenvalue. */
+
+    if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - ((*pp) << (1))
+	     - 4] > tol2 * z__[nn - 7]) {
+	goto L30;
+    }
+
+L20:
+
+    z__[((*n0) << (2)) - 3] = z__[((*n0) << (2)) + *pp - 3] + *sigma;
+    --(*n0);
+    goto L10;
+
+/*     Check  whether E(N0-2) is negligible, 2 eigenvalues. */
+
+L30:
+
+    if (z__[nn - 9] > tol2 * *sigma && z__[nn - ((*pp) << (1)) - 8] > tol2 *
+	    z__[nn - 11]) {
+	goto L50;
+    }
+
+L40:
+
+    if (z__[nn - 3] > z__[nn - 7]) {
+	s = z__[nn - 3];
+	z__[nn - 3] = z__[nn - 7];
+	z__[nn - 7] = s;
+    }
+    if (z__[nn - 5] > z__[nn - 3] * tol2) {
+	t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5f;
+	s = z__[nn - 3] * (z__[nn - 5] / t);
+	if (s <= t) {
+	    s = z__[nn - 3] * (z__[nn - 5] / (t * (sqrt(s / t + 1.f) + 1.f)));
+	} else {
+	    s = z__[nn - 3] * (z__[nn - 5] / (t + sqrt(t) * sqrt(t + s)));
+	}
+	t = z__[nn - 7] + (s + z__[nn - 5]);
+	z__[nn - 3] *= z__[nn - 7] / t;
+	z__[nn - 7] = t;
+    }
+    z__[((*n0) << (2)) - 7] = z__[nn - 7] + *sigma;
+    z__[((*n0) << (2)) - 3] = z__[nn - 3] + *sigma;
+    *n0 += -2;
+    goto L10;
+
+L50:
+
+/*     Reverse the qd-array, if warranted. */
+
+    if ((*dmin__ <= 0.f) || (*n0 < n0in)) {
+	if (z__[((*i0) << (2)) + *pp - 3] * 1.5f < z__[((*n0) << (2)) + *pp -
+		3]) {
+	    ipn4 = (*i0 + *n0) << (2);
+	    i__1 = (*i0 + *n0 - 1) << (1);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		temp = z__[j4 - 3];
+		z__[j4 - 3] = z__[ipn4 - j4 - 3];
+		z__[ipn4 - j4 - 3] = temp;
+		temp = z__[j4 - 2];
+		z__[j4 - 2] = z__[ipn4 - j4 - 2];
+		z__[ipn4 - j4 - 2] = temp;
+		temp = z__[j4 - 1];
+		z__[j4 - 1] = z__[ipn4 - j4 - 5];
+		z__[ipn4 - j4 - 5] = temp;
+		temp = z__[j4];
+		z__[j4] = z__[ipn4 - j4 - 4];
+		z__[ipn4 - j4 - 4] = temp;
+/* L60: */
+	    }
+	    if (*n0 - *i0 <= 4) {
+		z__[((*n0) << (2)) + *pp - 1] = z__[((*i0) << (2)) + *pp - 1];
+		z__[((*n0) << (2)) - *pp] = z__[((*i0) << (2)) - *pp];
+	    }
+/* Computing MIN */
+	    r__1 = dmin2, r__2 = z__[((*n0) << (2)) + *pp - 1];
+	    dmin2 = dmin(r__1,r__2);
+/* Computing MIN */
+	    r__1 = z__[((*n0) << (2)) + *pp - 1], r__2 = z__[((*i0) << (2)) +
+		    *pp - 1], r__1 = min(r__1,r__2), r__2 = z__[((*i0) << (2))
+		     + *pp + 3];
+	    z__[((*n0) << (2)) + *pp - 1] = dmin(r__1,r__2);
+/* Computing MIN */
+	    r__1 = z__[((*n0) << (2)) - *pp], r__2 = z__[((*i0) << (2)) - *pp]
+		    , r__1 = min(r__1,r__2), r__2 = z__[((*i0) << (2)) - *pp
+		    + 4];
+	    z__[((*n0) << (2)) - *pp] = dmin(r__1,r__2);
+/* Computing MAX */
+	    r__1 = *qmax, r__2 = z__[((*i0) << (2)) + *pp - 3], r__1 = max(
+		    r__1,r__2), r__2 = z__[((*i0) << (2)) + *pp + 1];
+	    *qmax = dmax(r__1,r__2);
+	    *dmin__ = -0.f;
+	}
+    }
+
+/*
+   L70:
+
+   Computing MIN
+*/
+    r__1 = z__[((*n0) << (2)) + *pp - 1], r__2 = z__[((*n0) << (2)) + *pp - 9]
+	    , r__1 = min(r__1,r__2), r__2 = dmin2 + z__[((*n0) << (2)) - *pp];
+    if ((*dmin__ < 0.f) || (safmin * *qmax < dmin(r__1,r__2))) {
+
+/*        Choose a shift. */
+
+	slasq4_(i0, n0, &z__[1], pp, &n0in, dmin__, &dmin1, &dmin2, &dn, &dn1,
+		 &dn2, &tau, &ttype);
+
+/*        Call dqds until DMIN > 0. */
+
+L80:
+
+	slasq5_(i0, n0, &z__[1], pp, &tau, dmin__, &dmin1, &dmin2, &dn, &dn1,
+		&dn2, ieee);
+
+	*ndiv += *n0 - *i0 + 2;
+	++(*iter);
+
+/*        Check status. */
+
+	if (*dmin__ >= 0.f && dmin1 > 0.f) {
+
+/*           Success. */
+
+	    goto L100;
+
+	} else if (*dmin__ < 0.f && dmin1 > 0.f && z__[((*n0 - 1) << (2)) - *
+		pp] < tol * (*sigma + dn1) && dabs(dn) < tol * *sigma) {
+
+/*           Convergence hidden by negative DN. */
+
+	    z__[((*n0 - 1) << (2)) - *pp + 2] = 0.f;
+	    *dmin__ = 0.f;
+	    goto L100;
+	} else if (*dmin__ < 0.f) {
+
+/*           TAU too big. Select new TAU and try again. */
+
+	    ++(*nfail);
+	    if (ttype < -22) {
+
+/*              Failed twice. Play it safe. */
+
+		tau = 0.f;
+	    } else if (dmin1 > 0.f) {
+
+/*              Late failure. Gives excellent shift. */
+
+		tau = (tau + *dmin__) * (1.f - eps * 2.f);
+		ttype += -11;
+	    } else {
+
+/*              Early failure. Divide by 4. */
+
+		tau *= .25f;
+		ttype += -12;
+	    }
+	    goto L80;
+	} else if (*dmin__ != *dmin__) {
+
+/*           NaN. */
+
+	    tau = 0.f;
+	    goto L80;
+	} else {
+
+/*           Possible underflow. Play it safe. */
+
+	    goto L90;
+	}
+    }
+
+/*     Risk of underflow. */
+
+L90:
+    slasq6_(i0, n0, &z__[1], pp, dmin__, &dmin1, &dmin2, &dn, &dn1, &dn2);
+    *ndiv += *n0 - *i0 + 2;
+    ++(*iter);
+    tau = 0.f;
+
+L100:
+    if (tau < *sigma) {
+	*desig += tau;
+	t = *sigma + *desig;
+	*desig -= t - *sigma;
+    } else {
+	t = *sigma + tau;
+	*desig = *sigma - (t - tau) + *desig;
+    }
+    *sigma = t;
+
+    return 0;
+
+/*     End of SLASQ3 */
+
+} /* slasq3_ */
+
+/* Subroutine */ int slasq4_(integer *i0, integer *n0, real *z__, integer *pp,
+	 integer *n0in, real *dmin__, real *dmin1, real *dmin2, real *dn,
+	real *dn1, real *dn2, real *tau, integer *ttype)
+{
+    /* Initialized data */
+
+    static real g = 0.f;
+
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real s, a2, b1, b2;
+    static integer i4, nn, np;
+    static real gam, gap1, gap2;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASQ4 computes an approximation TAU to the smallest eigenvalue
+    using values of d from the previous transform.
+
+    I0    (input) INTEGER
+          First index.
+
+    N0    (input) INTEGER
+          Last index.
+
+    Z     (input) REAL array, dimension ( 4*N )
+          Z holds the qd array.
+
+    PP    (input) INTEGER
+          PP=0 for ping, PP=1 for pong.
+
+    NOIN  (input) INTEGER
+          The value of N0 at start of EIGTEST.
+
+    DMIN  (input) REAL
+          Minimum value of d.
+
+    DMIN1 (input) REAL
+          Minimum value of d, excluding D( N0 ).
+
+    DMIN2 (input) REAL
+          Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+
+    DN    (input) REAL
+          d(N)
+
+    DN1   (input) REAL
+          d(N-1)
+
+    DN2   (input) REAL
+          d(N-2)
+
+    TAU   (output) REAL
+          This is the shift.
+
+    TTYPE (output) INTEGER
+          Shift type.
+
+    Further Details
+    ===============
+    CNST1 = 9/16
+
+    =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+
+/*
+       A negative DMIN forces the shift to take that absolute value
+       TTYPE records the type of shift.
+*/
+
+    if (*dmin__ <= 0.f) {
+	*tau = -(*dmin__);
+	*ttype = -1;
+	return 0;
+    }
+
+    nn = ((*n0) << (2)) + *pp;
+    if (*n0in == *n0) {
+
+/*        No eigenvalues deflated. */
+
+	if ((*dmin__ == *dn) || (*dmin__ == *dn1)) {
+
+	    b1 = sqrt(z__[nn - 3]) * sqrt(z__[nn - 5]);
+	    b2 = sqrt(z__[nn - 7]) * sqrt(z__[nn - 9]);
+	    a2 = z__[nn - 7] + z__[nn - 5];
+
+/*           Cases 2 and 3. */
+
+	    if (*dmin__ == *dn && *dmin1 == *dn1) {
+		gap2 = *dmin2 - a2 - *dmin2 * .25f;
+		if (gap2 > 0.f && gap2 > b2) {
+		    gap1 = a2 - *dn - b2 / gap2 * b2;
+		} else {
+		    gap1 = a2 - *dn - (b1 + b2);
+		}
+		if (gap1 > 0.f && gap1 > b1) {
+/* Computing MAX */
+		    r__1 = *dn - b1 / gap1 * b1, r__2 = *dmin__ * .5f;
+		    s = dmax(r__1,r__2);
+		    *ttype = -2;
+		} else {
+		    s = 0.f;
+		    if (*dn > b1) {
+			s = *dn - b1;
+		    }
+		    if (a2 > b1 + b2) {
+/* Computing MIN */
+			r__1 = s, r__2 = a2 - (b1 + b2);
+			s = dmin(r__1,r__2);
+		    }
+/* Computing MAX */
+		    r__1 = s, r__2 = *dmin__ * .333f;
+		    s = dmax(r__1,r__2);
+		    *ttype = -3;
+		}
+	    } else {
+
+/*              Case 4. */
+
+		*ttype = -4;
+		s = *dmin__ * .25f;
+		if (*dmin__ == *dn) {
+		    gam = *dn;
+		    a2 = 0.f;
+		    if (z__[nn - 5] > z__[nn - 7]) {
+			return 0;
+		    }
+		    b2 = z__[nn - 5] / z__[nn - 7];
+		    np = nn - 9;
+		} else {
+		    np = nn - ((*pp) << (1));
+		    b2 = z__[np - 2];
+		    gam = *dn1;
+		    if (z__[np - 4] > z__[np - 2]) {
+			return 0;
+		    }
+		    a2 = z__[np - 4] / z__[np - 2];
+		    if (z__[nn - 9] > z__[nn - 11]) {
+			return 0;
+		    }
+		    b2 = z__[nn - 9] / z__[nn - 11];
+		    np = nn - 13;
+		}
+
+/*              Approximate contribution to norm squared from I < NN-1. */
+
+		a2 += b2;
+		i__1 = ((*i0) << (2)) - 1 + *pp;
+		for (i4 = np; i4 >= i__1; i4 += -4) {
+		    if (b2 == 0.f) {
+			goto L20;
+		    }
+		    b1 = b2;
+		    if (z__[i4] > z__[i4 - 2]) {
+			return 0;
+		    }
+		    b2 *= z__[i4] / z__[i4 - 2];
+		    a2 += b2;
+		    if ((dmax(b2,b1) * 100.f < a2) || (.563f < a2)) {
+			goto L20;
+		    }
+/* L10: */
+		}
+L20:
+		a2 *= 1.05f;
+
+/*              Rayleigh quotient residual bound. */
+
+		if (a2 < .563f) {
+		    s = gam * (1.f - sqrt(a2)) / (a2 + 1.f);
+		}
+	    }
+	} else if (*dmin__ == *dn2) {
+
+/*           Case 5. */
+
+	    *ttype = -5;
+	    s = *dmin__ * .25f;
+
+/*           Compute contribution to norm squared from I > NN-2. */
+
+	    np = nn - ((*pp) << (1));
+	    b1 = z__[np - 2];
+	    b2 = z__[np - 6];
+	    gam = *dn2;
+	    if ((z__[np - 8] > b2) || (z__[np - 4] > b1)) {
+		return 0;
+	    }
+	    a2 = z__[np - 8] / b2 * (z__[np - 4] / b1 + 1.f);
+
+/*           Approximate contribution to norm squared from I < NN-2. */
+
+	    if (*n0 - *i0 > 2) {
+		b2 = z__[nn - 13] / z__[nn - 15];
+		a2 += b2;
+		i__1 = ((*i0) << (2)) - 1 + *pp;
+		for (i4 = nn - 17; i4 >= i__1; i4 += -4) {
+		    if (b2 == 0.f) {
+			goto L40;
+		    }
+		    b1 = b2;
+		    if (z__[i4] > z__[i4 - 2]) {
+			return 0;
+		    }
+		    b2 *= z__[i4] / z__[i4 - 2];
+		    a2 += b2;
+		    if ((dmax(b2,b1) * 100.f < a2) || (.563f < a2)) {
+			goto L40;
+		    }
+/* L30: */
+		}
+L40:
+		a2 *= 1.05f;
+	    }
+
+	    if (a2 < .563f) {
+		s = gam * (1.f - sqrt(a2)) / (a2 + 1.f);
+	    }
+	} else {
+
+/*           Case 6, no information to guide us. */
+
+	    if (*ttype == -6) {
+		g += (1.f - g) * .333f;
+	    } else if (*ttype == -18) {
+		g = .083250000000000005f;
+	    } else {
+		g = .25f;
+	    }
+	    s = g * *dmin__;
+	    *ttype = -6;
+	}
+
+    } else if (*n0in == *n0 + 1) {
+
+/*        One eigenvalue just deflated. Use DMIN1, DN1 for DMIN and DN. */
+
+	if (*dmin1 == *dn1 && *dmin2 == *dn2) {
+
+/*           Cases 7 and 8. */
+
+	    *ttype = -7;
+	    s = *dmin1 * .333f;
+	    if (z__[nn - 5] > z__[nn - 7]) {
+		return 0;
+	    }
+	    b1 = z__[nn - 5] / z__[nn - 7];
+	    b2 = b1;
+	    if (b2 == 0.f) {
+		goto L60;
+	    }
+	    i__1 = ((*i0) << (2)) - 1 + *pp;
+	    for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) {
+		a2 = b1;
+		if (z__[i4] > z__[i4 - 2]) {
+		    return 0;
+		}
+		b1 *= z__[i4] / z__[i4 - 2];
+		b2 += b1;
+		if (dmax(b1,a2) * 100.f < b2) {
+		    goto L60;
+		}
+/* L50: */
+	    }
+L60:
+	    b2 = sqrt(b2 * 1.05f);
+/* Computing 2nd power */
+	    r__1 = b2;
+	    a2 = *dmin1 / (r__1 * r__1 + 1.f);
+	    gap2 = *dmin2 * .5f - a2;
+	    if (gap2 > 0.f && gap2 > b2 * a2) {
+/* Computing MAX */
+		r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2);
+		s = dmax(r__1,r__2);
+	    } else {
+/* Computing MAX */
+		r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f);
+		s = dmax(r__1,r__2);
+		*ttype = -8;
+	    }
+	} else {
+
+/*           Case 9. */
+
+	    s = *dmin1 * .25f;
+	    if (*dmin1 == *dn1) {
+		s = *dmin1 * .5f;
+	    }
+	    *ttype = -9;
+	}
+
+    } else if (*n0in == *n0 + 2) {
+
+/*
+          Two eigenvalues deflated. Use DMIN2, DN2 for DMIN and DN.
+
+          Cases 10 and 11.
+*/
+
+	if (*dmin2 == *dn2 && z__[nn - 5] * 2.f < z__[nn - 7]) {
+	    *ttype = -10;
+	    s = *dmin2 * .333f;
+	    if (z__[nn - 5] > z__[nn - 7]) {
+		return 0;
+	    }
+	    b1 = z__[nn - 5] / z__[nn - 7];
+	    b2 = b1;
+	    if (b2 == 0.f) {
+		goto L80;
+	    }
+	    i__1 = ((*i0) << (2)) - 1 + *pp;
+	    for (i4 = ((*n0) << (2)) - 9 + *pp; i4 >= i__1; i4 += -4) {
+		if (z__[i4] > z__[i4 - 2]) {
+		    return 0;
+		}
+		b1 *= z__[i4] / z__[i4 - 2];
+		b2 += b1;
+		if (b1 * 100.f < b2) {
+		    goto L80;
+		}
+/* L70: */
+	    }
+L80:
+	    b2 = sqrt(b2 * 1.05f);
+/* Computing 2nd power */
+	    r__1 = b2;
+	    a2 = *dmin2 / (r__1 * r__1 + 1.f);
+	    gap2 = z__[nn - 7] + z__[nn - 9] - sqrt(z__[nn - 11]) * sqrt(z__[
+		    nn - 9]) - a2;
+	    if (gap2 > 0.f && gap2 > b2 * a2) {
+/* Computing MAX */
+		r__1 = s, r__2 = a2 * (1.f - a2 * 1.01f * (b2 / gap2) * b2);
+		s = dmax(r__1,r__2);
+	    } else {
+/* Computing MAX */
+		r__1 = s, r__2 = a2 * (1.f - b2 * 1.01f);
+		s = dmax(r__1,r__2);
+	    }
+	} else {
+	    s = *dmin2 * .25f;
+	    *ttype = -11;
+	}
+    } else if (*n0in > *n0 + 2) {
+
+/*        Case 12, more than two eigenvalues deflated. No information. */
+
+	s = 0.f;
+	*ttype = -12;
+    }
+
+    *tau = s;
+    return 0;
+
+/*     End of SLASQ4 */
+
+} /* slasq4_ */
+
+/* Subroutine */ int slasq5_(integer *i0, integer *n0, real *z__, integer *pp,
+	 real *tau, real *dmin__, real *dmin1, real *dmin2, real *dn, real *
+	dnm1, real *dnm2, logical *ieee)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    static real d__;
+    static integer j4, j4p2;
+    static real emin, temp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       May 17, 2000
+
+
+    Purpose
+    =======
+
+    SLASQ5 computes one dqds transform in ping-pong form, one
+    version for IEEE machines another for non IEEE machines.
+
+    Arguments
+    =========
+
+    I0    (input) INTEGER
+          First index.
+
+    N0    (input) INTEGER
+          Last index.
+
+    Z     (input) REAL array, dimension ( 4*N )
+          Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+          an extra argument.
+
+    PP    (input) INTEGER
+          PP=0 for ping, PP=1 for pong.
+
+    TAU   (input) REAL
+          This is the shift.
+
+    DMIN  (output) REAL
+          Minimum value of d.
+
+    DMIN1 (output) REAL
+          Minimum value of d, excluding D( N0 ).
+
+    DMIN2 (output) REAL
+          Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+
+    DN    (output) REAL
+          d(N0), the last value of d.
+
+    DNM1  (output) REAL
+          d(N0-1).
+
+    DNM2  (output) REAL
+          d(N0-2).
+
+    IEEE  (input) LOGICAL
+          Flag for IEEE or non IEEE arithmetic.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+    if (*n0 - *i0 - 1 <= 0) {
+	return 0;
+    }
+
+    j4 = ((*i0) << (2)) + *pp - 3;
+    emin = z__[j4 + 4];
+    d__ = z__[j4] - *tau;
+    *dmin__ = d__;
+    *dmin1 = -z__[j4];
+
+    if (*ieee) {
+
+/*        Code for IEEE arithmetic. */
+
+	if (*pp == 0) {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 2] = d__ + z__[j4 - 1];
+		temp = z__[j4 + 1] / z__[j4 - 2];
+		d__ = d__ * temp - *tau;
+		*dmin__ = dmin(*dmin__,d__);
+		z__[j4] = z__[j4 - 1] * temp;
+/* Computing MIN */
+		r__1 = z__[j4];
+		emin = dmin(r__1,emin);
+/* L10: */
+	    }
+	} else {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 3] = d__ + z__[j4];
+		temp = z__[j4 + 2] / z__[j4 - 3];
+		d__ = d__ * temp - *tau;
+		*dmin__ = dmin(*dmin__,d__);
+		z__[j4 - 1] = z__[j4] * temp;
+/* Computing MIN */
+		r__1 = z__[j4 - 1];
+		emin = dmin(r__1,emin);
+/* L20: */
+	    }
+	}
+
+/*        Unroll last two steps. */
+
+	*dnm2 = d__;
+	*dmin2 = *dmin__;
+	j4 = ((*n0 - 2) << (2)) - *pp;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm2 + z__[j4p2];
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
+	*dmin__ = dmin(*dmin__,*dnm1);
+
+	*dmin1 = *dmin__;
+	j4 += 4;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm1 + z__[j4p2];
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
+	*dmin__ = dmin(*dmin__,*dn);
+
+    } else {
+
+/*        Code for non IEEE arithmetic. */
+
+	if (*pp == 0) {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 2] = d__ + z__[j4 - 1];
+		if (d__ < 0.f) {
+		    return 0;
+		} else {
+		    z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
+		    d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
+		}
+		*dmin__ = dmin(*dmin__,d__);
+/* Computing MIN */
+		r__1 = emin, r__2 = z__[j4];
+		emin = dmin(r__1,r__2);
+/* L30: */
+	    }
+	} else {
+	    i__1 = (*n0 - 3) << (2);
+	    for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+		z__[j4 - 3] = d__ + z__[j4];
+		if (d__ < 0.f) {
+		    return 0;
+		} else {
+		    z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
+		    d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
+		}
+		*dmin__ = dmin(*dmin__,d__);
+/* Computing MIN */
+		r__1 = emin, r__2 = z__[j4 - 1];
+		emin = dmin(r__1,r__2);
+/* L40: */
+	    }
+	}
+
+/*        Unroll last two steps. */
+
+	*dnm2 = d__;
+	*dmin2 = *dmin__;
+	j4 = ((*n0 - 2) << (2)) - *pp;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm2 + z__[j4p2];
+	if (*dnm2 < 0.f) {
+	    return 0;
+	} else {
+	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	    *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
+	}
+	*dmin__ = dmin(*dmin__,*dnm1);
+
+	*dmin1 = *dmin__;
+	j4 += 4;
+	j4p2 = j4 + ((*pp) << (1)) - 1;
+	z__[j4 - 2] = *dnm1 + z__[j4p2];
+	if (*dnm1 < 0.f) {
+	    return 0;
+	} else {
+	    z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	    *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
+	}
+	*dmin__ = dmin(*dmin__,*dn);
+
+    }
+
+    z__[j4 + 2] = *dn;
+    z__[((*n0) << (2)) - *pp] = emin;
+    return 0;
+
+/*     End of SLASQ5 */
+
+} /* slasq5_ */
+
+/* Subroutine */ int slasq6_(integer *i0, integer *n0, real *z__, integer *pp,
+	 real *dmin__, real *dmin1, real *dmin2, real *dn, real *dnm1, real *
+	dnm2)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2;
+
+    /* Local variables */
+    static real d__;
+    static integer j4, j4p2;
+    static real emin, temp;
+    extern doublereal slamch_(char *);
+    static real safmin;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    SLASQ6 computes one dqd (shift equal to zero) transform in
+    ping-pong form, with protection against underflow and overflow.
+
+    Arguments
+    =========
+
+    I0    (input) INTEGER
+          First index.
+
+    N0    (input) INTEGER
+          Last index.
+
+    Z     (input) REAL array, dimension ( 4*N )
+          Z holds the qd array. EMIN is stored in Z(4*N0) to avoid
+          an extra argument.
+
+    PP    (input) INTEGER
+          PP=0 for ping, PP=1 for pong.
+
+    DMIN  (output) REAL
+          Minimum value of d.
+
+    DMIN1 (output) REAL
+          Minimum value of d, excluding D( N0 ).
+
+    DMIN2 (output) REAL
+          Minimum value of d, excluding D( N0 ) and D( N0-1 ).
+
+    DN    (output) REAL
+          d(N0), the last value of d.
+
+    DNM1  (output) REAL
+          d(N0-1).
+
+    DNM2  (output) REAL
+          d(N0-2).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --z__;
+
+    /* Function Body */
+    if (*n0 - *i0 - 1 <= 0) {
+	return 0;
+    }
+
+    safmin = slamch_("Safe minimum");
+    j4 = ((*i0) << (2)) + *pp - 3;
+    emin = z__[j4 + 4];
+    d__ = z__[j4];
+    *dmin__ = d__;
+
+    if (*pp == 0) {
+	i__1 = (*n0 - 3) << (2);
+	for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+	    z__[j4 - 2] = d__ + z__[j4 - 1];
+	    if (z__[j4 - 2] == 0.f) {
+		z__[j4] = 0.f;
+		d__ = z__[j4 + 1];
+		*dmin__ = d__;
+		emin = 0.f;
+	    } else if (safmin * z__[j4 + 1] < z__[j4 - 2] && safmin * z__[j4
+		    - 2] < z__[j4 + 1]) {
+		temp = z__[j4 + 1] / z__[j4 - 2];
+		z__[j4] = z__[j4 - 1] * temp;
+		d__ *= temp;
+	    } else {
+		z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
+		d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]);
+	    }
+	    *dmin__ = dmin(*dmin__,d__);
+/* Computing MIN */
+	    r__1 = emin, r__2 = z__[j4];
+	    emin = dmin(r__1,r__2);
+/* L10: */
+	}
+    } else {
+	i__1 = (*n0 - 3) << (2);
+	for (j4 = (*i0) << (2); j4 <= i__1; j4 += 4) {
+	    z__[j4 - 3] = d__ + z__[j4];
+	    if (z__[j4 - 3] == 0.f) {
+		z__[j4 - 1] = 0.f;
+		d__ = z__[j4 + 2];
+		*dmin__ = d__;
+		emin = 0.f;
+	    } else if (safmin * z__[j4 + 2] < z__[j4 - 3] && safmin * z__[j4
+		    - 3] < z__[j4 + 2]) {
+		temp = z__[j4 + 2] / z__[j4 - 3];
+		z__[j4 - 1] = z__[j4] * temp;
+		d__ *= temp;
+	    } else {
+		z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
+		d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]);
+	    }
+	    *dmin__ = dmin(*dmin__,d__);
+/* Computing MIN */
+	    r__1 = emin, r__2 = z__[j4 - 1];
+	    emin = dmin(r__1,r__2);
+/* L20: */
+	}
+    }
+
+/*     Unroll last two steps. */
+
+    *dnm2 = d__;
+    *dmin2 = *dmin__;
+    j4 = ((*n0 - 2) << (2)) - *pp;
+    j4p2 = j4 + ((*pp) << (1)) - 1;
+    z__[j4 - 2] = *dnm2 + z__[j4p2];
+    if (z__[j4 - 2] == 0.f) {
+	z__[j4] = 0.f;
+	*dnm1 = z__[j4p2 + 2];
+	*dmin__ = *dnm1;
+	emin = 0.f;
+    } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
+	    z__[j4p2 + 2]) {
+	temp = z__[j4p2 + 2] / z__[j4 - 2];
+	z__[j4] = z__[j4p2] * temp;
+	*dnm1 = *dnm2 * temp;
+    } else {
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]);
+    }
+    *dmin__ = dmin(*dmin__,*dnm1);
+
+    *dmin1 = *dmin__;
+    j4 += 4;
+    j4p2 = j4 + ((*pp) << (1)) - 1;
+    z__[j4 - 2] = *dnm1 + z__[j4p2];
+    if (z__[j4 - 2] == 0.f) {
+	z__[j4] = 0.f;
+	*dn = z__[j4p2 + 2];
+	*dmin__ = *dn;
+	emin = 0.f;
+    } else if (safmin * z__[j4p2 + 2] < z__[j4 - 2] && safmin * z__[j4 - 2] <
+	    z__[j4p2 + 2]) {
+	temp = z__[j4p2 + 2] / z__[j4 - 2];
+	z__[j4] = z__[j4p2] * temp;
+	*dn = *dnm1 * temp;
+    } else {
+	z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
+	*dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]);
+    }
+    *dmin__ = dmin(*dmin__,*dn);
+
+    z__[j4 + 2] = *dn;
+    z__[((*n0) << (2)) - *pp] = emin;
+    return 0;
+
+/*     End of SLASQ6 */
+
+} /* slasq6_ */
+
+/* Subroutine */ int slasr_(char *side, char *pivot, char *direct, integer *m,
+	 integer *n, real *c__, real *s, real *a, integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, info;
+    static real temp;
+    extern logical lsame_(char *, char *);
+    static real ctemp, stemp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLASR   performs the transformation
+
+       A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+
+       A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+
+    where A is an m by n real matrix and P is an orthogonal matrix,
+    consisting of a sequence of plane rotations determined by the
+    parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+    and z = n when SIDE = 'R' or 'r' ):
+
+    When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+
+       P = P( z - 1 )*...*P( 2 )*P( 1 ),
+
+    and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+
+       P = P( 1 )*P( 2 )*...*P( z - 1 ),
+
+    where  P( k ) is a plane rotation matrix for the following planes:
+
+       when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+          the plane ( k, k + 1 )
+
+       when  PIVOT = 'T' or 't'  ( Top pivot ),
+          the plane ( 1, k + 1 )
+
+       when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+          the plane ( k, z )
+
+    c( k ) and s( k )  must contain the  cosine and sine that define the
+    matrix  P( k ).  The two by two plane rotation part of the matrix
+    P( k ), R( k ), is assumed to be of the form
+
+       R( k ) = (  c( k )  s( k ) ).
+                ( -s( k )  c( k ) )
+
+    This version vectorises across rows of the array A when SIDE = 'L'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            Specifies whether the plane rotation matrix P is applied to
+            A on the left or the right.
+            = 'L':  Left, compute A := P*A
+            = 'R':  Right, compute A:= A*P'
+
+    DIRECT  (input) CHARACTER*1
+            Specifies whether P is a forward or backward sequence of
+            plane rotations.
+            = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+            = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+
+    PIVOT   (input) CHARACTER*1
+            Specifies the plane for which P(k) is a plane rotation
+            matrix.
+            = 'V':  Variable pivot, the plane (k,k+1)
+            = 'T':  Top pivot, the plane (1,k+1)
+            = 'B':  Bottom pivot, the plane (k,z)
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  If m <= 1, an immediate
+            return is effected.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  If n <= 1, an
+            immediate return is effected.
+
+    C, S    (input) REAL arrays, dimension
+                    (M-1) if SIDE = 'L'
+                    (N-1) if SIDE = 'R'
+            c(k) and s(k) contain the cosine and sine that define the
+            matrix P(k).  The two by two plane rotation part of the
+            matrix P(k), R(k), is assumed to be of the form
+            R( k ) = (  c( k )  s( k ) ).
+                     ( -s( k )  c( k ) )
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            The m by n matrix A.  On exit, A is overwritten by P*A if
+            SIDE = 'R' or by A*P' if SIDE = 'L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --c__;
+    --s;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) {
+	info = 1;
+    } else if (! (((lsame_(pivot, "V")) || (lsame_(
+	    pivot, "T"))) || (lsame_(pivot, "B")))) {
+	info = 2;
+    } else if (! ((lsame_(direct, "F")) || (lsame_(
+	    direct, "B")))) {
+	info = 3;
+    } else if (*m < 0) {
+	info = 4;
+    } else if (*n < 0) {
+	info = 5;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("SLASR ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  P * A */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[j + 1 + i__ * a_dim1];
+			    a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp *
+				    a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j
+				    + i__ * a_dim1];
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[j + 1 + i__ * a_dim1];
+			    a[j + 1 + i__ * a_dim1] = ctemp * temp - stemp *
+				    a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * temp + ctemp * a[j
+				    + i__ * a_dim1];
+/* L30: */
+			}
+		    }
+/* L40: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = ctemp * temp - stemp * a[
+				    i__ * a_dim1 + 1];
+			    a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[
+				    i__ * a_dim1 + 1];
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = ctemp * temp - stemp * a[
+				    i__ * a_dim1 + 1];
+			    a[i__ * a_dim1 + 1] = stemp * temp + ctemp * a[
+				    i__ * a_dim1 + 1];
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1]
+				     + ctemp * temp;
+			    a[*m + i__ * a_dim1] = ctemp * a[*m + i__ *
+				    a_dim1] - stemp * temp;
+/* L90: */
+			}
+		    }
+/* L100: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[j + i__ * a_dim1];
+			    a[j + i__ * a_dim1] = stemp * a[*m + i__ * a_dim1]
+				     + ctemp * temp;
+			    a[*m + i__ * a_dim1] = ctemp * a[*m + i__ *
+				    a_dim1] - stemp * temp;
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+	    }
+	}
+    } else if (lsame_(side, "R")) {
+
+/*        Form A * P' */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[i__ + (j + 1) * a_dim1];
+			    a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp *
+				     a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * temp + ctemp * a[
+				    i__ + j * a_dim1];
+/* L130: */
+			}
+		    }
+/* L140: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[i__ + (j + 1) * a_dim1];
+			    a[i__ + (j + 1) * a_dim1] = ctemp * temp - stemp *
+				     a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * temp + ctemp * a[
+				    i__ + j * a_dim1];
+/* L150: */
+			}
+		    }
+/* L160: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = ctemp * temp - stemp * a[
+				    i__ + a_dim1];
+			    a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ +
+				    a_dim1];
+/* L170: */
+			}
+		    }
+/* L180: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = ctemp * temp - stemp * a[
+				    i__ + a_dim1];
+			    a[i__ + a_dim1] = stemp * temp + ctemp * a[i__ +
+				    a_dim1];
+/* L190: */
+			}
+		    }
+/* L200: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1]
+				     + ctemp * temp;
+			    a[i__ + *n * a_dim1] = ctemp * a[i__ + *n *
+				    a_dim1] - stemp * temp;
+/* L210: */
+			}
+		    }
+/* L220: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.f) || (stemp != 0.f)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    temp = a[i__ + j * a_dim1];
+			    a[i__ + j * a_dim1] = stemp * a[i__ + *n * a_dim1]
+				     + ctemp * temp;
+			    a[i__ + *n * a_dim1] = ctemp * a[i__ + *n *
+				    a_dim1] - stemp * temp;
+/* L230: */
+			}
+		    }
+/* L240: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SLASR */
+
+} /* slasr_ */
+
+/* Subroutine */ int slasrt_(char *id, integer *n, real *d__, integer *info)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j;
+    static real d1, d2, d3;
+    static integer dir;
+    static real tmp;
+    static integer endd;
+    extern logical lsame_(char *, char *);
+    static integer stack[64]	/* was [2][32] */;
+    static real dmnmx;
+    static integer start;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static integer stkpnt;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    Sort the numbers in D in increasing order (if ID = 'I') or
+    in decreasing order (if ID = 'D' ).
+
+    Use Quick Sort, reverting to Insertion sort on arrays of
+    size <= 20. Dimension of STACK limits N to about 2**32.
+
+    Arguments
+    =========
+
+    ID      (input) CHARACTER*1
+            = 'I': sort D in increasing order;
+            = 'D': sort D in decreasing order.
+
+    N       (input) INTEGER
+            The length of the array D.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the array to be sorted.
+            On exit, D has been sorted into increasing order
+            (D(1) <= ... <= D(N) ) or into decreasing order
+            (D(1) >= ... >= D(N) ), depending on ID.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input paramters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+    dir = -1;
+    if (lsame_(id, "D")) {
+	dir = 0;
+    } else if (lsame_(id, "I")) {
+	dir = 1;
+    }
+    if (dir == -1) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLASRT", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 1) {
+	return 0;
+    }
+
+    stkpnt = 1;
+    stack[0] = 1;
+    stack[1] = *n;
+L10:
+    start = stack[((stkpnt) << (1)) - 2];
+    endd = stack[((stkpnt) << (1)) - 1];
+    --stkpnt;
+    if (endd - start <= 20 && endd - start > 0) {
+
+/*        Do Insertion sort on D( START:ENDD ) */
+
+	if (dir == 0) {
+
+/*           Sort into decreasing order */
+
+	    i__1 = endd;
+	    for (i__ = start + 1; i__ <= i__1; ++i__) {
+		i__2 = start + 1;
+		for (j = i__; j >= i__2; --j) {
+		    if (d__[j] > d__[j - 1]) {
+			dmnmx = d__[j];
+			d__[j] = d__[j - 1];
+			d__[j - 1] = dmnmx;
+		    } else {
+			goto L30;
+		    }
+/* L20: */
+		}
+L30:
+		;
+	    }
+
+	} else {
+
+/*           Sort into increasing order */
+
+	    i__1 = endd;
+	    for (i__ = start + 1; i__ <= i__1; ++i__) {
+		i__2 = start + 1;
+		for (j = i__; j >= i__2; --j) {
+		    if (d__[j] < d__[j - 1]) {
+			dmnmx = d__[j];
+			d__[j] = d__[j - 1];
+			d__[j - 1] = dmnmx;
+		    } else {
+			goto L50;
+		    }
+/* L40: */
+		}
+L50:
+		;
+	    }
+
+	}
+
+    } else if (endd - start > 20) {
+
+/*
+          Partition D( START:ENDD ) and stack parts, largest one first
+
+          Choose partition entry as median of 3
+*/
+
+	d1 = d__[start];
+	d2 = d__[endd];
+	i__ = (start + endd) / 2;
+	d3 = d__[i__];
+	if (d1 < d2) {
+	    if (d3 < d1) {
+		dmnmx = d1;
+	    } else if (d3 < d2) {
+		dmnmx = d3;
+	    } else {
+		dmnmx = d2;
+	    }
+	} else {
+	    if (d3 < d2) {
+		dmnmx = d2;
+	    } else if (d3 < d1) {
+		dmnmx = d3;
+	    } else {
+		dmnmx = d1;
+	    }
+	}
+
+	if (dir == 0) {
+
+/*           Sort into decreasing order */
+
+	    i__ = start - 1;
+	    j = endd + 1;
+L60:
+L70:
+	    --j;
+	    if (d__[j] < dmnmx) {
+		goto L70;
+	    }
+L80:
+	    ++i__;
+	    if (d__[i__] > dmnmx) {
+		goto L80;
+	    }
+	    if (i__ < j) {
+		tmp = d__[i__];
+		d__[i__] = d__[j];
+		d__[j] = tmp;
+		goto L60;
+	    }
+	    if (j - start > endd - j - 1) {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+	    } else {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+	    }
+	} else {
+
+/*           Sort into increasing order */
+
+	    i__ = start - 1;
+	    j = endd + 1;
+L90:
+L100:
+	    --j;
+	    if (d__[j] > dmnmx) {
+		goto L100;
+	    }
+L110:
+	    ++i__;
+	    if (d__[i__] < dmnmx) {
+		goto L110;
+	    }
+	    if (i__ < j) {
+		tmp = d__[i__];
+		d__[i__] = d__[j];
+		d__[j] = tmp;
+		goto L90;
+	    }
+	    if (j - start > endd - j - 1) {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+	    } else {
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = j + 1;
+		stack[((stkpnt) << (1)) - 1] = endd;
+		++stkpnt;
+		stack[((stkpnt) << (1)) - 2] = start;
+		stack[((stkpnt) << (1)) - 1] = j;
+	    }
+	}
+    }
+    if (stkpnt > 0) {
+	goto L10;
+    }
+    return 0;
+
+/*     End of SLASRT */
+
+} /* slasrt_ */
+
+/* Subroutine */ int slassq_(integer *n, real *x, integer *incx, real *scale,
+	real *sumsq)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    static integer ix;
+    static real absxi;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLASSQ  returns the values  scl  and  smsq  such that
+
+       ( scl**2 )*smsq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+
+    where  x( i ) = X( 1 + ( i - 1 )*INCX ). The value of  sumsq  is
+    assumed to be non-negative and  scl  returns the value
+
+       scl = max( scale, abs( x( i ) ) ).
+
+    scale and sumsq must be supplied in SCALE and SUMSQ and
+    scl and smsq are overwritten on SCALE and SUMSQ respectively.
+
+    The routine makes only one pass through the vector x.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of elements to be used from the vector X.
+
+    X       (input) REAL array, dimension (N)
+            The vector for which a scaled sum of squares is computed.
+               x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+
+    INCX    (input) INTEGER
+            The increment between successive values of the vector X.
+            INCX > 0.
+
+    SCALE   (input/output) REAL
+            On entry, the value  scale  in the equation above.
+            On exit, SCALE is overwritten with  scl , the scaling factor
+            for the sum of squares.
+
+    SUMSQ   (input/output) REAL
+            On entry, the value  sumsq  in the equation above.
+            On exit, SUMSQ is overwritten with  smsq , the basic sum of
+            squares from which  scl  has been factored out.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n > 0) {
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    if (x[ix] != 0.f) {
+		absxi = (r__1 = x[ix], dabs(r__1));
+		if (*scale < absxi) {
+/* Computing 2nd power */
+		    r__1 = *scale / absxi;
+		    *sumsq = *sumsq * (r__1 * r__1) + 1;
+		    *scale = absxi;
+		} else {
+/* Computing 2nd power */
+		    r__1 = absxi / *scale;
+		    *sumsq += r__1 * r__1;
+		}
+	    }
+/* L10: */
+	}
+    }
+    return 0;
+
+/*     End of SLASSQ */
+
+} /* slassq_ */
+
+/* Subroutine */ int slasv2_(real *f, real *g, real *h__, real *ssmin, real *
+	ssmax, real *snr, real *csr, real *snl, real *csl)
+{
+    /* System generated locals */
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static real a, d__, l, m, r__, s, t, fa, ga, ha, ft, gt, ht, mm, tt, clt,
+	    crt, slt, srt;
+    static integer pmax;
+    static real temp;
+    static logical swap;
+    static real tsign;
+    static logical gasmal;
+    extern doublereal slamch_(char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLASV2 computes the singular value decomposition of a 2-by-2
+    triangular matrix
+       [  F   G  ]
+       [  0   H  ].
+    On return, abs(SSMAX) is the larger singular value, abs(SSMIN) is the
+    smaller singular value, and (CSL,SNL) and (CSR,SNR) are the left and
+    right singular vectors for abs(SSMAX), giving the decomposition
+
+       [ CSL  SNL ] [  F   G  ] [ CSR -SNR ]  =  [ SSMAX   0   ]
+       [-SNL  CSL ] [  0   H  ] [ SNR  CSR ]     [  0    SSMIN ].
+
+    Arguments
+    =========
+
+    F       (input) REAL
+            The (1,1) element of the 2-by-2 matrix.
+
+    G       (input) REAL
+            The (1,2) element of the 2-by-2 matrix.
+
+    H       (input) REAL
+            The (2,2) element of the 2-by-2 matrix.
+
+    SSMIN   (output) REAL
+            abs(SSMIN) is the smaller singular value.
+
+    SSMAX   (output) REAL
+            abs(SSMAX) is the larger singular value.
+
+    SNL     (output) REAL
+    CSL     (output) REAL
+            The vector (CSL, SNL) is a unit left singular vector for the
+            singular value abs(SSMAX).
+
+    SNR     (output) REAL
+    CSR     (output) REAL
+            The vector (CSR, SNR) is a unit right singular vector for the
+            singular value abs(SSMAX).
+
+    Further Details
+    ===============
+
+    Any input parameter may be aliased with any output parameter.
+
+    Barring over/underflow and assuming a guard digit in subtraction, all
+    output quantities are correct to within a few units in the last
+    place (ulps).
+
+    In IEEE arithmetic, the code works correctly if one matrix element is
+    infinite.
+
+    Overflow will not occur unless the largest singular value itself
+    overflows or is within a few ulps of overflow. (On machines with
+    partial overflow, like the Cray, overflow may occur if the largest
+    singular value is within a factor of 2 of overflow.)
+
+    Underflow is harmless if underflow is gradual. Otherwise, results
+    may correspond to a matrix modified by perturbations of size near
+    the underflow threshold.
+
+   =====================================================================
+*/
+
+
+    ft = *f;
+    fa = dabs(ft);
+    ht = *h__;
+    ha = dabs(*h__);
+
+/*
+       PMAX points to the maximum absolute element of matrix
+         PMAX = 1 if F largest in absolute values
+         PMAX = 2 if G largest in absolute values
+         PMAX = 3 if H largest in absolute values
+*/
+
+    pmax = 1;
+    swap = ha > fa;
+    if (swap) {
+	pmax = 3;
+	temp = ft;
+	ft = ht;
+	ht = temp;
+	temp = fa;
+	fa = ha;
+	ha = temp;
+
+/*        Now FA .ge. HA */
+
+    }
+    gt = *g;
+    ga = dabs(gt);
+    if (ga == 0.f) {
+
+/*        Diagonal matrix */
+
+	*ssmin = ha;
+	*ssmax = fa;
+	clt = 1.f;
+	crt = 1.f;
+	slt = 0.f;
+	srt = 0.f;
+    } else {
+	gasmal = TRUE_;
+	if (ga > fa) {
+	    pmax = 2;
+	    if (fa / ga < slamch_("EPS")) {
+
+/*              Case of very large GA */
+
+		gasmal = FALSE_;
+		*ssmax = ga;
+		if (ha > 1.f) {
+		    *ssmin = fa / (ga / ha);
+		} else {
+		    *ssmin = fa / ga * ha;
+		}
+		clt = 1.f;
+		slt = ht / gt;
+		srt = 1.f;
+		crt = ft / gt;
+	    }
+	}
+	if (gasmal) {
+
+/*           Normal case */
+
+	    d__ = fa - ha;
+	    if (d__ == fa) {
+
+/*              Copes with infinite F or H */
+
+		l = 1.f;
+	    } else {
+		l = d__ / fa;
+	    }
+
+/*           Note that 0 .le. L .le. 1 */
+
+	    m = gt / ft;
+
+/*           Note that abs(M) .le. 1/macheps */
+
+	    t = 2.f - l;
+
+/*           Note that T .ge. 1 */
+
+	    mm = m * m;
+	    tt = t * t;
+	    s = sqrt(tt + mm);
+
+/*           Note that 1 .le. S .le. 1 + 1/macheps */
+
+	    if (l == 0.f) {
+		r__ = dabs(m);
+	    } else {
+		r__ = sqrt(l * l + mm);
+	    }
+
+/*           Note that 0 .le. R .le. 1 + 1/macheps */
+
+	    a = (s + r__) * .5f;
+
+/*           Note that 1 .le. A .le. 1 + abs(M) */
+
+	    *ssmin = ha / a;
+	    *ssmax = fa * a;
+	    if (mm == 0.f) {
+
+/*              Note that M is very tiny */
+
+		if (l == 0.f) {
+		    t = r_sign(&c_b9647, &ft) * r_sign(&c_b1011, &gt);
+		} else {
+		    t = gt / r_sign(&d__, &ft) + m / t;
+		}
+	    } else {
+		t = (m / (s + t) + m / (r__ + l)) * (a + 1.f);
+	    }
+	    l = sqrt(t * t + 4.f);
+	    crt = 2.f / l;
+	    srt = t / l;
+	    clt = (crt + srt * m) / a;
+	    slt = ht / ft * srt / a;
+	}
+    }
+    if (swap) {
+	*csl = srt;
+	*snl = crt;
+	*csr = slt;
+	*snr = clt;
+    } else {
+	*csl = clt;
+	*snl = slt;
+	*csr = crt;
+	*snr = srt;
+    }
+
+/*     Correct signs of SSMAX and SSMIN */
+
+    if (pmax == 1) {
+	tsign = r_sign(&c_b1011, csr) * r_sign(&c_b1011, csl) * r_sign(&
+		c_b1011, f);
+    }
+    if (pmax == 2) {
+	tsign = r_sign(&c_b1011, snr) * r_sign(&c_b1011, csl) * r_sign(&
+		c_b1011, g);
+    }
+    if (pmax == 3) {
+	tsign = r_sign(&c_b1011, snr) * r_sign(&c_b1011, snl) * r_sign(&
+		c_b1011, h__);
+    }
+    *ssmax = r_sign(ssmax, &tsign);
+    r__1 = tsign * r_sign(&c_b1011, f) * r_sign(&c_b1011, h__);
+    *ssmin = r_sign(ssmin, &r__1);
+    return 0;
+
+/*     End of SLASV2 */
+
+} /* slasv2_ */
+
+/* Subroutine */ int slaswp_(integer *n, real *a, integer *lda, integer *k1,
+	integer *k2, integer *ipiv, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc;
+    static real temp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SLASWP performs a series of row interchanges on the matrix A.
+    One row interchange is initiated for each of rows K1 through K2 of A.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the matrix of column dimension N to which the row
+            interchanges will be applied.
+            On exit, the permuted matrix.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+
+    K1      (input) INTEGER
+            The first element of IPIV for which a row interchange will
+            be done.
+
+    K2      (input) INTEGER
+            The last element of IPIV for which a row interchange will
+            be done.
+
+    IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+            The vector of pivot indices.  Only the elements in positions
+            K1 through K2 of IPIV are accessed.
+            IPIV(K) = L implies rows K and L are to be interchanged.
+
+    INCX    (input) INTEGER
+            The increment between successive values of IPIV.  If IPIV
+            is negative, the pivots are applied in reverse order.
+
+    Further Details
+    ===============
+
+    Modified by
+     R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+
+   =====================================================================
+
+
+       Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    if (*incx > 0) {
+	ix0 = *k1;
+	i1 = *k1;
+	i2 = *k2;
+	inc = 1;
+    } else if (*incx < 0) {
+	ix0 = (1 - *k2) * *incx + 1;
+	i1 = *k2;
+	i2 = *k1;
+	inc = -1;
+    } else {
+	return 0;
+    }
+
+    n32 = (*n / 32) << (5);
+    if (n32 != 0) {
+	i__1 = n32;
+	for (j = 1; j <= i__1; j += 32) {
+	    ix = ix0;
+	    i__2 = i2;
+	    i__3 = inc;
+	    for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3)
+		    {
+		ip = ipiv[ix];
+		if (ip != i__) {
+		    i__4 = j + 31;
+		    for (k = j; k <= i__4; ++k) {
+			temp = a[i__ + k * a_dim1];
+			a[i__ + k * a_dim1] = a[ip + k * a_dim1];
+			a[ip + k * a_dim1] = temp;
+/* L10: */
+		    }
+		}
+		ix += *incx;
+/* L20: */
+	    }
+/* L30: */
+	}
+    }
+    if (n32 != *n) {
+	++n32;
+	ix = ix0;
+	i__1 = i2;
+	i__3 = inc;
+	for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) {
+	    ip = ipiv[ix];
+	    if (ip != i__) {
+		i__2 = *n;
+		for (k = n32; k <= i__2; ++k) {
+		    temp = a[i__ + k * a_dim1];
+		    a[i__ + k * a_dim1] = a[ip + k * a_dim1];
+		    a[ip + k * a_dim1] = temp;
+/* L40: */
+		}
+	    }
+	    ix += *incx;
+/* L50: */
+	}
+    }
+
+    return 0;
+
+/*     End of SLASWP */
+
+} /* slaswp_ */
+
+/* Subroutine */ int slatrd_(char *uplo, integer *n, integer *nb, real *a,
+	integer *lda, real *e, real *tau, real *w, integer *ldw)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, iw;
+    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
+    static real alpha;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *), saxpy_(
+	    integer *, real *, real *, integer *, real *, integer *), ssymv_(
+	    char *, integer *, real *, real *, integer *, real *, integer *,
+	    real *, real *, integer *), slarfg_(integer *, real *,
+	    real *, integer *, real *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SLATRD reduces NB rows and columns of a real symmetric matrix A to
+    symmetric tridiagonal form by an orthogonal similarity
+    transformation Q' * A * Q, and returns the matrices V and W which are
+    needed to apply the transformation to the unreduced part of A.
+
+    If UPLO = 'U', SLATRD reduces the last NB rows and columns of a
+    matrix, of which the upper triangle is supplied;
+    if UPLO = 'L', SLATRD reduces the first NB rows and columns of a
+    matrix, of which the lower triangle is supplied.
+
+    This is an auxiliary routine called by SSYTRD.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is stored:
+            = 'U': Upper triangular
+            = 'L': Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    NB      (input) INTEGER
+            The number of rows and columns to be reduced.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit:
+            if UPLO = 'U', the last NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements above the diagonal
+              with the array TAU, represent the orthogonal matrix Q as a
+              product of elementary reflectors;
+            if UPLO = 'L', the first NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements below the diagonal
+              with the array TAU, represent the  orthogonal matrix Q as a
+              product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= (1,N).
+
+    E       (output) REAL array, dimension (N-1)
+            If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+            elements of the last NB columns of the reduced matrix;
+            if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+            the first NB columns of the reduced matrix.
+
+    TAU     (output) REAL array, dimension (N-1)
+            The scalar factors of the elementary reflectors, stored in
+            TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+            See Further Details.
+
+    W       (output) REAL array, dimension (LDW,NB)
+            The n-by-nb matrix W required to update the unreduced part
+            of A.
+
+    LDW     (input) INTEGER
+            The leading dimension of the array W. LDW >= max(1,N).
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n) H(n-1) . . . H(n-nb+1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+    and tau in TAU(i-1).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+    and tau in TAU(i).
+
+    The elements of the vectors v together form the n-by-nb matrix V
+    which is needed, with W, to apply the transformation to the unreduced
+    part of the matrix, using a symmetric rank-2k update of the form:
+    A := A - V*W' - W*V'.
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5 and nb = 2:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  a   a   a   v4  v5 )              (  d                  )
+      (      a   a   v4  v5 )              (  1   d              )
+      (          a   1   v5 )              (  v1  1   a          )
+      (              d   1  )              (  v1  v2  a   a      )
+      (                  d  )              (  v1  v2  a   a   a  )
+
+    where d denotes a diagonal element of the reduced matrix, a denotes
+    an element of the original matrix that is unchanged, and vi denotes
+    an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --e;
+    --tau;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1;
+    w -= w_offset;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (lsame_(uplo, "U")) {
+
+/*        Reduce last NB columns of upper triangle */
+
+	i__1 = *n - *nb + 1;
+	for (i__ = *n; i__ >= i__1; --i__) {
+	    iw = i__ - *n + *nb;
+	    if (i__ < *n) {
+
+/*              Update A(1:i,i) */
+
+		i__2 = *n - i__;
+		sgemv_("No transpose", &i__, &i__2, &c_b1290, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
+			c_b1011, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		sgemv_("No transpose", &i__, &i__2, &c_b1290, &w[(iw + 1) *
+			w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b1011, &a[i__ * a_dim1 + 1], &c__1);
+	    }
+	    if (i__ > 1) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(1:i-2,i)
+*/
+
+		i__2 = i__ - 1;
+		slarfg_(&i__2, &a[i__ - 1 + i__ * a_dim1], &a[i__ * a_dim1 +
+			1], &c__1, &tau[i__ - 1]);
+		e[i__ - 1] = a[i__ - 1 + i__ * a_dim1];
+		a[i__ - 1 + i__ * a_dim1] = 1.f;
+
+/*              Compute W(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		ssymv_("Upper", &i__2, &c_b1011, &a[a_offset], lda, &a[i__ *
+			a_dim1 + 1], &c__1, &c_b320, &w[iw * w_dim1 + 1], &
+			c__1);
+		if (i__ < *n) {
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    sgemv_("Transpose", &i__2, &i__3, &c_b1011, &w[(iw + 1) *
+			    w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1], &c__1, &
+			    c_b320, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[(i__ +
+			    1) * a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1],
+			    &c__1, &c_b1011, &w[iw * w_dim1 + 1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[(i__ + 1) *
+			     a_dim1 + 1], lda, &a[i__ * a_dim1 + 1], &c__1, &
+			    c_b320, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &w[(iw + 1)
+			     * w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
+			    c__1, &c_b1011, &w[iw * w_dim1 + 1], &c__1);
+		}
+		i__2 = i__ - 1;
+		sscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		alpha = tau[i__ - 1] * -.5f * sdot_(&i__2, &w[iw * w_dim1 + 1]
+			, &c__1, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		saxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
+			w_dim1 + 1], &c__1);
+	    }
+
+/* L10: */
+	}
+    } else {
+
+/*        Reduce first NB columns of lower triangle */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:n,i) */
+
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + a_dim1],
+		    lda, &w[i__ + w_dim1], ldw, &c_b1011, &a[i__ + i__ *
+		    a_dim1], &c__1);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    sgemv_("No transpose", &i__2, &i__3, &c_b1290, &w[i__ + w_dim1],
+		    ldw, &a[i__ + a_dim1], lda, &c_b1011, &a[i__ + i__ *
+		    a_dim1], &c__1);
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:n,i)
+*/
+
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) +
+			i__ * a_dim1], &c__1, &tau[i__]);
+		e[i__] = a[i__ + 1 + i__ * a_dim1];
+		a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*              Compute W(i+1:n,i) */
+
+		i__2 = *n - i__;
+		ssymv_("Lower", &i__2, &c_b1011, &a[i__ + 1 + (i__ + 1) *
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b320, &w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &w[i__ + 1 +
+			w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b320, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[i__ + 1 +
+			a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b1011, &
+			w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 +
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b320, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &w[i__ + 1 +
+			w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b1011, &
+			w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		sscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		alpha = tau[i__] * -.5f * sdot_(&i__2, &w[i__ + 1 + i__ *
+			w_dim1], &c__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
+		i__2 = *n - i__;
+		saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+	    }
+
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of SLATRD */
+
+} /* slatrd_ */
+
+/* Subroutine */ int slauu2_(char *uplo, integer *n, real *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__;
+    static real aii;
+    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLAUU2 computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the unblocked form of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAUU2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the product U * U'. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    aii = a[i__ + i__ * a_dim1];
+	    if (i__ < *n) {
+		i__2 = *n - i__ + 1;
+		a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1],
+			lda, &a[i__ + i__ * a_dim1], lda);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1011, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			aii, &a[i__ * a_dim1 + 1], &c__1);
+	    } else {
+		sscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
+	    }
+/* L10: */
+	}
+
+    } else {
+
+/*        Compute the product L' * L. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    aii = a[i__ + i__ * a_dim1];
+	    if (i__ < *n) {
+		i__2 = *n - i__ + 1;
+		a[i__ + i__ * a_dim1] = sdot_(&i__2, &a[i__ + i__ * a_dim1], &
+			c__1, &a[i__ + i__ * a_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1011, &a[i__ + 1 +
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &aii,
+			 &a[i__ + a_dim1], lda);
+	    } else {
+		sscal_(&i__, &aii, &a[i__ + a_dim1], lda);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of SLAUU2 */
+
+} /* slauu2_ */
+
+/* Subroutine */ int slauum_(char *uplo, integer *n, real *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, ib, nb;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int strmm_(char *, char *, char *, char *,
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), ssyrk_(char *, char *, integer
+	    *, integer *, real *, real *, integer *, real *, real *, integer *
+	    ), slauu2_(char *, integer *, real *, integer *,
+	    integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SLAUUM computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the blocked form of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SLAUUM", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "SLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	slauu2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute the product U * U'. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		strmm_("Right", "Upper", "Transpose", "Non-unit", &i__3, &ib,
+			&c_b1011, &a[i__ + i__ * a_dim1], lda, &a[i__ *
+			a_dim1 + 1], lda);
+		slauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    sgemm_("No transpose", "Transpose", &i__3, &ib, &i__4, &
+			    c_b1011, &a[(i__ + ib) * a_dim1 + 1], lda, &a[i__
+			    + (i__ + ib) * a_dim1], lda, &c_b1011, &a[i__ *
+			    a_dim1 + 1], lda);
+		    i__3 = *n - i__ - ib + 1;
+		    ssyrk_("Upper", "No transpose", &ib, &i__3, &c_b1011, &a[
+			    i__ + (i__ + ib) * a_dim1], lda, &c_b1011, &a[i__
+			    + i__ * a_dim1], lda);
+		}
+/* L10: */
+	    }
+	} else {
+
+/*           Compute the product L' * L. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		strmm_("Left", "Lower", "Transpose", "Non-unit", &ib, &i__3, &
+			c_b1011, &a[i__ + i__ * a_dim1], lda, &a[i__ + a_dim1]
+			, lda);
+		slauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    sgemm_("Transpose", "No transpose", &ib, &i__3, &i__4, &
+			    c_b1011, &a[i__ + ib + i__ * a_dim1], lda, &a[i__
+			    + ib + a_dim1], lda, &c_b1011, &a[i__ + a_dim1],
+			    lda);
+		    i__3 = *n - i__ - ib + 1;
+		    ssyrk_("Lower", "Transpose", &ib, &i__3, &c_b1011, &a[i__
+			    + ib + i__ * a_dim1], lda, &c_b1011, &a[i__ + i__
+			    * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of SLAUUM */
+
+} /* slauum_ */
+
+/* Subroutine */ int sorg2r_(integer *m, integer *n, integer *k, real *a,
+	integer *lda, real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    slarf_(char *, integer *, integer *, real *, integer *, real *,
+	    real *, integer *, real *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SORG2R generates an m by n real matrix Q with orthonormal columns,
+    which is defined as the first n columns of a product of k elementary
+    reflectors of order m
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by SGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by SGEQRF in the first k columns of its array
+            argument A.
+            On exit, the m-by-n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEQRF.
+
+    WORK    (workspace) REAL array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORG2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+/*     Initialise columns k+1:n to columns of the unit matrix */
+
+    i__1 = *n;
+    for (j = *k + 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (l = 1; l <= i__2; ++l) {
+	    a[l + j * a_dim1] = 0.f;
+/* L10: */
+	}
+	a[j + j * a_dim1] = 1.f;
+/* L20: */
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i) to A(i:m,i:n) from the left */
+
+	if (i__ < *n) {
+	    a[i__ + i__ * a_dim1] = 1.f;
+	    i__1 = *m - i__ + 1;
+	    i__2 = *n - i__;
+	    slarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	}
+	if (i__ < *m) {
+	    i__1 = *m - i__;
+	    r__1 = -tau[i__];
+	    sscal_(&i__1, &r__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
+	}
+	a[i__ + i__ * a_dim1] = 1.f - tau[i__];
+
+/*        Set A(1:i-1,i) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    a[l + i__ * a_dim1] = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of SORG2R */
+
+} /* sorg2r_ */
+
+/* Subroutine */ int sorgbr_(char *vect, integer *m, integer *n, integer *k,
+	real *a, integer *lda, real *tau, real *work, integer *lwork, integer
+	*info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, nb, mn;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical wantq;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sorglq_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *, integer *), sorgqr_(
+	    integer *, integer *, integer *, real *, integer *, real *, real *
+	    , integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORGBR generates one of the real orthogonal matrices Q or P**T
+    determined by SGEBRD when reducing a real matrix A to bidiagonal
+    form: A = Q * B * P**T.  Q and P**T are defined as products of
+    elementary reflectors H(i) or G(i) respectively.
+
+    If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+    is of order M:
+    if m >= k, Q = H(1) H(2) . . . H(k) and SORGBR returns the first n
+    columns of Q, where m >= n >= k;
+    if m < k, Q = H(1) H(2) . . . H(m-1) and SORGBR returns Q as an
+    M-by-M matrix.
+
+    If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**T
+    is of order N:
+    if k < n, P**T = G(k) . . . G(2) G(1) and SORGBR returns the first m
+    rows of P**T, where n >= m >= k;
+    if k >= n, P**T = G(n-1) . . . G(2) G(1) and SORGBR returns P**T as
+    an N-by-N matrix.
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            Specifies whether the matrix Q or the matrix P**T is
+            required, as defined in the transformation applied by SGEBRD:
+            = 'Q':  generate Q;
+            = 'P':  generate P**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q or P**T to be returned.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q or P**T to be returned.
+            N >= 0.
+            If VECT = 'Q', M >= N >= min(M,K);
+            if VECT = 'P', N >= M >= min(N,K).
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original M-by-K
+            matrix reduced by SGEBRD.
+            If VECT = 'P', the number of rows in the original K-by-N
+            matrix reduced by SGEBRD.
+            K >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by SGEBRD.
+            On exit, the M-by-N matrix Q or P**T.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) REAL array, dimension
+                                  (min(M,K)) if VECT = 'Q'
+                                  (min(N,K)) if VECT = 'P'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i), which determines Q or P**T, as
+            returned by SGEBRD in its array argument TAUQ or TAUP.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+            For optimum performance LWORK >= min(M,N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    wantq = lsame_(vect, "Q");
+    mn = min(*m,*n);
+    lquery = *lwork == -1;
+    if (! wantq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (!
+	     wantq && ((*m > *n) || (*m < min(*n,*k))))) {
+	*info = -3;
+    } else if (*k < 0) {
+	*info = -4;
+    } else if (*lda < max(1,*m)) {
+	*info = -6;
+    } else if (*lwork < max(1,mn) && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+	if (wantq) {
+	    nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	} else {
+	    nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	}
+	lwkopt = max(1,mn) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORGBR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    if (wantq) {
+
+/*
+          Form Q, determined by a call to SGEBRD to reduce an m-by-k
+          matrix
+*/
+
+	if (*m >= *k) {
+
+/*           If m >= k, assume m >= n >= k */
+
+	    sorgqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If m < k, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             column to the right, and set the first row and column of Q
+             to those of the unit matrix
+*/
+
+	    for (j = *m; j >= 2; --j) {
+		a[j * a_dim1 + 1] = 0.f;
+		i__1 = *m;
+		for (i__ = j + 1; i__ <= i__1; ++i__) {
+		    a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1];
+/* L10: */
+		}
+/* L20: */
+	    }
+	    a[a_dim1 + 1] = 1.f;
+	    i__1 = *m;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		a[i__ + a_dim1] = 0.f;
+/* L30: */
+	    }
+	    if (*m > 1) {
+
+/*              Form Q(2:m,2:m) */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		sorgqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    } else {
+
+/*
+          Form P', determined by a call to SGEBRD to reduce a k-by-n
+          matrix
+*/
+
+	if (*k < *n) {
+
+/*           If k < n, assume k <= m <= n */
+
+	    sorglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If k >= n, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             row downward, and set the first row and column of P' to
+             those of the unit matrix
+*/
+
+	    a[a_dim1 + 1] = 1.f;
+	    i__1 = *n;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		a[i__ + a_dim1] = 0.f;
+/* L40: */
+	    }
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		for (i__ = j - 1; i__ >= 2; --i__) {
+		    a[i__ + j * a_dim1] = a[i__ - 1 + j * a_dim1];
+/* L50: */
+		}
+		a[j * a_dim1 + 1] = 0.f;
+/* L60: */
+	    }
+	    if (*n > 1) {
+
+/*              Form P'(2:n,2:n) */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		sorglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORGBR */
+
+} /* sorgbr_ */
+
+/* Subroutine */ int sorghr_(integer *n, integer *ilo, integer *ihi, real *a,
+	integer *lda, real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, j, nb, nh, iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sorgqr_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORGHR generates a real orthogonal matrix Q which is defined as the
+    product of IHI-ILO elementary reflectors of order N, as returned by
+    SGEHRD:
+
+    Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix Q. N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            ILO and IHI must have the same values as in the previous call
+            of SGEHRD. Q is equal to the unit matrix except in the
+            submatrix Q(ilo+1:ihi,ilo+1:ihi).
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by SGEHRD.
+            On exit, the N-by-N orthogonal matrix Q.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,N).
+
+    TAU     (input) REAL array, dimension (N-1)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEHRD.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= IHI-ILO.
+            For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nh = *ihi - *ilo;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,nh) && ! lquery) {
+	*info = -8;
+    }
+
+    if (*info == 0) {
+	nb = ilaenv_(&c__1, "SORGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	lwkopt = max(1,nh) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORGHR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+/*
+       Shift the vectors which define the elementary reflectors one
+       column to the right, and set the first ilo and the last n-ihi
+       rows and columns to those of the unit matrix
+*/
+
+    i__1 = *ilo + 1;
+    for (j = *ihi; j >= i__1; --j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.f;
+/* L10: */
+	}
+	i__2 = *ihi;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = a[i__ + (j - 1) * a_dim1];
+/* L20: */
+	}
+	i__2 = *n;
+	for (i__ = *ihi + 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    i__1 = *ilo;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.f;
+/* L50: */
+	}
+	a[j + j * a_dim1] = 1.f;
+/* L60: */
+    }
+    i__1 = *n;
+    for (j = *ihi + 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    a[i__ + j * a_dim1] = 0.f;
+/* L70: */
+	}
+	a[j + j * a_dim1] = 1.f;
+/* L80: */
+    }
+
+    if (nh > 0) {
+
+/*        Generate Q(ilo+1:ihi,ilo+1:ihi) */
+
+	sorgqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*
+		ilo], &work[1], lwork, &iinfo);
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORGHR */
+
+} /* sorghr_ */
+
+/* Subroutine */ int sorgl2_(integer *m, integer *n, integer *k, real *a,
+	integer *lda, real *tau, real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    real r__1;
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    slarf_(char *, integer *, integer *, real *, integer *, real *,
+	    real *, integer *, real *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORGL2 generates an m by n real matrix Q with orthonormal rows,
+    which is defined as the first m rows of a product of k elementary
+    reflectors of order n
+
+          Q  =  H(k) . . . H(2) H(1)
+
+    as returned by SGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by SGELQF in the first k rows of its array argument A.
+            On exit, the m-by-n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGELQF.
+
+    WORK    (workspace) REAL array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORGL2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	return 0;
+    }
+
+    if (*k < *m) {
+
+/*        Initialise rows k+1:m to rows of the unit matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (l = *k + 1; l <= i__2; ++l) {
+		a[l + j * a_dim1] = 0.f;
+/* L10: */
+	    }
+	    if (j > *k && j <= *m) {
+		a[j + j * a_dim1] = 1.f;
+	    }
+/* L20: */
+	}
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i) to A(i:m,i:n) from the right */
+
+	if (i__ < *n) {
+	    if (i__ < *m) {
+		a[i__ + i__ * a_dim1] = 1.f;
+		i__1 = *m - i__;
+		i__2 = *n - i__ + 1;
+		slarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &
+			tau[i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    }
+	    i__1 = *n - i__;
+	    r__1 = -tau[i__];
+	    sscal_(&i__1, &r__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+	a[i__ + i__ * a_dim1] = 1.f - tau[i__];
+
+/*        Set A(i,1:i-1) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    a[i__ + l * a_dim1] = 0.f;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of SORGL2 */
+
+} /* sorgl2_ */
+
+/* Subroutine */ int sorglq_(integer *m, integer *n, integer *k, real *a,
+	integer *lda, real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int sorgl2_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *), slarfb_(char *, char *,
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORGLQ generates an M-by-N real matrix Q with orthonormal rows,
+    which is defined as the first M rows of a product of K elementary
+    reflectors of order N
+
+          Q  =  H(k) . . . H(2) H(1)
+
+    as returned by SGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by SGELQF in the first k rows of its array argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGELQF.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SORGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*m) * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORGLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SORGLQ", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SORGLQ", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk rows are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(kk+1:m,1:kk) to zero. */
+
+	i__1 = kk;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = kk + 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *m) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	sorgl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *n - i__ + 1;
+		slarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i+ib:m,i:n) from the right */
+
+		i__2 = *m - i__ - ib + 1;
+		i__3 = *n - i__ + 1;
+		slarfb_("Right", "Transpose", "Forward", "Rowwise", &i__2, &
+			i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib +
+			1], &ldwork);
+	    }
+
+/*           Apply H' to columns i:n of current block */
+
+	    i__2 = *n - i__ + 1;
+	    sorgl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set columns 1:i-1 of current block to zero */
+
+	    i__2 = i__ - 1;
+	    for (j = 1; j <= i__2; ++j) {
+		i__3 = i__ + ib - 1;
+		for (l = i__; l <= i__3; ++l) {
+		    a[l + j * a_dim1] = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SORGLQ */
+
+} /* sorglq_ */
+
+/* Subroutine */ int sorgqr_(integer *m, integer *n, integer *k, real *a,
+	integer *lda, real *tau, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int sorg2r_(integer *, integer *, integer *, real
+	    *, integer *, real *, real *, integer *), slarfb_(char *, char *,
+	    char *, char *, integer *, integer *, integer *, real *, integer *
+	    , real *, integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORGQR generates an M-by-N real matrix Q with orthonormal columns,
+    which is defined as the first N columns of a product of K elementary
+    reflectors of order M
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by SGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by SGEQRF in the first k columns of its array
+            argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEQRF.
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "SORGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*n) * nb;
+    work[1] = (real) lwkopt;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORGQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "SORGQR", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "SORGQR", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk columns are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(1:kk,kk+1:n) to zero. */
+
+	i__1 = *n;
+	for (j = kk + 1; j <= i__1; ++j) {
+	    i__2 = kk;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		a[i__ + j * a_dim1] = 0.f;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *n) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	sorg2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *m - i__ + 1;
+		slarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i:m,i+ib:n) from the left */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__ - ib + 1;
+		slarfb_("Left", "No transpose", "Forward", "Columnwise", &
+			i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[
+			1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &
+			work[ib + 1], &ldwork);
+	    }
+
+/*           Apply H to rows i:m of current block */
+
+	    i__2 = *m - i__ + 1;
+	    sorg2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set rows 1:i-1 of current block to zero */
+
+	    i__2 = i__ + ib - 1;
+	    for (j = i__; j <= i__2; ++j) {
+		i__3 = i__ - 1;
+		for (l = 1; l <= i__3; ++l) {
+		    a[l + j * a_dim1] = 0.f;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1] = (real) iws;
+    return 0;
+
+/*     End of SORGQR */
+
+} /* sorgqr_ */
+
+/* Subroutine */ int sorm2l_(char *side, char *trans, integer *m, integer *n,
+	integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, mi, ni, nq;
+    static real aii;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SORM2L overwrites the general real m by n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'T', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'T',
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by SGEQLF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'T': apply Q' (Transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) REAL array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            SGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEQLF.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) REAL array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORM2L", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+    } else {
+	mi = *m;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) is applied to C(1:m-k+i,1:n) */
+
+	    mi = *m - *k + i__;
+	} else {
+
+/*           H(i) is applied to C(1:m,1:n-k+i) */
+
+	    ni = *n - *k + i__;
+	}
+
+/*        Apply H(i) */
+
+	aii = a[nq - *k + i__ + i__ * a_dim1];
+	a[nq - *k + i__ + i__ * a_dim1] = 1.f;
+	slarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &tau[i__], &c__[
+		c_offset], ldc, &work[1]);
+	a[nq - *k + i__ + i__ * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of SORM2L */
+
+} /* sorm2l_ */
+
+/* Subroutine */ int sorm2r_(char *side, char *trans, integer *m, integer *n,
+	integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static real aii;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SORM2R overwrites the general real m by n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'T', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'T',
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by SGEQRF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'T': apply Q' (Transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) REAL array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            SGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEQRF.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) REAL array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORM2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && ! notran) || (! left && notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) */
+
+	aii = a[i__ + i__ * a_dim1];
+	a[i__ + i__ * a_dim1] = 1.f;
+	slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &tau[i__], &c__[
+		ic + jc * c_dim1], ldc, &work[1]);
+	a[i__ + i__ * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of SORM2R */
+
+} /* sorm2r_ */
+
+/* Subroutine */ int sormbr_(char *vect, char *side, char *trans, integer *m,
+	integer *n, integer *k, real *a, integer *lda, real *tau, real *c__,
+	integer *ldc, real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2];
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran, applyq;
+    static char transt[1];
+    extern /* Subroutine */ int sormlq_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    If VECT = 'Q', SORMBR overwrites the general real M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    If VECT = 'P', SORMBR overwrites the general real M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      P * C          C * P
+    TRANS = 'T':      P**T * C       C * P**T
+
+    Here Q and P**T are the orthogonal matrices determined by SGEBRD when
+    reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and
+    P**T are defined as products of elementary reflectors H(i) and G(i)
+    respectively.
+
+    Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+    order of the orthogonal matrix Q or P**T that is applied.
+
+    If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+    if nq >= k, Q = H(1) H(2) . . . H(k);
+    if nq < k, Q = H(1) H(2) . . . H(nq-1).
+
+    If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+    if k < nq, P = G(1) G(2) . . . G(k);
+    if k >= nq, P = G(1) G(2) . . . G(nq-1).
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            = 'Q': apply Q or Q**T;
+            = 'P': apply P or P**T.
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q, Q**T, P or P**T from the Left;
+            = 'R': apply Q, Q**T, P or P**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q  or P;
+            = 'T':  Transpose, apply Q**T or P**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original
+            matrix reduced by SGEBRD.
+            If VECT = 'P', the number of rows in the original
+            matrix reduced by SGEBRD.
+            K >= 0.
+
+    A       (input) REAL array, dimension
+                                  (LDA,min(nq,K)) if VECT = 'Q'
+                                  (LDA,nq)        if VECT = 'P'
+            The vectors which define the elementary reflectors H(i) and
+            G(i), whose products determine the matrices Q and P, as
+            returned by SGEBRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If VECT = 'Q', LDA >= max(1,nq);
+            if VECT = 'P', LDA >= max(1,min(nq,K)).
+
+    TAU     (input) REAL array, dimension (min(nq,K))
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i) which determines Q or P, as returned
+            by SGEBRD in the array argument TAUQ or TAUP.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q
+            or P*C or P**T*C or C*P or C*P**T.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    applyq = lsame_(vect, "Q");
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q or P and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! applyq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (! left && ! lsame_(side, "R")) {
+	*info = -2;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*k < 0) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = min(nq,*k);
+	if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2)
+		)) {
+	    *info = -8;
+	} else if (*ldc < max(1,*m)) {
+	    *info = -11;
+	} else if (*lwork < max(1,nw) && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info == 0) {
+	if (applyq) {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "SORMLQ", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "SORMLQ", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORMBR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    work[1] = 1.f;
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    if (applyq) {
+
+/*        Apply Q */
+
+	if (nq >= *k) {
+
+/*           Q was determined by a call to SGEBRD with nq >= k */
+
+	    sormqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           Q was determined by a call to SGEBRD with nq < k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    sormqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1]
+		    , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+	}
+    } else {
+
+/*        Apply P */
+
+	if (notran) {
+	    *(unsigned char *)transt = 'T';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+	if (nq > *k) {
+
+/*           P was determined by a call to SGEBRD with nq > k */
+
+	    sormlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           P was determined by a call to SGEBRD with nq <= k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    sormlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1],
+		    lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1],
+		    lwork, &iinfo);
+	}
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORMBR */
+
+} /* sormbr_ */
+
+/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n,
+	integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static real aii;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, integer *, real *), xerbla_(
+	    char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SORML2 overwrites the general real m by n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'T', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'T',
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'T': apply Q' (Transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) REAL array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            SGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGELQF.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) REAL array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORML2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) */
+
+	aii = a[i__ + i__ * a_dim1];
+	a[i__ + i__ * a_dim1] = 1.f;
+	slarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &tau[i__], &c__[
+		ic + jc * c_dim1], ldc, &work[1]);
+	a[i__ + i__ * a_dim1] = aii;
+/* L10: */
+    }
+    return 0;
+
+/*     End of SORML2 */
+
+} /* sorml2_ */
+
+/* Subroutine */ int sormlq_(char *side, char *trans, integer *m, integer *n,
+	integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static real t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int sorml2_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *), slarfb_(char *, char *, char *, char *
+	    , integer *, integer *, integer *, real *, integer *, real *,
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static logical notran;
+    static integer ldwork;
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORMLQ overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by SGELQF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) REAL array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            SGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGELQF.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "SORMLQ", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORMLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "SORMLQ", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	sorml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	if (notran) {
+	    *(unsigned char *)transt = 'T';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    slarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1],
+		    lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    slarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__
+		    + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1],
+		    ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORMLQ */
+
+} /* sormlq_ */
+
+/* Subroutine */ int sormql_(char *side, char *trans, integer *m, integer *n,
+	integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static real t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int sorm2l_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *), slarfb_(char *, char *, char *, char *
+	    , integer *, integer *, integer *, real *, integer *, real *,
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static logical notran;
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORMQL overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by SGEQLF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) REAL array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            SGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEQLF.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQL", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORMQL", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQL", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	sorm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	} else {
+	    mi = *m;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i+ib-1) . . . H(i+1) H(i)
+*/
+
+	    i__4 = nq - *k + i__ + ib - 1;
+	    slarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1]
+		    , lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(1:m-k+i+ib-1,1:n) */
+
+		mi = *m - *k + i__ + ib - 1;
+	    } else {
+
+/*              H or H' is applied to C(1:m,1:n-k+i+ib-1) */
+
+		ni = *n - *k + i__ + ib - 1;
+	    }
+
+/*           Apply H or H' */
+
+	    slarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, &
+		    work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORMQL */
+
+} /* sormql_ */
+
+/* Subroutine */ int sormqr_(char *side, char *trans, integer *m, integer *n,
+	integer *k, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static real t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int sorm2r_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *), slarfb_(char *, char *, char *, char *
+	    , integer *, integer *, integer *, real *, integer *, real *,
+	    integer *, real *, integer *, real *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slarft_(char *, char *, integer *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static logical notran;
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORMQR overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by SGEQRF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) REAL array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            SGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) REAL array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SGEQRF.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "SORMQR", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SORMQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "SORMQR", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	sorm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && ! notran) || (! left && notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    slarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ *
+		    a_dim1], lda, &tau[i__], t, &c__65)
+		    ;
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    slarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc *
+		    c_dim1], ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORMQR */
+
+} /* sormqr_ */
+
+/* Subroutine */ int sormtr_(char *side, char *uplo, char *trans, integer *m,
+	integer *n, real *a, integer *lda, real *tau, real *c__, integer *ldc,
+	 real *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int sormql_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *);
+    static integer lwkopt;
+    static logical lquery;
+    extern /* Subroutine */ int sormqr_(char *, char *, integer *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SORMTR overwrites the general real M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'T':      Q**T * C       C * Q**T
+
+    where Q is a real orthogonal matrix of order nq, with nq = m if
+    SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+    nq-1 elementary reflectors, as returned by SSYTRD:
+
+    if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+
+    if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**T from the Left;
+            = 'R': apply Q or Q**T from the Right.
+
+    UPLO    (input) CHARACTER*1
+            = 'U': Upper triangle of A contains elementary reflectors
+                   from SSYTRD;
+            = 'L': Lower triangle of A contains elementary reflectors
+                   from SSYTRD.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'T':  Transpose, apply Q**T.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    A       (input) REAL array, dimension
+                                 (LDA,M) if SIDE = 'L'
+                                 (LDA,N) if SIDE = 'R'
+            The vectors which define the elementary reflectors, as
+            returned by SSYTRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+
+    TAU     (input) REAL array, dimension
+                                 (M-1) if SIDE = 'L'
+                                 (N-1) if SIDE = 'R'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by SSYTRD.
+
+    C       (input/output) REAL array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	*info = -2;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "T")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	if (upper) {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "SORMQL", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "SORMQL", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "SORMQR", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "SORMQR", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("SORMTR", &i__2);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (nq == 1)) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    if (left) {
+	mi = *m - 1;
+	ni = *n;
+    } else {
+	mi = *m;
+	ni = *n - 1;
+    }
+
+    if (upper) {
+
+/*        Q was determined by a call to SSYTRD with UPLO = 'U' */
+
+	i__2 = nq - 1;
+	sormql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda,
+		&tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo);
+    } else {
+
+/*        Q was determined by a call to SSYTRD with UPLO = 'L' */
+
+	if (left) {
+	    i1 = 2;
+	    i2 = 1;
+	} else {
+	    i1 = 1;
+	    i2 = 2;
+	}
+	i__2 = nq - 1;
+	sormqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], &
+		c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+    }
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SORMTR */
+
+} /* sormtr_ */
+
+/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer j;
+    static real ajj;
+    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *),
+	    sgemv_(char *, integer *, integer *, real *, real *, integer *,
+	    real *, integer *, real *, real *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    SPOTF2 computes the Cholesky factorization of a real symmetric
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U' * U ,  if UPLO = 'U', or
+       A = L  * L',  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the unblocked version of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is stored.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            n by n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U'*U  or A = L*L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, the leading minor of order k is not
+                 positive definite, and the factorization could not be
+                 completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SPOTF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the Cholesky factorization A = U'*U. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute U(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j - 1;
+	    ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1,
+		    &a[j * a_dim1 + 1], &c__1);
+	    if (ajj <= 0.f) {
+		a[j + j * a_dim1] = ajj;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    a[j + j * a_dim1] = ajj;
+
+/*           Compute elements J+1:N of row J. */
+
+	    if (j < *n) {
+		i__2 = j - 1;
+		i__3 = *n - j;
+		sgemv_("Transpose", &i__2, &i__3, &c_b1290, &a[(j + 1) *
+			a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b1011,
+			 &a[j + (j + 1) * a_dim1], lda);
+		i__2 = *n - j;
+		r__1 = 1.f / ajj;
+		sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Compute the Cholesky factorization A = L*L'. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute L(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j - 1;
+	    ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j
+		    + a_dim1], lda);
+	    if (ajj <= 0.f) {
+		a[j + j * a_dim1] = ajj;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    a[j + j * a_dim1] = ajj;
+
+/*           Compute elements J+1:N of column J. */
+
+	    if (j < *n) {
+		i__2 = *n - j;
+		i__3 = j - 1;
+		sgemv_("No transpose", &i__2, &i__3, &c_b1290, &a[j + 1 +
+			a_dim1], lda, &a[j + a_dim1], lda, &c_b1011, &a[j + 1
+			+ j * a_dim1], &c__1);
+		i__2 = *n - j;
+		r__1 = 1.f / ajj;
+		sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+    goto L40;
+
+L30:
+    *info = j;
+
+L40:
+    return 0;
+
+/*     End of SPOTF2 */
+
+} /* spotf2_ */
+
+/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer j, jb, nb;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *,
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), ssyrk_(char *, char *, integer
+	    *, integer *, real *, real *, integer *, real *, real *, integer *
+	    ), spotf2_(char *, integer *, real *, integer *,
+	    integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    SPOTRF computes the Cholesky factorization of a real symmetric
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U**T * U,  if UPLO = 'U', or
+       A = L  * L**T,  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the block version of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U**T*U or A = L*L**T.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the leading minor of order i is not
+                  positive definite, and the factorization could not be
+                  completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SPOTRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code. */
+
+	spotf2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code. */
+
+	if (upper) {
+
+/*           Compute the Cholesky factorization A = U'*U. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b1290, &a[j *
+			a_dim1 + 1], lda, &c_b1011, &a[j + j * a_dim1], lda);
+		spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block row. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, &
+			    c_b1290, &a[j * a_dim1 + 1], lda, &a[(j + jb) *
+			    a_dim1 + 1], lda, &c_b1011, &a[j + (j + jb) *
+			    a_dim1], lda);
+		    i__3 = *n - j - jb + 1;
+		    strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, &
+			    i__3, &c_b1011, &a[j + j * a_dim1], lda, &a[j + (
+			    j + jb) * a_dim1], lda);
+		}
+/* L10: */
+	    }
+
+	} else {
+
+/*           Compute the Cholesky factorization A = L*L'. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b1290, &a[j +
+			a_dim1], lda, &c_b1011, &a[j + j * a_dim1], lda);
+		spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block column. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, &
+			    c_b1290, &a[j + jb + a_dim1], lda, &a[j + a_dim1],
+			     lda, &c_b1011, &a[j + jb + j * a_dim1], lda);
+		    i__3 = *n - j - jb + 1;
+		    strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, &
+			    jb, &c_b1011, &a[j + j * a_dim1], lda, &a[j + jb
+			    + j * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+    goto L40;
+
+L30:
+    *info = *info + j - 1;
+
+L40:
+    return 0;
+
+/*     End of SPOTRF */
+
+} /* spotrf_ */
+
+/* Subroutine */ int spotri_(char *uplo, integer *n, real *a, integer *lda,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), slauum_(
+	    char *, integer *, real *, integer *, integer *), strtri_(
+	    char *, char *, integer *, real *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    SPOTRI computes the inverse of a real symmetric positive definite
+    matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+    computed by SPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the triangular factor U or L from the Cholesky
+            factorization A = U**T*U or A = L*L**T, as computed by
+            SPOTRF.
+            On exit, the upper or lower triangle of the (symmetric)
+            inverse of A, overwriting the input factor U or L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the (i,i) element of the factor U or L is
+                  zero, and the inverse could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SPOTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Invert the triangular Cholesky factor U or L. */
+
+    strtri_(uplo, "Non-unit", n, &a[a_offset], lda, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*     Form inv(U)*inv(U)' or inv(L)'*inv(L). */
+
+    slauum_(uplo, n, &a[a_offset], lda, info);
+
+    return 0;
+
+/*     End of SPOTRI */
+
+} /* spotri_ */
+
+/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a,
+	integer *lda, real *b, integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int strsm_(char *, char *, char *, char *,
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    SPOTRS solves a system of linear equations A*X = B with a symmetric
+    positive definite matrix A using the Cholesky factorization
+    A = U**T*U or A = L*L**T computed by SPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) REAL array, dimension (LDA,N)
+            The triangular factor U or L from the Cholesky factorization
+            A = U**T*U or A = L*L**T, as computed by SPOTRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    B       (input/output) REAL array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SPOTRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*
+          Solve A*X = B where A = U'*U.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b1011, &
+		a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b1011,
+		 &a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A*X = B where A = L*L'.
+
+          Solve L*X = B, overwriting B with X.
+*/
+
+	strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b1011,
+		 &a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b1011, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    }
+
+    return 0;
+
+/*     End of SPOTRS */
+
+} /* spotrs_ */
+
+/* Subroutine */ int sstedc_(char *compz, integer *n, real *d__, real *e,
+	real *z__, integer *ldz, real *work, integer *lwork, integer *iwork,
+	integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k, m;
+    static real p;
+    static integer ii, end, lgn;
+    static real eps, tiny;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *,
+	    integer *, real *, real *, integer *, real *, integer *, real *,
+	    real *, integer *);
+    static integer lwmin, start;
+    extern /* Subroutine */ int sswap_(integer *, real *, integer *, real *,
+	    integer *), slaed0_(integer *, integer *, integer *, real *, real
+	    *, real *, integer *, real *, integer *, real *, integer *,
+	    integer *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *,
+	    real *, integer *), slaset_(char *, integer *, integer *,
+	    real *, real *, real *, integer *);
+    static integer liwmin, icompz;
+    static real orgnrm;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *),
+	     slasrt_(char *, integer *, real *, integer *);
+    static logical lquery;
+    static integer smlsiz;
+    extern /* Subroutine */ int ssteqr_(char *, integer *, real *, real *,
+	    real *, integer *, real *, integer *);
+    static integer storez, strtrw;
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the divide and conquer method.
+    The eigenvectors of a full or band real symmetric matrix can also be
+    found if SSYTRD or SSPTRD or SSBTRD has been used to reduce this
+    matrix to tridiagonal form.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.  See SLAED3 for details.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'I':  Compute eigenvectors of tridiagonal matrix also.
+            = 'V':  Compute eigenvectors of original dense symmetric
+                    matrix also.  On entry, Z contains the orthogonal
+                    matrix used to reduce the original matrix to
+                    tridiagonal form.
+
+    N       (input) INTEGER
+            The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) REAL array, dimension (N-1)
+            On entry, the subdiagonal elements of the tridiagonal matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) REAL array, dimension (LDZ,N)
+            On entry, if COMPZ = 'V', then Z contains the orthogonal
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original symmetric matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If  COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1.
+            If eigenvectors are desired, then LDZ >= max(1,N).
+
+    WORK    (workspace/output) REAL array,
+                                           dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If COMPZ = 'N' or N <= 1 then LWORK must be at least 1.
+            If COMPZ = 'V' and N > 1 then LWORK must be at least
+                           ( 1 + 3*N + 2*N*lg N + 3*N**2 ),
+                           where lg( N ) = smallest integer k such
+                           that 2**k >= N.
+            If COMPZ = 'I' and N > 1 then LWORK must be at least
+                           ( 1 + 4*N + N**2 ).
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If COMPZ = 'N' or N <= 1 then LIWORK must be at least 1.
+            If COMPZ = 'V' and N > 1 then LIWORK must be at least
+                           ( 6 + 6*N + 5*N*lg N ).
+            If COMPZ = 'I' and N > 1 then LIWORK must be at least
+                           ( 3 + 5*N ).
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = (*lwork == -1) || (*liwork == -1);
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if ((*n <= 1) || (icompz <= 0)) {
+	liwmin = 1;
+	lwmin = 1;
+    } else {
+	lgn = (integer) (log((real) (*n)) / log(2.f));
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (icompz == 1) {
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3;
+	    liwmin = *n * 6 + 6 + *n * 5 * lgn;
+	} else if (icompz == 2) {
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lwmin = ((*n) << (2)) + 1 + i__1 * i__1;
+	    liwmin = *n * 5 + 3;
+	}
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -10;
+    }
+
+    if (*info == 0) {
+	work[1] = (real) lwmin;
+	iwork[1] = liwmin;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SSTEDC", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	if (icompz != 0) {
+	    z__[z_dim1 + 1] = 1.f;
+	}
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "SSTEDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       If the following conditional clause is removed, then the routine
+       will use the Divide and Conquer routine to compute only the
+       eigenvalues, which requires (3N + 3N**2) real workspace and
+       (2 + 5N + 2N lg(N)) integer workspace.
+       Since on many architectures SSTERF is much faster than any other
+       algorithm for finding eigenvalues only, it is used here
+       as the default.
+
+       If COMPZ = 'N', use SSTERF to compute the eigenvalues.
+*/
+
+    if (icompz == 0) {
+	ssterf_(n, &d__[1], &e[1], info);
+	return 0;
+    }
+
+/*
+       If N is smaller than the minimum divide size (SMLSIZ+1), then
+       solve the problem with another solver.
+*/
+
+    if (*n <= smlsiz) {
+	if (icompz == 0) {
+	    ssterf_(n, &d__[1], &e[1], info);
+	    return 0;
+	} else if (icompz == 2) {
+	    ssteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1],
+		    info);
+	    return 0;
+	} else {
+	    ssteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &work[1],
+		    info);
+	    return 0;
+	}
+    }
+
+/*
+       If COMPZ = 'V', the Z matrix must be stored elsewhere for later
+       use.
+*/
+
+    if (icompz == 1) {
+	storez = *n * *n + 1;
+    } else {
+	storez = 1;
+    }
+
+    if (icompz == 2) {
+	slaset_("Full", n, n, &c_b320, &c_b1011, &z__[z_offset], ldz);
+    }
+
+/*     Scale. */
+
+    orgnrm = slanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.f) {
+	return 0;
+    }
+
+    eps = slamch_("Epsilon");
+
+    start = 1;
+
+/*     while ( START <= N ) */
+
+L10:
+    if (start <= *n) {
+
+/*
+       Let END be the position of the next subdiagonal entry such that
+       E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+       matrix identified by the elements between START and END
+       constitutes an independent sub-problem.
+*/
+
+	end = start;
+L20:
+	if (end < *n) {
+	    tiny = eps * sqrt((r__1 = d__[end], dabs(r__1))) * sqrt((r__2 =
+		    d__[end + 1], dabs(r__2)));
+	    if ((r__1 = e[end], dabs(r__1)) > tiny) {
+		++end;
+		goto L20;
+	    }
+	}
+
+/*        (Sub) Problem determined.  Compute its size and solve it. */
+
+	m = end - start + 1;
+	if (m == 1) {
+	    start = end + 1;
+	    goto L10;
+	}
+	if (m > smlsiz) {
+	    *info = smlsiz;
+
+/*           Scale. */
+
+	    orgnrm = slanst_("M", &m, &d__[start], &e[start]);
+	    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &m, &c__1, &d__[
+		    start], &m, info);
+	    i__1 = m - 1;
+	    i__2 = m - 1;
+	    slascl_("G", &c__0, &c__0, &orgnrm, &c_b1011, &i__1, &c__1, &e[
+		    start], &i__2, info);
+
+	    if (icompz == 1) {
+		strtrw = 1;
+	    } else {
+		strtrw = start;
+	    }
+	    slaed0_(&icompz, n, &m, &d__[start], &e[start], &z__[strtrw +
+		    start * z_dim1], ldz, &work[1], n, &work[storez], &iwork[
+		    1], info);
+	    if (*info != 0) {
+		*info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m
+			+ 1) + start - 1;
+		return 0;
+	    }
+
+/*           Scale back. */
+
+	    slascl_("G", &c__0, &c__0, &c_b1011, &orgnrm, &m, &c__1, &d__[
+		    start], &m, info);
+
+	} else {
+	    if (icompz == 1) {
+
+/*
+       Since QR won't update a Z matrix which is larger than the
+       length of D, we must solve the sub-problem in a workspace and
+       then multiply back into Z.
+*/
+
+		ssteqr_("I", &m, &d__[start], &e[start], &work[1], &m, &work[
+			m * m + 1], info);
+		slacpy_("A", n, &m, &z__[start * z_dim1 + 1], ldz, &work[
+			storez], n);
+		sgemm_("N", "N", n, &m, &m, &c_b1011, &work[storez], ldz, &
+			work[1], &m, &c_b320, &z__[start * z_dim1 + 1], ldz);
+	    } else if (icompz == 2) {
+		ssteqr_("I", &m, &d__[start], &e[start], &z__[start + start *
+			z_dim1], ldz, &work[1], info);
+	    } else {
+		ssterf_(&m, &d__[start], &e[start], info);
+	    }
+	    if (*info != 0) {
+		*info = start * (*n + 1) + end;
+		return 0;
+	    }
+	}
+
+	start = end + 1;
+	goto L10;
+    }
+
+/*
+       endwhile
+
+       If the problem split any number of times, then the eigenvalues
+       will not be properly ordered.  Here we permute the eigenvalues
+       (and the associated eigenvectors) into ascending order.
+*/
+
+    if (m != *n) {
+	if (icompz == 0) {
+
+/*        Use Quick Sort */
+
+	    slasrt_("I", n, &d__[1], info);
+
+	} else {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	    i__1 = *n;
+	    for (ii = 2; ii <= i__1; ++ii) {
+		i__ = ii - 1;
+		k = i__;
+		p = d__[i__];
+		i__2 = *n;
+		for (j = ii; j <= i__2; ++j) {
+		    if (d__[j] < p) {
+			k = j;
+			p = d__[j];
+		    }
+/* L30: */
+		}
+		if (k != i__) {
+		    d__[k] = d__[i__];
+		    d__[i__] = p;
+		    sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1
+			    + 1], &c__1);
+		}
+/* L40: */
+	    }
+	}
+    }
+
+    work[1] = (real) lwmin;
+    iwork[1] = liwmin;
+
+    return 0;
+
+/*     End of SSTEDC */
+
+} /* sstedc_ */
+
+/* Subroutine */ int ssteqr_(char *compz, integer *n, real *d__, real *e,
+	real *z__, integer *ldz, real *work, integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2;
+    real r__1, r__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static real b, c__, f, g;
+    static integer i__, j, k, l, m;
+    static real p, r__, s;
+    static integer l1, ii, mm, lm1, mm1, nm1;
+    static real rt1, rt2, eps;
+    static integer lsv;
+    static real tst, eps2;
+    static integer lend, jtot;
+    extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *)
+	    ;
+    extern logical lsame_(char *, char *);
+    static real anorm;
+    extern /* Subroutine */ int slasr_(char *, char *, char *, integer *,
+	    integer *, real *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *);
+    static integer lendm1, lendp1;
+    extern /* Subroutine */ int slaev2_(real *, real *, real *, real *, real *
+	    , real *, real *);
+    extern doublereal slapy2_(real *, real *);
+    static integer iscale;
+    extern doublereal slamch_(char *);
+    static real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real safmax;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *);
+    static integer lendsv;
+    extern /* Subroutine */ int slartg_(real *, real *, real *, real *, real *
+	    ), slaset_(char *, integer *, integer *, real *, real *, real *,
+	    integer *);
+    static real ssfmin;
+    static integer nmaxit, icompz;
+    static real ssfmax;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    SSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the implicit QL or QR method.
+    The eigenvectors of a full or band symmetric matrix can also be found
+    if SSYTRD or SSPTRD or SSBTRD has been used to reduce this matrix to
+    tridiagonal form.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'V':  Compute eigenvalues and eigenvectors of the original
+                    symmetric matrix.  On entry, Z must contain the
+                    orthogonal matrix used to reduce the original matrix
+                    to tridiagonal form.
+            = 'I':  Compute eigenvalues and eigenvectors of the
+                    tridiagonal matrix.  Z is initialized to the identity
+                    matrix.
+
+    N       (input) INTEGER
+            The order of the matrix.  N >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) REAL array, dimension (N-1)
+            On entry, the (n-1) subdiagonal elements of the tridiagonal
+            matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) REAL array, dimension (LDZ, N)
+            On entry, if  COMPZ = 'V', then Z contains the orthogonal
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if  COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original symmetric matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1, and if
+            eigenvectors are desired, then  LDZ >= max(1,N).
+
+    WORK    (workspace) REAL array, dimension (max(1,2*N-2))
+            If COMPZ = 'N', then WORK is not referenced.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm has failed to find all the eigenvalues in
+                  a total of 30*N iterations; if INFO = i, then i
+                  elements of E have not converged to zero; on exit, D
+                  and E contain the elements of a symmetric tridiagonal
+                  matrix which is orthogonally similar to the original
+                  matrix.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SSTEQR", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	if (icompz == 2) {
+	    z__[z_dim1 + 1] = 1.f;
+	}
+	return 0;
+    }
+
+/*     Determine the unit roundoff and over/underflow thresholds. */
+
+    eps = slamch_("E");
+/* Computing 2nd power */
+    r__1 = eps;
+    eps2 = r__1 * r__1;
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    ssfmax = sqrt(safmax) / 3.f;
+    ssfmin = sqrt(safmin) / eps2;
+
+/*
+       Compute the eigenvalues and eigenvectors of the tridiagonal
+       matrix.
+*/
+
+    if (icompz == 2) {
+	slaset_("Full", n, n, &c_b320, &c_b1011, &z__[z_offset], ldz);
+    }
+
+    nmaxit = *n * 30;
+    jtot = 0;
+
+/*
+       Determine where the matrix splits and choose QL or QR iteration
+       for each block, according to whether top or bottom diagonal
+       element is smaller.
+*/
+
+    l1 = 1;
+    nm1 = *n - 1;
+
+L10:
+    if (l1 > *n) {
+	goto L160;
+    }
+    if (l1 > 1) {
+	e[l1 - 1] = 0.f;
+    }
+    if (l1 <= nm1) {
+	i__1 = nm1;
+	for (m = l1; m <= i__1; ++m) {
+	    tst = (r__1 = e[m], dabs(r__1));
+	    if (tst == 0.f) {
+		goto L30;
+	    }
+	    if (tst <= sqrt((r__1 = d__[m], dabs(r__1))) * sqrt((r__2 = d__[m
+		    + 1], dabs(r__2))) * eps) {
+		e[m] = 0.f;
+		goto L30;
+	    }
+/* L20: */
+	}
+    }
+    m = *n;
+
+L30:
+    l = l1;
+    lsv = l;
+    lend = m;
+    lendsv = lend;
+    l1 = m + 1;
+    if (lend == l) {
+	goto L10;
+    }
+
+/*     Scale submatrix in rows and columns L to LEND */
+
+    i__1 = lend - l + 1;
+    anorm = slanst_("I", &i__1, &d__[l], &e[l]);
+    iscale = 0;
+    if (anorm == 0.f) {
+	goto L10;
+    }
+    if (anorm > ssfmax) {
+	iscale = 1;
+	i__1 = lend - l + 1;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
+		info);
+    } else if (anorm < ssfmin) {
+	iscale = 2;
+	i__1 = lend - l + 1;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
+		info);
+    }
+
+/*     Choose between QL and QR iteration */
+
+    if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) {
+	lend = lsv;
+	l = lendsv;
+    }
+
+    if (lend > l) {
+
+/*
+          QL Iteration
+
+          Look for small subdiagonal element.
+*/
+
+L40:
+	if (l != lend) {
+	    lendm1 = lend - 1;
+	    i__1 = lendm1;
+	    for (m = l; m <= i__1; ++m) {
+/* Computing 2nd power */
+		r__2 = (r__1 = e[m], dabs(r__1));
+		tst = r__2 * r__2;
+		if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m
+			+ 1], dabs(r__2)) + safmin) {
+		    goto L60;
+		}
+/* L50: */
+	    }
+	}
+
+	m = lend;
+
+L60:
+	if (m < lend) {
+	    e[m] = 0.f;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L80;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l + 1) {
+	    if (icompz > 0) {
+		slaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
+		work[l] = c__;
+		work[*n - 1 + l] = s;
+		slasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
+			z__[l * z_dim1 + 1], ldz);
+	    } else {
+		slae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
+	    }
+	    d__[l] = rt1;
+	    d__[l + 1] = rt2;
+	    e[l] = 0.f;
+	    l += 2;
+	    if (l <= lend) {
+		goto L40;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l + 1] - p) / (e[l] * 2.f);
+	r__ = slapy2_(&g, &c_b1011);
+	g = d__[m] - p + e[l] / (g + r_sign(&r__, &g));
+
+	s = 1.f;
+	c__ = 1.f;
+	p = 0.f;
+
+/*        Inner loop */
+
+	mm1 = m - 1;
+	i__1 = l;
+	for (i__ = mm1; i__ >= i__1; --i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    slartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m - 1) {
+		e[i__ + 1] = r__;
+	    }
+	    g = d__[i__ + 1] - p;
+	    r__ = (d__[i__] - g) * s + c__ * 2.f * b;
+	    p = s * r__;
+	    d__[i__ + 1] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = -s;
+	    }
+
+/* L70: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = m - l + 1;
+	    slasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[l] = g;
+	goto L40;
+
+/*        Eigenvalue found. */
+
+L80:
+	d__[l] = p;
+
+	++l;
+	if (l <= lend) {
+	    goto L40;
+	}
+	goto L140;
+
+    } else {
+
+/*
+          QR Iteration
+
+          Look for small superdiagonal element.
+*/
+
+L90:
+	if (l != lend) {
+	    lendp1 = lend + 1;
+	    i__1 = lendp1;
+	    for (m = l; m >= i__1; --m) {
+/* Computing 2nd power */
+		r__2 = (r__1 = e[m - 1], dabs(r__1));
+		tst = r__2 * r__2;
+		if (tst <= eps2 * (r__1 = d__[m], dabs(r__1)) * (r__2 = d__[m
+			- 1], dabs(r__2)) + safmin) {
+		    goto L110;
+		}
+/* L100: */
+	    }
+	}
+
+	m = lend;
+
+L110:
+	if (m > lend) {
+	    e[m - 1] = 0.f;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L130;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use SLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l - 1) {
+	    if (icompz > 0) {
+		slaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
+			;
+		work[m] = c__;
+		work[*n - 1 + m] = s;
+		slasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
+			z__[(l - 1) * z_dim1 + 1], ldz);
+	    } else {
+		slae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
+	    }
+	    d__[l - 1] = rt1;
+	    d__[l] = rt2;
+	    e[l - 1] = 0.f;
+	    l += -2;
+	    if (l >= lend) {
+		goto L90;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l - 1] - p) / (e[l - 1] * 2.f);
+	r__ = slapy2_(&g, &c_b1011);
+	g = d__[m] - p + e[l - 1] / (g + r_sign(&r__, &g));
+
+	s = 1.f;
+	c__ = 1.f;
+	p = 0.f;
+
+/*        Inner loop */
+
+	lm1 = l - 1;
+	i__1 = lm1;
+	for (i__ = m; i__ <= i__1; ++i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    slartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m) {
+		e[i__ - 1] = r__;
+	    }
+	    g = d__[i__] - p;
+	    r__ = (d__[i__ + 1] - g) * s + c__ * 2.f * b;
+	    p = s * r__;
+	    d__[i__] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = s;
+	    }
+
+/* L120: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = l - m + 1;
+	    slasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[lm1] = g;
+	goto L90;
+
+/*        Eigenvalue found. */
+
+L130:
+	d__[l] = p;
+
+	--l;
+	if (l >= lend) {
+	    goto L90;
+	}
+	goto L140;
+
+    }
+
+/*     Undo scaling if necessary */
+
+L140:
+    if (iscale == 1) {
+	i__1 = lendsv - lsv + 1;
+	slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    } else if (iscale == 2) {
+	i__1 = lendsv - lsv + 1;
+	slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    }
+
+/*
+       Check for no convergence to an eigenvalue after a total
+       of N*MAXIT iterations.
+*/
+
+    if (jtot < nmaxit) {
+	goto L10;
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.f) {
+	    ++(*info);
+	}
+/* L150: */
+    }
+    goto L190;
+
+/*     Order eigenvalues and eigenvectors. */
+
+L160:
+    if (icompz == 0) {
+
+/*        Use Quick Sort */
+
+	slasrt_("I", n, &d__[1], info);
+
+    } else {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	i__1 = *n;
+	for (ii = 2; ii <= i__1; ++ii) {
+	    i__ = ii - 1;
+	    k = i__;
+	    p = d__[i__];
+	    i__2 = *n;
+	    for (j = ii; j <= i__2; ++j) {
+		if (d__[j] < p) {
+		    k = j;
+		    p = d__[j];
+		}
+/* L170: */
+	    }
+	    if (k != i__) {
+		d__[k] = d__[i__];
+		d__[i__] = p;
+		sswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1],
+			 &c__1);
+	    }
+/* L180: */
+	}
+    }
+
+L190:
+    return 0;
+
+/*     End of SSTEQR */
+
+} /* ssteqr_ */
+
+/* Subroutine */ int ssterf_(integer *n, real *d__, real *e, integer *info)
+{
+    /* System generated locals */
+    integer i__1;
+    real r__1, r__2, r__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal), r_sign(real *, real *);
+
+    /* Local variables */
+    static real c__;
+    static integer i__, l, m;
+    static real p, r__, s;
+    static integer l1;
+    static real bb, rt1, rt2, eps, rte;
+    static integer lsv;
+    static real eps2, oldc;
+    static integer lend, jtot;
+    extern /* Subroutine */ int slae2_(real *, real *, real *, real *, real *)
+	    ;
+    static real gamma, alpha, sigma, anorm;
+    extern doublereal slapy2_(real *, real *);
+    static integer iscale;
+    static real oldgam;
+    extern doublereal slamch_(char *);
+    static real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real safmax;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *);
+    static integer lendsv;
+    static real ssfmin;
+    static integer nmaxit;
+    static real ssfmax;
+    extern doublereal slanst_(char *, integer *, real *, real *);
+    extern /* Subroutine */ int slasrt_(char *, integer *, real *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SSTERF computes all eigenvalues of a symmetric tridiagonal matrix
+    using the Pal-Walker-Kahan variant of the QL or QR algorithm.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix.  N >= 0.
+
+    D       (input/output) REAL array, dimension (N)
+            On entry, the n diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) REAL array, dimension (N-1)
+            On entry, the (n-1) subdiagonal elements of the tridiagonal
+            matrix.
+            On exit, E has been destroyed.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm failed to find all of the eigenvalues in
+                  a total of 30*N iterations; if INFO = i, then i
+                  elements of E have not converged to zero.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --e;
+    --d__;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n < 0) {
+	*info = -1;
+	i__1 = -(*info);
+	xerbla_("SSTERF", &i__1);
+	return 0;
+    }
+    if (*n <= 1) {
+	return 0;
+    }
+
+/*     Determine the unit roundoff for this environment. */
+
+    eps = slamch_("E");
+/* Computing 2nd power */
+    r__1 = eps;
+    eps2 = r__1 * r__1;
+    safmin = slamch_("S");
+    safmax = 1.f / safmin;
+    ssfmax = sqrt(safmax) / 3.f;
+    ssfmin = sqrt(safmin) / eps2;
+
+/*     Compute the eigenvalues of the tridiagonal matrix. */
+
+    nmaxit = *n * 30;
+    sigma = 0.f;
+    jtot = 0;
+
+/*
+       Determine where the matrix splits and choose QL or QR iteration
+       for each block, according to whether top or bottom diagonal
+       element is smaller.
+*/
+
+    l1 = 1;
+
+L10:
+    if (l1 > *n) {
+	goto L170;
+    }
+    if (l1 > 1) {
+	e[l1 - 1] = 0.f;
+    }
+    i__1 = *n - 1;
+    for (m = l1; m <= i__1; ++m) {
+	if ((r__3 = e[m], dabs(r__3)) <= sqrt((r__1 = d__[m], dabs(r__1))) *
+		sqrt((r__2 = d__[m + 1], dabs(r__2))) * eps) {
+	    e[m] = 0.f;
+	    goto L30;
+	}
+/* L20: */
+    }
+    m = *n;
+
+L30:
+    l = l1;
+    lsv = l;
+    lend = m;
+    lendsv = lend;
+    l1 = m + 1;
+    if (lend == l) {
+	goto L10;
+    }
+
+/*     Scale submatrix in rows and columns L to LEND */
+
+    i__1 = lend - l + 1;
+    anorm = slanst_("I", &i__1, &d__[l], &e[l]);
+    iscale = 0;
+    if (anorm > ssfmax) {
+	iscale = 1;
+	i__1 = lend - l + 1;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
+		info);
+    } else if (anorm < ssfmin) {
+	iscale = 2;
+	i__1 = lend - l + 1;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	slascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
+		info);
+    }
+
+    i__1 = lend - 1;
+    for (i__ = l; i__ <= i__1; ++i__) {
+/* Computing 2nd power */
+	r__1 = e[i__];
+	e[i__] = r__1 * r__1;
+/* L40: */
+    }
+
+/*     Choose between QL and QR iteration */
+
+    if ((r__1 = d__[lend], dabs(r__1)) < (r__2 = d__[l], dabs(r__2))) {
+	lend = lsv;
+	l = lendsv;
+    }
+
+    if (lend >= l) {
+
+/*
+          QL Iteration
+
+          Look for small subdiagonal element.
+*/
+
+L50:
+	if (l != lend) {
+	    i__1 = lend - 1;
+	    for (m = l; m <= i__1; ++m) {
+		if ((r__2 = e[m], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[
+			m + 1], dabs(r__1))) {
+		    goto L70;
+		}
+/* L60: */
+	    }
+	}
+	m = lend;
+
+L70:
+	if (m < lend) {
+	    e[m] = 0.f;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L90;
+	}
+
+/*
+          If remaining matrix is 2 by 2, use SLAE2 to compute its
+          eigenvalues.
+*/
+
+	if (m == l + 1) {
+	    rte = sqrt(e[l]);
+	    slae2_(&d__[l], &rte, &d__[l + 1], &rt1, &rt2);
+	    d__[l] = rt1;
+	    d__[l + 1] = rt2;
+	    e[l] = 0.f;
+	    l += 2;
+	    if (l <= lend) {
+		goto L50;
+	    }
+	    goto L150;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L150;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	rte = sqrt(e[l]);
+	sigma = (d__[l + 1] - p) / (rte * 2.f);
+	r__ = slapy2_(&sigma, &c_b1011);
+	sigma = p - rte / (sigma + r_sign(&r__, &sigma));
+
+	c__ = 1.f;
+	s = 0.f;
+	gamma = d__[m] - sigma;
+	p = gamma * gamma;
+
+/*        Inner loop */
+
+	i__1 = l;
+	for (i__ = m - 1; i__ >= i__1; --i__) {
+	    bb = e[i__];
+	    r__ = p + bb;
+	    if (i__ != m - 1) {
+		e[i__ + 1] = s * r__;
+	    }
+	    oldc = c__;
+	    c__ = p / r__;
+	    s = bb / r__;
+	    oldgam = gamma;
+	    alpha = d__[i__];
+	    gamma = c__ * (alpha - sigma) - s * oldgam;
+	    d__[i__ + 1] = oldgam + (alpha - gamma);
+	    if (c__ != 0.f) {
+		p = gamma * gamma / c__;
+	    } else {
+		p = oldc * bb;
+	    }
+/* L80: */
+	}
+
+	e[l] = s * p;
+	d__[l] = sigma + gamma;
+	goto L50;
+
+/*        Eigenvalue found. */
+
+L90:
+	d__[l] = p;
+
+	++l;
+	if (l <= lend) {
+	    goto L50;
+	}
+	goto L150;
+
+    } else {
+
+/*
+          QR Iteration
+
+          Look for small superdiagonal element.
+*/
+
+L100:
+	i__1 = lend + 1;
+	for (m = l; m >= i__1; --m) {
+	    if ((r__2 = e[m - 1], dabs(r__2)) <= eps2 * (r__1 = d__[m] * d__[
+		    m - 1], dabs(r__1))) {
+		goto L120;
+	    }
+/* L110: */
+	}
+	m = lend;
+
+L120:
+	if (m > lend) {
+	    e[m - 1] = 0.f;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L140;
+	}
+
+/*
+          If remaining matrix is 2 by 2, use SLAE2 to compute its
+          eigenvalues.
+*/
+
+	if (m == l - 1) {
+	    rte = sqrt(e[l - 1]);
+	    slae2_(&d__[l], &rte, &d__[l - 1], &rt1, &rt2);
+	    d__[l] = rt1;
+	    d__[l - 1] = rt2;
+	    e[l - 1] = 0.f;
+	    l += -2;
+	    if (l >= lend) {
+		goto L100;
+	    }
+	    goto L150;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L150;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	rte = sqrt(e[l - 1]);
+	sigma = (d__[l - 1] - p) / (rte * 2.f);
+	r__ = slapy2_(&sigma, &c_b1011);
+	sigma = p - rte / (sigma + r_sign(&r__, &sigma));
+
+	c__ = 1.f;
+	s = 0.f;
+	gamma = d__[m] - sigma;
+	p = gamma * gamma;
+
+/*        Inner loop */
+
+	i__1 = l - 1;
+	for (i__ = m; i__ <= i__1; ++i__) {
+	    bb = e[i__];
+	    r__ = p + bb;
+	    if (i__ != m) {
+		e[i__ - 1] = s * r__;
+	    }
+	    oldc = c__;
+	    c__ = p / r__;
+	    s = bb / r__;
+	    oldgam = gamma;
+	    alpha = d__[i__ + 1];
+	    gamma = c__ * (alpha - sigma) - s * oldgam;
+	    d__[i__] = oldgam + (alpha - gamma);
+	    if (c__ != 0.f) {
+		p = gamma * gamma / c__;
+	    } else {
+		p = oldc * bb;
+	    }
+/* L130: */
+	}
+
+	e[l - 1] = s * p;
+	d__[l] = sigma + gamma;
+	goto L100;
+
+/*        Eigenvalue found. */
+
+L140:
+	d__[l] = p;
+
+	--l;
+	if (l >= lend) {
+	    goto L100;
+	}
+	goto L150;
+
+    }
+
+/*     Undo scaling if necessary */
+
+L150:
+    if (iscale == 1) {
+	i__1 = lendsv - lsv + 1;
+	slascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+    }
+    if (iscale == 2) {
+	i__1 = lendsv - lsv + 1;
+	slascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+    }
+
+/*
+       Check for no convergence to an eigenvalue after a total
+       of N*MAXIT iterations.
+*/
+
+    if (jtot < nmaxit) {
+	goto L10;
+    }
+    i__1 = *n - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (e[i__] != 0.f) {
+	    ++(*info);
+	}
+/* L160: */
+    }
+    goto L180;
+
+/*     Sort eigenvalues in increasing order. */
+
+L170:
+    slasrt_("I", n, &d__[1], info);
+
+L180:
+    return 0;
+
+/*     End of SSTERF */
+
+} /* ssterf_ */
+
+/* Subroutine */ int ssyevd_(char *jobz, char *uplo, integer *n, real *a,
+	integer *lda, real *w, real *work, integer *lwork, integer *iwork,
+	integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    real r__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static real eps;
+    static integer inde;
+    static real anrm, rmin, rmax;
+    static integer lopt;
+    static real sigma;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static integer lwmin, liopt;
+    static logical lower, wantz;
+    static integer indwk2, llwrk2, iscale;
+    extern doublereal slamch_(char *);
+    static real safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real bignum;
+    extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *,
+	    real *, integer *, integer *, real *, integer *, integer *);
+    static integer indtau;
+    extern /* Subroutine */ int sstedc_(char *, integer *, real *, real *,
+	    real *, integer *, real *, integer *, integer *, integer *,
+	    integer *), slacpy_(char *, integer *, integer *, real *,
+	    integer *, real *, integer *);
+    static integer indwrk, liwmin;
+    extern /* Subroutine */ int ssterf_(integer *, real *, real *, integer *);
+    extern doublereal slansy_(char *, char *, integer *, real *, integer *,
+	    real *);
+    static integer llwork;
+    static real smlnum;
+    static logical lquery;
+    extern /* Subroutine */ int sormtr_(char *, char *, char *, integer *,
+	    integer *, real *, integer *, real *, real *, integer *, real *,
+	    integer *, integer *), ssytrd_(char *,
+	    integer *, real *, integer *, real *, real *, real *, real *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SSYEVD computes all eigenvalues and, optionally, eigenvectors of a
+    real symmetric matrix A. If eigenvectors are desired, it uses a
+    divide and conquer algorithm.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Because of large use of BLAS of level 3, SSYEVD needs N**2 more
+    workspace than SSYEVX.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only;
+            = 'V':  Compute eigenvalues and eigenvectors.
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA, N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of A contains the
+            upper triangular part of the matrix A.  If UPLO = 'L',
+            the leading N-by-N lower triangular part of A contains
+            the lower triangular part of the matrix A.
+            On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+            orthonormal eigenvectors of the matrix A.
+            If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+            or the upper triangle (if UPLO='U') of A, including the
+            diagonal, is destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    W       (output) REAL array, dimension (N)
+            If INFO = 0, the eigenvalues in ascending order.
+
+    WORK    (workspace/output) REAL array,
+                                           dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If N <= 1,               LWORK must be at least 1.
+            If JOBZ = 'N' and N > 1, LWORK must be at least 2*N+1.
+            If JOBZ = 'V' and N > 1, LWORK must be at least
+                                                  1 + 6*N + 2*N**2.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If N <= 1,                LIWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+            If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the algorithm failed to converge; i
+                  off-diagonal elements of an intermediate tridiagonal
+                  form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+    Modified by Francoise Tisseur, University of Tennessee.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --w;
+    --work;
+    --iwork;
+
+    /* Function Body */
+    wantz = lsame_(jobz, "V");
+    lower = lsame_(uplo, "L");
+    lquery = (*lwork == -1) || (*liwork == -1);
+
+    *info = 0;
+    if (*n <= 1) {
+	liwmin = 1;
+	lwmin = 1;
+	lopt = lwmin;
+	liopt = liwmin;
+    } else {
+	if (wantz) {
+	    liwmin = *n * 5 + 3;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lwmin = *n * 6 + 1 + ((i__1 * i__1) << (1));
+	} else {
+	    liwmin = 1;
+	    lwmin = ((*n) << (1)) + 1;
+	}
+	lopt = lwmin;
+	liopt = liwmin;
+    }
+    if (! ((wantz) || (lsame_(jobz, "N")))) {
+	*info = -1;
+    } else if (! ((lower) || (lsame_(uplo, "U")))) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -10;
+    }
+
+    if (*info == 0) {
+	work[1] = (real) lopt;
+	iwork[1] = liopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SSYEVD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	w[1] = a[a_dim1 + 1];
+	if (wantz) {
+	    a[a_dim1 + 1] = 1.f;
+	}
+	return 0;
+    }
+
+/*     Get machine constants. */
+
+    safmin = slamch_("Safe minimum");
+    eps = slamch_("Precision");
+    smlnum = safmin / eps;
+    bignum = 1.f / smlnum;
+    rmin = sqrt(smlnum);
+    rmax = sqrt(bignum);
+
+/*     Scale matrix to allowable range, if necessary. */
+
+    anrm = slansy_("M", uplo, n, &a[a_offset], lda, &work[1]);
+    iscale = 0;
+    if (anrm > 0.f && anrm < rmin) {
+	iscale = 1;
+	sigma = rmin / anrm;
+    } else if (anrm > rmax) {
+	iscale = 1;
+	sigma = rmax / anrm;
+    }
+    if (iscale == 1) {
+	slascl_(uplo, &c__0, &c__0, &c_b1011, &sigma, n, n, &a[a_offset], lda,
+		 info);
+    }
+
+/*     Call SSYTRD to reduce symmetric matrix to tridiagonal form. */
+
+    inde = 1;
+    indtau = inde + *n;
+    indwrk = indtau + *n;
+    llwork = *lwork - indwrk + 1;
+    indwk2 = indwrk + *n * *n;
+    llwrk2 = *lwork - indwk2 + 1;
+
+    ssytrd_(uplo, n, &a[a_offset], lda, &w[1], &work[inde], &work[indtau], &
+	    work[indwrk], &llwork, &iinfo);
+    lopt = ((*n) << (1)) + work[indwrk];
+
+/*
+       For eigenvalues only, call SSTERF.  For eigenvectors, first call
+       SSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+       tridiagonal matrix, then call SORMTR to multiply it by the
+       Householder transformations stored in A.
+*/
+
+    if (! wantz) {
+	ssterf_(n, &w[1], &work[inde], info);
+    } else {
+	sstedc_("I", n, &w[1], &work[inde], &work[indwrk], n, &work[indwk2], &
+		llwrk2, &iwork[1], liwork, info);
+	sormtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
+		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
+	slacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
+/*
+   Computing MAX
+   Computing 2nd power
+*/
+	i__3 = *n;
+	i__1 = lopt, i__2 = *n * 6 + 1 + ((i__3 * i__3) << (1));
+	lopt = max(i__1,i__2);
+    }
+
+/*     If matrix was scaled, then rescale eigenvalues appropriately. */
+
+    if (iscale == 1) {
+	r__1 = 1.f / sigma;
+	sscal_(n, &r__1, &w[1], &c__1);
+    }
+
+    work[1] = (real) lopt;
+    iwork[1] = liopt;
+
+    return 0;
+
+/*     End of SSYEVD */
+
+} /* ssyevd_ */
+
+/* Subroutine */ int ssytd2_(char *uplo, integer *n, real *a, integer *lda,
+	real *d__, real *e, real *tau, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__;
+    static real taui;
+    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
+    extern /* Subroutine */ int ssyr2_(char *, integer *, real *, real *,
+	    integer *, real *, integer *, real *, integer *);
+    static real alpha;
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
+	    real *, integer *), ssymv_(char *, integer *, real *, real *,
+	    integer *, real *, integer *, real *, real *, integer *),
+	    xerbla_(char *, integer *), slarfg_(integer *, real *,
+	    real *, integer *, real *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    SSYTD2 reduces a real symmetric matrix A to symmetric tridiagonal
+    form T by an orthogonal similarity transformation: Q' * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            symmetric matrix A is stored:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the orthogonal
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the orthogonal matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) REAL array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) REAL array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SSYTD2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Reduce the upper triangle of A */
+
+	for (i__ = *n - 1; i__ >= 1; --i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(1:i-1,i+1)
+*/
+
+	    slarfg_(&i__, &a[i__ + (i__ + 1) * a_dim1], &a[(i__ + 1) * a_dim1
+		    + 1], &c__1, &taui);
+	    e[i__] = a[i__ + (i__ + 1) * a_dim1];
+
+	    if (taui != 0.f) {
+
+/*              Apply H(i) from both sides to A(1:i,1:i) */
+
+		a[i__ + (i__ + 1) * a_dim1] = 1.f;
+
+/*              Compute  x := tau * A * v  storing x in TAU(1:i) */
+
+		ssymv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) *
+			a_dim1 + 1], &c__1, &c_b320, &tau[1], &c__1);
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		alpha = taui * -.5f * sdot_(&i__, &tau[1], &c__1, &a[(i__ + 1)
+			 * a_dim1 + 1], &c__1);
+		saxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
+			1], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		ssyr2_(uplo, &i__, &c_b1290, &a[(i__ + 1) * a_dim1 + 1], &
+			c__1, &tau[1], &c__1, &a[a_offset], lda);
+
+		a[i__ + (i__ + 1) * a_dim1] = e[i__];
+	    }
+	    d__[i__ + 1] = a[i__ + 1 + (i__ + 1) * a_dim1];
+	    tau[i__] = taui;
+/* L10: */
+	}
+	d__[1] = a[a_dim1 + 1];
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(i+2:n,i)
+*/
+
+	    i__2 = *n - i__;
+/* Computing MIN */
+	    i__3 = i__ + 2;
+	    slarfg_(&i__2, &a[i__ + 1 + i__ * a_dim1], &a[min(i__3,*n) + i__ *
+		     a_dim1], &c__1, &taui);
+	    e[i__] = a[i__ + 1 + i__ * a_dim1];
+
+	    if (taui != 0.f) {
+
+/*              Apply H(i) from both sides to A(i+1:n,i+1:n) */
+
+		a[i__ + 1 + i__ * a_dim1] = 1.f;
+
+/*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */
+
+		i__2 = *n - i__;
+		ssymv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b320, &tau[
+			i__], &c__1);
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		i__2 = *n - i__;
+		alpha = taui * -.5f * sdot_(&i__2, &tau[i__], &c__1, &a[i__ +
+			1 + i__ * a_dim1], &c__1);
+		i__2 = *n - i__;
+		saxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+			i__], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		i__2 = *n - i__;
+		ssyr2_(uplo, &i__2, &c_b1290, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) *
+			a_dim1], lda);
+
+		a[i__ + 1 + i__ * a_dim1] = e[i__];
+	    }
+	    d__[i__] = a[i__ + i__ * a_dim1];
+	    tau[i__] = taui;
+/* L20: */
+	}
+	d__[*n] = a[*n + *n * a_dim1];
+    }
+
+    return 0;
+
+/*     End of SSYTD2 */
+
+} /* ssytd2_ */
+
+/* Subroutine */ int ssytrd_(char *uplo, integer *n, real *a, integer *lda,
+	real *d__, real *e, real *tau, real *work, integer *lwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, nb, kk, nx, iws;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    static logical upper;
+    extern /* Subroutine */ int ssytd2_(char *, integer *, real *, integer *,
+	    real *, real *, real *, integer *), ssyr2k_(char *, char *
+	    , integer *, integer *, real *, real *, integer *, real *,
+	    integer *, real *, real *, integer *), xerbla_(
+	    char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int slatrd_(char *, integer *, integer *, real *,
+	    integer *, real *, real *, real *, integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    SSYTRD reduces a real symmetric matrix A to real symmetric
+    tridiagonal form T by an orthogonal similarity transformation:
+    Q**T * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the symmetric matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the orthogonal
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the orthogonal matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) REAL array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) REAL array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) REAL array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) REAL array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= 1.
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a real scalar, and v is a real vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+
+/*        Determine the block size. */
+
+	nb = ilaenv_(&c__1, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
+		 (ftnlen)1);
+	lwkopt = *n * nb;
+	work[1] = (real) lwkopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("SSYTRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1] = 1.f;
+	return 0;
+    }
+
+    nx = *n;
+    iws = 1;
+    if (nb > 1 && nb < *n) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "SSYTRD", uplo, n, &c_n1, &c_n1, &
+		c_n1, (ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *n) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code by setting NX = N.
+
+   Computing MAX
+*/
+		i__1 = *lwork / ldwork;
+		nb = max(i__1,1);
+		nbmin = ilaenv_(&c__2, "SSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		if (nb < nbmin) {
+		    nx = *n;
+		}
+	    }
+	} else {
+	    nx = *n;
+	}
+    } else {
+	nb = 1;
+    }
+
+    if (upper) {
+
+/*
+          Reduce the upper triangle of A.
+          Columns 1:kk are handled by the unblocked method.
+*/
+
+	kk = *n - (*n - nx + nb - 1) / nb * nb;
+	i__1 = kk + 1;
+	i__2 = -nb;
+	for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+		i__2) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = i__ + nb - 1;
+	    slatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
+		    work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(1:i-1,1:i-1), using an
+             update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = i__ - 1;
+	    ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1290, &a[i__ *
+		    a_dim1 + 1], lda, &work[1], &ldwork, &c_b1011, &a[
+		    a_offset], lda);
+
+/*
+             Copy superdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j - 1 + j * a_dim1] = e[j - 1];
+		d__[j] = a[j + j * a_dim1];
+/* L10: */
+	    }
+/* L20: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	ssytd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__2 = *n - nx;
+	i__1 = nb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = *n - i__ + 1;
+	    slatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], &
+		    tau[i__], &work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(i+ib:n,i+ib:n), using
+             an update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = *n - i__ - nb + 1;
+	    ssyr2k_(uplo, "No transpose", &i__3, &nb, &c_b1290, &a[i__ + nb +
+		    i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1011, &a[
+		    i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*
+             Copy subdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		a[j + 1 + j * a_dim1] = e[j];
+		d__[j] = a[j + j * a_dim1];
+/* L30: */
+	    }
+/* L40: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	i__1 = *n - i__ + 1;
+	ssytd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__],
+		&tau[i__], &iinfo);
+    }
+
+    work[1] = (real) lwkopt;
+    return 0;
+
+/*     End of SSYTRD */
+
+} /* ssytrd_ */
+
+/* Subroutine */ int strevc_(char *side, char *howmny, logical *select,
+	integer *n, real *t, integer *ldt, real *vl, integer *ldvl, real *vr,
+	integer *ldvr, integer *mm, integer *m, real *work, integer *info)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3;
+    real r__1, r__2, r__3, r__4;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k;
+    static real x[4]	/* was [2][2] */;
+    static integer j1, j2, n2, ii, ki, ip, is;
+    static real wi, wr, rec, ulp, beta, emax;
+    static logical pair, allv;
+    static integer ierr;
+    static real unfl, ovfl, smin;
+    extern doublereal sdot_(integer *, real *, integer *, real *, integer *);
+    static logical over;
+    static real vmax;
+    static integer jnxt;
+    static real scale;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static real remax;
+    static logical leftv;
+    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *,
+	    real *, integer *, real *, integer *, real *, real *, integer *);
+    static logical bothv;
+    static real vcrit;
+    static logical somev;
+    extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *,
+	    integer *);
+    static real xnorm;
+    extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *,
+	    real *, integer *), slaln2_(logical *, integer *, integer *, real
+	    *, real *, real *, integer *, real *, real *, real *, integer *,
+	    real *, real *, real *, integer *, real *, real *, integer *),
+	    slabad_(real *, real *);
+    extern doublereal slamch_(char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static real bignum;
+    extern integer isamax_(integer *, real *, integer *);
+    static logical rightv;
+    static real smlnum;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    STREVC computes some or all of the right and/or left eigenvectors of
+    a real upper quasi-triangular matrix T.
+
+    The right eigenvector x and the left eigenvector y of T corresponding
+    to an eigenvalue w are defined by:
+
+                 T*x = w*x,     y'*T = w*y'
+
+    where y' denotes the conjugate transpose of the vector y.
+
+    If all eigenvectors are requested, the routine may either return the
+    matrices X and/or Y of right or left eigenvectors of T, or the
+    products Q*X and/or Q*Y, where Q is an input orthogonal
+    matrix. If T was obtained from the real-Schur factorization of an
+    original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+    right or left eigenvectors of A.
+
+    T must be in Schur canonical form (as returned by SHSEQR), that is,
+    block upper triangular with 1-by-1 and 2-by-2 diagonal blocks; each
+    2-by-2 diagonal block has its diagonal elements equal and its
+    off-diagonal elements of opposite sign.  Corresponding to each 2-by-2
+    diagonal block is a complex conjugate pair of eigenvalues and
+    eigenvectors; only one eigenvector of the pair is computed, namely
+    the one corresponding to the eigenvalue with positive imaginary part.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  compute right eigenvectors only;
+            = 'L':  compute left eigenvectors only;
+            = 'B':  compute both right and left eigenvectors.
+
+    HOWMNY  (input) CHARACTER*1
+            = 'A':  compute all right and/or left eigenvectors;
+            = 'B':  compute all right and/or left eigenvectors,
+                    and backtransform them using the input matrices
+                    supplied in VR and/or VL;
+            = 'S':  compute selected right and/or left eigenvectors,
+                    specified by the logical array SELECT.
+
+    SELECT  (input/output) LOGICAL array, dimension (N)
+            If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+            computed.
+            If HOWMNY = 'A' or 'B', SELECT is not referenced.
+            To select the real eigenvector corresponding to a real
+            eigenvalue w(j), SELECT(j) must be set to .TRUE..  To select
+            the complex eigenvector corresponding to a complex conjugate
+            pair w(j) and w(j+1), either SELECT(j) or SELECT(j+1) must be
+            set to .TRUE.; then on exit SELECT(j) is .TRUE. and
+            SELECT(j+1) is .FALSE..
+
+    N       (input) INTEGER
+            The order of the matrix T. N >= 0.
+
+    T       (input) REAL array, dimension (LDT,N)
+            The upper quasi-triangular matrix T in Schur canonical form.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= max(1,N).
+
+    VL      (input/output) REAL array, dimension (LDVL,MM)
+            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+            contain an N-by-N matrix Q (usually the orthogonal matrix Q
+            of Schur vectors returned by SHSEQR).
+            On exit, if SIDE = 'L' or 'B', VL contains:
+            if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+                             VL has the same quasi-lower triangular form
+                             as T'. If T(i,i) is a real eigenvalue, then
+                             the i-th column VL(i) of VL  is its
+                             corresponding eigenvector. If T(i:i+1,i:i+1)
+                             is a 2-by-2 block whose eigenvalues are
+                             complex-conjugate eigenvalues of T, then
+                             VL(i)+sqrt(-1)*VL(i+1) is the complex
+                             eigenvector corresponding to the eigenvalue
+                             with positive real part.
+            if HOWMNY = 'B', the matrix Q*Y;
+            if HOWMNY = 'S', the left eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VL, in the same order as their
+                             eigenvalues.
+            A complex eigenvector corresponding to a complex eigenvalue
+            is stored in two consecutive columns, the first holding the
+            real part, and the second the imaginary part.
+            If SIDE = 'R', VL is not referenced.
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= max(1,N) if
+            SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+
+    VR      (input/output) REAL array, dimension (LDVR,MM)
+            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+            contain an N-by-N matrix Q (usually the orthogonal matrix Q
+            of Schur vectors returned by SHSEQR).
+            On exit, if SIDE = 'R' or 'B', VR contains:
+            if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+                             VR has the same quasi-upper triangular form
+                             as T. If T(i,i) is a real eigenvalue, then
+                             the i-th column VR(i) of VR  is its
+                             corresponding eigenvector. If T(i:i+1,i:i+1)
+                             is a 2-by-2 block whose eigenvalues are
+                             complex-conjugate eigenvalues of T, then
+                             VR(i)+sqrt(-1)*VR(i+1) is the complex
+                             eigenvector corresponding to the eigenvalue
+                             with positive real part.
+            if HOWMNY = 'B', the matrix Q*X;
+            if HOWMNY = 'S', the right eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VR, in the same order as their
+                             eigenvalues.
+            A complex eigenvector corresponding to a complex eigenvalue
+            is stored in two consecutive columns, the first holding the
+            real part and the second the imaginary part.
+            If SIDE = 'L', VR is not referenced.
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= max(1,N) if
+            SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+
+    MM      (input) INTEGER
+            The number of columns in the arrays VL and/or VR. MM >= M.
+
+    M       (output) INTEGER
+            The number of columns in the arrays VL and/or VR actually
+            used to store the eigenvectors.
+            If HOWMNY = 'A' or 'B', M is set to N.
+            Each selected real eigenvector occupies one column and each
+            selected complex eigenvector occupies two columns.
+
+    WORK    (workspace) REAL array, dimension (3*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The algorithm used in this program is basically backward (forward)
+    substitution, with scaling to make the the code robust against
+    possible overflow.
+
+    Each eigenvector is normalized so that the element of largest
+    magnitude has magnitude 1; here the magnitude of a complex number
+    (x,y) is taken to be |x| + |y|.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --select;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+
+    /* Function Body */
+    bothv = lsame_(side, "B");
+    rightv = (lsame_(side, "R")) || (bothv);
+    leftv = (lsame_(side, "L")) || (bothv);
+
+    allv = lsame_(howmny, "A");
+    over = lsame_(howmny, "B");
+    somev = lsame_(howmny, "S");
+
+    *info = 0;
+    if (! rightv && ! leftv) {
+	*info = -1;
+    } else if (! allv && ! over && ! somev) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*ldt < max(1,*n)) {
+	*info = -6;
+    } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) {
+	*info = -8;
+    } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) {
+	*info = -10;
+    } else {
+
+/*
+          Set M to the number of columns required to store the selected
+          eigenvectors, standardize the array SELECT if necessary, and
+          test MM.
+*/
+
+	if (somev) {
+	    *m = 0;
+	    pair = FALSE_;
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		if (pair) {
+		    pair = FALSE_;
+		    select[j] = FALSE_;
+		} else {
+		    if (j < *n) {
+			if (t[j + 1 + j * t_dim1] == 0.f) {
+			    if (select[j]) {
+				++(*m);
+			    }
+			} else {
+			    pair = TRUE_;
+			    if ((select[j]) || (select[j + 1])) {
+				select[j] = TRUE_;
+				*m += 2;
+			    }
+			}
+		    } else {
+			if (select[*n]) {
+			    ++(*m);
+			}
+		    }
+		}
+/* L10: */
+	    }
+	} else {
+	    *m = *n;
+	}
+
+	if (*mm < *m) {
+	    *info = -11;
+	}
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("STREVC", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set the constants to control overflow. */
+
+    unfl = slamch_("Safe minimum");
+    ovfl = 1.f / unfl;
+    slabad_(&unfl, &ovfl);
+    ulp = slamch_("Precision");
+    smlnum = unfl * (*n / ulp);
+    bignum = (1.f - ulp) / smlnum;
+
+/*
+       Compute 1-norm of each column of strictly upper triangular
+       part of T to control overflow in triangular solver.
+*/
+
+    work[1] = 0.f;
+    i__1 = *n;
+    for (j = 2; j <= i__1; ++j) {
+	work[j] = 0.f;
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    work[j] += (r__1 = t[i__ + j * t_dim1], dabs(r__1));
+/* L20: */
+	}
+/* L30: */
+    }
+
+/*
+       Index IP is used to specify the real or complex eigenvalue:
+         IP = 0, real eigenvalue,
+              1, first of conjugate complex pair: (wr,wi)
+             -1, second of conjugate complex pair: (wr,wi)
+*/
+
+    n2 = (*n) << (1);
+
+    if (rightv) {
+
+/*        Compute right eigenvectors. */
+
+	ip = 0;
+	is = *m;
+	for (ki = *n; ki >= 1; --ki) {
+
+	    if (ip == 1) {
+		goto L130;
+	    }
+	    if (ki == 1) {
+		goto L40;
+	    }
+	    if (t[ki + (ki - 1) * t_dim1] == 0.f) {
+		goto L40;
+	    }
+	    ip = -1;
+
+L40:
+	    if (somev) {
+		if (ip == 0) {
+		    if (! select[ki]) {
+			goto L130;
+		    }
+		} else {
+		    if (! select[ki - 1]) {
+			goto L130;
+		    }
+		}
+	    }
+
+/*           Compute the KI-th eigenvalue (WR,WI). */
+
+	    wr = t[ki + ki * t_dim1];
+	    wi = 0.f;
+	    if (ip != 0) {
+		wi = sqrt((r__1 = t[ki + (ki - 1) * t_dim1], dabs(r__1))) *
+			sqrt((r__2 = t[ki - 1 + ki * t_dim1], dabs(r__2)));
+	    }
+/* Computing MAX */
+	    r__1 = ulp * (dabs(wr) + dabs(wi));
+	    smin = dmax(r__1,smlnum);
+
+	    if (ip == 0) {
+
+/*              Real right eigenvector */
+
+		work[ki + *n] = 1.f;
+
+/*              Form right-hand side */
+
+		i__1 = ki - 1;
+		for (k = 1; k <= i__1; ++k) {
+		    work[k + *n] = -t[k + ki * t_dim1];
+/* L50: */
+		}
+
+/*
+                Solve the upper quasi-triangular system:
+                   (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
+*/
+
+		jnxt = ki - 1;
+		for (j = ki - 1; j >= 1; --j) {
+		    if (j > jnxt) {
+			goto L60;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j - 1;
+		    if (j > 1) {
+			if (t[j + (j - 1) * t_dim1] != 0.f) {
+			    j1 = j - 1;
+			    jnxt = j - 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*                    1-by-1 diagonal block */
+
+			slaln2_(&c_false, &c__1, &c__1, &smin, &c_b1011, &t[j
+				+ j * t_dim1], ldt, &c_b1011, &c_b1011, &work[
+				j + *n], n, &wr, &c_b320, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*
+                      Scale X(1,1) to avoid overflow when updating
+                      the right-hand side.
+*/
+
+			if (xnorm > 1.f) {
+			    if (work[j] > bignum / xnorm) {
+				x[0] /= xnorm;
+				scale /= xnorm;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
+			}
+			work[j + *n] = x[0];
+
+/*                    Update right-hand side */
+
+			i__1 = j - 1;
+			r__1 = -x[0];
+			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+
+		    } else {
+
+/*                    2-by-2 diagonal block */
+
+			slaln2_(&c_false, &c__2, &c__1, &smin, &c_b1011, &t[j
+				- 1 + (j - 1) * t_dim1], ldt, &c_b1011, &
+				c_b1011, &work[j - 1 + *n], n, &wr, &c_b320,
+				x, &c__2, &scale, &xnorm, &ierr);
+
+/*
+                      Scale X(1,1) and X(2,1) to avoid overflow when
+                      updating the right-hand side.
+*/
+
+			if (xnorm > 1.f) {
+/* Computing MAX */
+			    r__1 = work[j - 1], r__2 = work[j];
+			    beta = dmax(r__1,r__2);
+			    if (beta > bignum / xnorm) {
+				x[0] /= xnorm;
+				x[1] /= xnorm;
+				scale /= xnorm;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
+			}
+			work[j - 1 + *n] = x[0];
+			work[j + *n] = x[1];
+
+/*                    Update right-hand side */
+
+			i__1 = j - 2;
+			r__1 = -x[0];
+			saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1,
+				&work[*n + 1], &c__1);
+			i__1 = j - 2;
+			r__1 = -x[1];
+			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+		    }
+L60:
+		    ;
+		}
+
+/*              Copy the vector x or Q*x to VR and normalize. */
+
+		if (! over) {
+		    scopy_(&ki, &work[*n + 1], &c__1, &vr[is * vr_dim1 + 1], &
+			    c__1);
+
+		    ii = isamax_(&ki, &vr[is * vr_dim1 + 1], &c__1);
+		    remax = 1.f / (r__1 = vr[ii + is * vr_dim1], dabs(r__1));
+		    sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);
+
+		    i__1 = *n;
+		    for (k = ki + 1; k <= i__1; ++k) {
+			vr[k + is * vr_dim1] = 0.f;
+/* L70: */
+		    }
+		} else {
+		    if (ki > 1) {
+			i__1 = ki - 1;
+			sgemv_("N", n, &i__1, &c_b1011, &vr[vr_offset], ldvr,
+				&work[*n + 1], &c__1, &work[ki + *n], &vr[ki *
+				 vr_dim1 + 1], &c__1);
+		    }
+
+		    ii = isamax_(n, &vr[ki * vr_dim1 + 1], &c__1);
+		    remax = 1.f / (r__1 = vr[ii + ki * vr_dim1], dabs(r__1));
+		    sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
+		}
+
+	    } else {
+
+/*
+                Complex right eigenvector.
+
+                Initial solve
+                  [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
+                  [ (T(KI,KI-1)   T(KI,KI)   )               ]
+*/
+
+		if ((r__1 = t[ki - 1 + ki * t_dim1], dabs(r__1)) >= (r__2 = t[
+			ki + (ki - 1) * t_dim1], dabs(r__2))) {
+		    work[ki - 1 + *n] = 1.f;
+		    work[ki + n2] = wi / t[ki - 1 + ki * t_dim1];
+		} else {
+		    work[ki - 1 + *n] = -wi / t[ki + (ki - 1) * t_dim1];
+		    work[ki + n2] = 1.f;
+		}
+		work[ki + *n] = 0.f;
+		work[ki - 1 + n2] = 0.f;
+
+/*              Form right-hand side */
+
+		i__1 = ki - 2;
+		for (k = 1; k <= i__1; ++k) {
+		    work[k + *n] = -work[ki - 1 + *n] * t[k + (ki - 1) *
+			    t_dim1];
+		    work[k + n2] = -work[ki + n2] * t[k + ki * t_dim1];
+/* L80: */
+		}
+
+/*
+                Solve upper quasi-triangular system:
+                (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
+*/
+
+		jnxt = ki - 2;
+		for (j = ki - 2; j >= 1; --j) {
+		    if (j > jnxt) {
+			goto L90;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j - 1;
+		    if (j > 1) {
+			if (t[j + (j - 1) * t_dim1] != 0.f) {
+			    j1 = j - 1;
+			    jnxt = j - 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*                    1-by-1 diagonal block */
+
+			slaln2_(&c_false, &c__1, &c__2, &smin, &c_b1011, &t[j
+				+ j * t_dim1], ldt, &c_b1011, &c_b1011, &work[
+				j + *n], n, &wr, &wi, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*
+                      Scale X(1,1) and X(1,2) to avoid overflow when
+                      updating the right-hand side.
+*/
+
+			if (xnorm > 1.f) {
+			    if (work[j] > bignum / xnorm) {
+				x[0] /= xnorm;
+				x[2] /= xnorm;
+				scale /= xnorm;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
+			    sscal_(&ki, &scale, &work[n2 + 1], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + n2] = x[2];
+
+/*                    Update the right-hand side */
+
+			i__1 = j - 1;
+			r__1 = -x[0];
+			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+			i__1 = j - 1;
+			r__1 = -x[2];
+			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
+				n2 + 1], &c__1);
+
+		    } else {
+
+/*                    2-by-2 diagonal block */
+
+			slaln2_(&c_false, &c__2, &c__2, &smin, &c_b1011, &t[j
+				- 1 + (j - 1) * t_dim1], ldt, &c_b1011, &
+				c_b1011, &work[j - 1 + *n], n, &wr, &wi, x, &
+				c__2, &scale, &xnorm, &ierr);
+
+/*
+                      Scale X to avoid overflow when updating
+                      the right-hand side.
+*/
+
+			if (xnorm > 1.f) {
+/* Computing MAX */
+			    r__1 = work[j - 1], r__2 = work[j];
+			    beta = dmax(r__1,r__2);
+			    if (beta > bignum / xnorm) {
+				rec = 1.f / xnorm;
+				x[0] *= rec;
+				x[2] *= rec;
+				x[1] *= rec;
+				x[3] *= rec;
+				scale *= rec;
+			    }
+			}
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    sscal_(&ki, &scale, &work[*n + 1], &c__1);
+			    sscal_(&ki, &scale, &work[n2 + 1], &c__1);
+			}
+			work[j - 1 + *n] = x[0];
+			work[j + *n] = x[1];
+			work[j - 1 + n2] = x[2];
+			work[j + n2] = x[3];
+
+/*                    Update the right-hand side */
+
+			i__1 = j - 2;
+			r__1 = -x[0];
+			saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1,
+				&work[*n + 1], &c__1);
+			i__1 = j - 2;
+			r__1 = -x[1];
+			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
+				*n + 1], &c__1);
+			i__1 = j - 2;
+			r__1 = -x[2];
+			saxpy_(&i__1, &r__1, &t[(j - 1) * t_dim1 + 1], &c__1,
+				&work[n2 + 1], &c__1);
+			i__1 = j - 2;
+			r__1 = -x[3];
+			saxpy_(&i__1, &r__1, &t[j * t_dim1 + 1], &c__1, &work[
+				n2 + 1], &c__1);
+		    }
+L90:
+		    ;
+		}
+
+/*              Copy the vector x or Q*x to VR and normalize. */
+
+		if (! over) {
+		    scopy_(&ki, &work[*n + 1], &c__1, &vr[(is - 1) * vr_dim1
+			    + 1], &c__1);
+		    scopy_(&ki, &work[n2 + 1], &c__1, &vr[is * vr_dim1 + 1], &
+			    c__1);
+
+		    emax = 0.f;
+		    i__1 = ki;
+		    for (k = 1; k <= i__1; ++k) {
+/* Computing MAX */
+			r__3 = emax, r__4 = (r__1 = vr[k + (is - 1) * vr_dim1]
+				, dabs(r__1)) + (r__2 = vr[k + is * vr_dim1],
+				dabs(r__2));
+			emax = dmax(r__3,r__4);
+/* L100: */
+		    }
+
+		    remax = 1.f / emax;
+		    sscal_(&ki, &remax, &vr[(is - 1) * vr_dim1 + 1], &c__1);
+		    sscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);
+
+		    i__1 = *n;
+		    for (k = ki + 1; k <= i__1; ++k) {
+			vr[k + (is - 1) * vr_dim1] = 0.f;
+			vr[k + is * vr_dim1] = 0.f;
+/* L110: */
+		    }
+
+		} else {
+
+		    if (ki > 2) {
+			i__1 = ki - 2;
+			sgemv_("N", n, &i__1, &c_b1011, &vr[vr_offset], ldvr,
+				&work[*n + 1], &c__1, &work[ki - 1 + *n], &vr[
+				(ki - 1) * vr_dim1 + 1], &c__1);
+			i__1 = ki - 2;
+			sgemv_("N", n, &i__1, &c_b1011, &vr[vr_offset], ldvr,
+				&work[n2 + 1], &c__1, &work[ki + n2], &vr[ki *
+				 vr_dim1 + 1], &c__1);
+		    } else {
+			sscal_(n, &work[ki - 1 + *n], &vr[(ki - 1) * vr_dim1
+				+ 1], &c__1);
+			sscal_(n, &work[ki + n2], &vr[ki * vr_dim1 + 1], &
+				c__1);
+		    }
+
+		    emax = 0.f;
+		    i__1 = *n;
+		    for (k = 1; k <= i__1; ++k) {
+/* Computing MAX */
+			r__3 = emax, r__4 = (r__1 = vr[k + (ki - 1) * vr_dim1]
+				, dabs(r__1)) + (r__2 = vr[k + ki * vr_dim1],
+				dabs(r__2));
+			emax = dmax(r__3,r__4);
+/* L120: */
+		    }
+		    remax = 1.f / emax;
+		    sscal_(n, &remax, &vr[(ki - 1) * vr_dim1 + 1], &c__1);
+		    sscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
+		}
+	    }
+
+	    --is;
+	    if (ip != 0) {
+		--is;
+	    }
+L130:
+	    if (ip == 1) {
+		ip = 0;
+	    }
+	    if (ip == -1) {
+		ip = 1;
+	    }
+/* L140: */
+	}
+    }
+
+    if (leftv) {
+
+/*        Compute left eigenvectors. */
+
+	ip = 0;
+	is = 1;
+	i__1 = *n;
+	for (ki = 1; ki <= i__1; ++ki) {
+
+	    if (ip == -1) {
+		goto L250;
+	    }
+	    if (ki == *n) {
+		goto L150;
+	    }
+	    if (t[ki + 1 + ki * t_dim1] == 0.f) {
+		goto L150;
+	    }
+	    ip = 1;
+
+L150:
+	    if (somev) {
+		if (! select[ki]) {
+		    goto L250;
+		}
+	    }
+
+/*           Compute the KI-th eigenvalue (WR,WI). */
+
+	    wr = t[ki + ki * t_dim1];
+	    wi = 0.f;
+	    if (ip != 0) {
+		wi = sqrt((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1))) *
+			sqrt((r__2 = t[ki + 1 + ki * t_dim1], dabs(r__2)));
+	    }
+/* Computing MAX */
+	    r__1 = ulp * (dabs(wr) + dabs(wi));
+	    smin = dmax(r__1,smlnum);
+
+	    if (ip == 0) {
+
+/*              Real left eigenvector. */
+
+		work[ki + *n] = 1.f;
+
+/*              Form right-hand side */
+
+		i__2 = *n;
+		for (k = ki + 1; k <= i__2; ++k) {
+		    work[k + *n] = -t[ki + k * t_dim1];
+/* L160: */
+		}
+
+/*
+                Solve the quasi-triangular system:
+                   (T(KI+1:N,KI+1:N) - WR)'*X = SCALE*WORK
+*/
+
+		vmax = 1.f;
+		vcrit = bignum;
+
+		jnxt = ki + 1;
+		i__2 = *n;
+		for (j = ki + 1; j <= i__2; ++j) {
+		    if (j < jnxt) {
+			goto L170;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j + 1;
+		    if (j < *n) {
+			if (t[j + 1 + j * t_dim1] != 0.f) {
+			    j2 = j + 1;
+			    jnxt = j + 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*
+                      1-by-1 diagonal block
+
+                      Scale if necessary to avoid overflow when forming
+                      the right-hand side.
+*/
+
+			if (work[j] > vcrit) {
+			    rec = 1.f / vmax;
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    vmax = 1.f;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 1;
+			work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1],
+				&c__1, &work[ki + 1 + *n], &c__1);
+
+/*                    Solve (T(J,J)-WR)'*X = WORK */
+
+			slaln2_(&c_false, &c__1, &c__1, &smin, &c_b1011, &t[j
+				+ j * t_dim1], ldt, &c_b1011, &c_b1011, &work[
+				j + *n], n, &wr, &c_b320, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			}
+			work[j + *n] = x[0];
+/* Computing MAX */
+			r__2 = (r__1 = work[j + *n], dabs(r__1));
+			vmax = dmax(r__2,vmax);
+			vcrit = bignum / vmax;
+
+		    } else {
+
+/*
+                      2-by-2 diagonal block
+
+                      Scale if necessary to avoid overflow when forming
+                      the right-hand side.
+
+   Computing MAX
+*/
+			r__1 = work[j], r__2 = work[j + 1];
+			beta = dmax(r__1,r__2);
+			if (beta > vcrit) {
+			    rec = 1.f / vmax;
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    vmax = 1.f;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 1;
+			work[j + *n] -= sdot_(&i__3, &t[ki + 1 + j * t_dim1],
+				&c__1, &work[ki + 1 + *n], &c__1);
+
+			i__3 = j - ki - 1;
+			work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 1 + (j + 1) *
+				 t_dim1], &c__1, &work[ki + 1 + *n], &c__1);
+
+/*
+                      Solve
+                        [T(J,J)-WR   T(J,J+1)     ]'* X = SCALE*( WORK1 )
+                        [T(J+1,J)    T(J+1,J+1)-WR]             ( WORK2 )
+*/
+
+			slaln2_(&c_true, &c__2, &c__1, &smin, &c_b1011, &t[j
+				+ j * t_dim1], ldt, &c_b1011, &c_b1011, &work[
+				j + *n], n, &wr, &c_b320, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + 1 + *n] = x[1];
+
+/* Computing MAX */
+			r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = (
+				r__2 = work[j + 1 + *n], dabs(r__2)), r__3 =
+				max(r__3,r__4);
+			vmax = dmax(r__3,vmax);
+			vcrit = bignum / vmax;
+
+		    }
+L170:
+		    ;
+		}
+
+/*              Copy the vector x or Q*x to VL and normalize. */
+
+		if (! over) {
+		    i__2 = *n - ki + 1;
+		    scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is *
+			    vl_dim1], &c__1);
+
+		    i__2 = *n - ki + 1;
+		    ii = isamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki -
+			    1;
+		    remax = 1.f / (r__1 = vl[ii + is * vl_dim1], dabs(r__1));
+		    i__2 = *n - ki + 1;
+		    sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
+
+		    i__2 = ki - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			vl[k + is * vl_dim1] = 0.f;
+/* L180: */
+		    }
+
+		} else {
+
+		    if (ki < *n) {
+			i__2 = *n - ki;
+			sgemv_("N", n, &i__2, &c_b1011, &vl[(ki + 1) *
+				vl_dim1 + 1], ldvl, &work[ki + 1 + *n], &c__1,
+				 &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1);
+		    }
+
+		    ii = isamax_(n, &vl[ki * vl_dim1 + 1], &c__1);
+		    remax = 1.f / (r__1 = vl[ii + ki * vl_dim1], dabs(r__1));
+		    sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
+
+		}
+
+	    } else {
+
+/*
+                Complex left eigenvector.
+
+                 Initial solve:
+                   ((T(KI,KI)    T(KI,KI+1) )' - (WR - I* WI))*X = 0.
+                   ((T(KI+1,KI) T(KI+1,KI+1))                )
+*/
+
+		if ((r__1 = t[ki + (ki + 1) * t_dim1], dabs(r__1)) >= (r__2 =
+			t[ki + 1 + ki * t_dim1], dabs(r__2))) {
+		    work[ki + *n] = wi / t[ki + (ki + 1) * t_dim1];
+		    work[ki + 1 + n2] = 1.f;
+		} else {
+		    work[ki + *n] = 1.f;
+		    work[ki + 1 + n2] = -wi / t[ki + 1 + ki * t_dim1];
+		}
+		work[ki + 1 + *n] = 0.f;
+		work[ki + n2] = 0.f;
+
+/*              Form right-hand side */
+
+		i__2 = *n;
+		for (k = ki + 2; k <= i__2; ++k) {
+		    work[k + *n] = -work[ki + *n] * t[ki + k * t_dim1];
+		    work[k + n2] = -work[ki + 1 + n2] * t[ki + 1 + k * t_dim1]
+			    ;
+/* L190: */
+		}
+
+/*
+                Solve complex quasi-triangular system:
+                ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
+*/
+
+		vmax = 1.f;
+		vcrit = bignum;
+
+		jnxt = ki + 2;
+		i__2 = *n;
+		for (j = ki + 2; j <= i__2; ++j) {
+		    if (j < jnxt) {
+			goto L200;
+		    }
+		    j1 = j;
+		    j2 = j;
+		    jnxt = j + 1;
+		    if (j < *n) {
+			if (t[j + 1 + j * t_dim1] != 0.f) {
+			    j2 = j + 1;
+			    jnxt = j + 2;
+			}
+		    }
+
+		    if (j1 == j2) {
+
+/*
+                      1-by-1 diagonal block
+
+                      Scale if necessary to avoid overflow when
+                      forming the right-hand side elements.
+*/
+
+			if (work[j] > vcrit) {
+			    rec = 1.f / vmax;
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &rec, &work[ki + n2], &c__1);
+			    vmax = 1.f;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 2;
+			work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + *n], &c__1);
+			i__3 = j - ki - 2;
+			work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + n2], &c__1);
+
+/*                    Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2 */
+
+			r__1 = -wi;
+			slaln2_(&c_false, &c__1, &c__2, &smin, &c_b1011, &t[j
+				+ j * t_dim1], ldt, &c_b1011, &c_b1011, &work[
+				j + *n], n, &wr, &r__1, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &scale, &work[ki + n2], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + n2] = x[2];
+/* Computing MAX */
+			r__3 = (r__1 = work[j + *n], dabs(r__1)), r__4 = (
+				r__2 = work[j + n2], dabs(r__2)), r__3 = max(
+				r__3,r__4);
+			vmax = dmax(r__3,vmax);
+			vcrit = bignum / vmax;
+
+		    } else {
+
+/*
+                      2-by-2 diagonal block
+
+                      Scale if necessary to avoid overflow when forming
+                      the right-hand side elements.
+
+   Computing MAX
+*/
+			r__1 = work[j], r__2 = work[j + 1];
+			beta = dmax(r__1,r__2);
+			if (beta > vcrit) {
+			    rec = 1.f / vmax;
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &rec, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &rec, &work[ki + n2], &c__1);
+			    vmax = 1.f;
+			    vcrit = bignum;
+			}
+
+			i__3 = j - ki - 2;
+			work[j + *n] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + *n], &c__1);
+
+			i__3 = j - ki - 2;
+			work[j + n2] -= sdot_(&i__3, &t[ki + 2 + j * t_dim1],
+				&c__1, &work[ki + 2 + n2], &c__1);
+
+			i__3 = j - ki - 2;
+			work[j + 1 + *n] -= sdot_(&i__3, &t[ki + 2 + (j + 1) *
+				 t_dim1], &c__1, &work[ki + 2 + *n], &c__1);
+
+			i__3 = j - ki - 2;
+			work[j + 1 + n2] -= sdot_(&i__3, &t[ki + 2 + (j + 1) *
+				 t_dim1], &c__1, &work[ki + 2 + n2], &c__1);
+
+/*
+                      Solve 2-by-2 complex linear equation
+                        ([T(j,j)   T(j,j+1)  ]'-(wr-i*wi)*I)*X = SCALE*B
+                        ([T(j+1,j) T(j+1,j+1)]             )
+*/
+
+			r__1 = -wi;
+			slaln2_(&c_true, &c__2, &c__2, &smin, &c_b1011, &t[j
+				+ j * t_dim1], ldt, &c_b1011, &c_b1011, &work[
+				j + *n], n, &wr, &r__1, x, &c__2, &scale, &
+				xnorm, &ierr);
+
+/*                    Scale if necessary */
+
+			if (scale != 1.f) {
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &scale, &work[ki + *n], &c__1);
+			    i__3 = *n - ki + 1;
+			    sscal_(&i__3, &scale, &work[ki + n2], &c__1);
+			}
+			work[j + *n] = x[0];
+			work[j + n2] = x[2];
+			work[j + 1 + *n] = x[1];
+			work[j + 1 + n2] = x[3];
+/* Computing MAX */
+			r__1 = dabs(x[0]), r__2 = dabs(x[2]), r__1 = max(r__1,
+				r__2), r__2 = dabs(x[1]), r__1 = max(r__1,
+				r__2), r__2 = dabs(x[3]), r__1 = max(r__1,
+				r__2);
+			vmax = dmax(r__1,vmax);
+			vcrit = bignum / vmax;
+
+		    }
+L200:
+		    ;
+		}
+
+/*
+                Copy the vector x or Q*x to VL and normalize.
+
+   L210:
+*/
+		if (! over) {
+		    i__2 = *n - ki + 1;
+		    scopy_(&i__2, &work[ki + *n], &c__1, &vl[ki + is *
+			    vl_dim1], &c__1);
+		    i__2 = *n - ki + 1;
+		    scopy_(&i__2, &work[ki + n2], &c__1, &vl[ki + (is + 1) *
+			    vl_dim1], &c__1);
+
+		    emax = 0.f;
+		    i__2 = *n;
+		    for (k = ki; k <= i__2; ++k) {
+/* Computing MAX */
+			r__3 = emax, r__4 = (r__1 = vl[k + is * vl_dim1],
+				dabs(r__1)) + (r__2 = vl[k + (is + 1) *
+				vl_dim1], dabs(r__2));
+			emax = dmax(r__3,r__4);
+/* L220: */
+		    }
+		    remax = 1.f / emax;
+		    i__2 = *n - ki + 1;
+		    sscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
+		    i__2 = *n - ki + 1;
+		    sscal_(&i__2, &remax, &vl[ki + (is + 1) * vl_dim1], &c__1)
+			    ;
+
+		    i__2 = ki - 1;
+		    for (k = 1; k <= i__2; ++k) {
+			vl[k + is * vl_dim1] = 0.f;
+			vl[k + (is + 1) * vl_dim1] = 0.f;
+/* L230: */
+		    }
+		} else {
+		    if (ki < *n - 1) {
+			i__2 = *n - ki - 1;
+			sgemv_("N", n, &i__2, &c_b1011, &vl[(ki + 2) *
+				vl_dim1 + 1], ldvl, &work[ki + 2 + *n], &c__1,
+				 &work[ki + *n], &vl[ki * vl_dim1 + 1], &c__1);
+			i__2 = *n - ki - 1;
+			sgemv_("N", n, &i__2, &c_b1011, &vl[(ki + 2) *
+				vl_dim1 + 1], ldvl, &work[ki + 2 + n2], &c__1,
+				 &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1 +
+				1], &c__1);
+		    } else {
+			sscal_(n, &work[ki + *n], &vl[ki * vl_dim1 + 1], &
+				c__1);
+			sscal_(n, &work[ki + 1 + n2], &vl[(ki + 1) * vl_dim1
+				+ 1], &c__1);
+		    }
+
+		    emax = 0.f;
+		    i__2 = *n;
+		    for (k = 1; k <= i__2; ++k) {
+/* Computing MAX */
+			r__3 = emax, r__4 = (r__1 = vl[k + ki * vl_dim1],
+				dabs(r__1)) + (r__2 = vl[k + (ki + 1) *
+				vl_dim1], dabs(r__2));
+			emax = dmax(r__3,r__4);
+/* L240: */
+		    }
+		    remax = 1.f / emax;
+		    sscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
+		    sscal_(n, &remax, &vl[(ki + 1) * vl_dim1 + 1], &c__1);
+
+		}
+
+	    }
+
+	    ++is;
+	    if (ip != 0) {
+		++is;
+	    }
+L250:
+	    if (ip == -1) {
+		ip = 0;
+	    }
+	    if (ip == 1) {
+		ip = -1;
+	    }
+
+/* L260: */
+	}
+
+    }
+
+    return 0;
+
+/*     End of STREVC */
+
+} /* strevc_ */
+
+/* Subroutine */ int strti2_(char *uplo, char *diag, integer *n, real *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+
+    /* Local variables */
+    static integer j;
+    static real ajj;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int strmv_(char *, char *, char *, integer *,
+	    real *, integer *, real *, integer *),
+	    xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    STRTI2 computes the inverse of a real upper or lower triangular
+    matrix.
+
+    This is the Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the matrix A is upper or lower triangular.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    DIAG    (input) CHARACTER*1
+            Specifies whether or not the matrix A is unit triangular.
+            = 'N':  Non-unit triangular
+            = 'U':  Unit triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading n by n upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("STRTI2", &i__1);
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute inverse of upper triangular matrix. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (nounit) {
+		a[j + j * a_dim1] = 1.f / a[j + j * a_dim1];
+		ajj = -a[j + j * a_dim1];
+	    } else {
+		ajj = -1.f;
+	    }
+
+/*           Compute elements 1:j-1 of j-th column. */
+
+	    i__2 = j - 1;
+	    strmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, &
+		    a[j * a_dim1 + 1], &c__1);
+	    i__2 = j - 1;
+	    sscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1);
+/* L10: */
+	}
+    } else {
+
+/*        Compute inverse of lower triangular matrix. */
+
+	for (j = *n; j >= 1; --j) {
+	    if (nounit) {
+		a[j + j * a_dim1] = 1.f / a[j + j * a_dim1];
+		ajj = -a[j + j * a_dim1];
+	    } else {
+		ajj = -1.f;
+	    }
+	    if (j < *n) {
+
+/*              Compute elements j+1:n of j-th column. */
+
+		i__1 = *n - j;
+		strmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j +
+			1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1);
+		i__1 = *n - j;
+		sscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of STRTI2 */
+
+} /* strti2_ */
+
+/* Subroutine */ int strtri_(char *uplo, char *diag, integer *n, real *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, i__1, i__2[2], i__3, i__4, i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer j, jb, nb, nn;
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int strmm_(char *, char *, char *, char *,
+	    integer *, integer *, real *, real *, integer *, real *, integer *
+	    ), strsm_(char *, char *, char *,
+	    char *, integer *, integer *, real *, real *, integer *, real *,
+	    integer *), strti2_(char *, char *
+	    , integer *, real *, integer *, integer *),
+	    xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    STRTRI computes the inverse of a real upper or lower triangular
+    matrix A.
+
+    This is the Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  A is upper triangular;
+            = 'L':  A is lower triangular.
+
+    DIAG    (input) CHARACTER*1
+            = 'N':  A is non-unit triangular;
+            = 'U':  A is unit triangular.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) REAL array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+                 matrix is singular and its inverse can not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("STRTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Check for singularity if non-unit. */
+
+    if (nounit) {
+	i__1 = *n;
+	for (*info = 1; *info <= i__1; ++(*info)) {
+	    if (a[*info + *info * a_dim1] == 0.f) {
+		return 0;
+	    }
+/* L10: */
+	}
+	*info = 0;
+    }
+
+/*
+       Determine the block size for this environment.
+
+   Writing concatenation
+*/
+    i__2[0] = 1, a__1[0] = uplo;
+    i__2[1] = 1, a__1[1] = diag;
+    s_cat(ch__1, a__1, i__2, &c__2, (ftnlen)2);
+    nb = ilaenv_(&c__1, "STRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	strti2_(uplo, diag, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute inverse of upper triangular matrix */
+
+	    i__1 = *n;
+	    i__3 = nb;
+	    for (j = 1; i__3 < 0 ? j >= i__1 : j <= i__1; j += i__3) {
+/* Computing MIN */
+		i__4 = nb, i__5 = *n - j + 1;
+		jb = min(i__4,i__5);
+
+/*              Compute rows 1:j-1 of current block column */
+
+		i__4 = j - 1;
+		strmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
+			c_b1011, &a[a_offset], lda, &a[j * a_dim1 + 1], lda);
+		i__4 = j - 1;
+		strsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
+			c_b1290, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1],
+			lda);
+
+/*              Compute inverse of current diagonal block */
+
+		strti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L20: */
+	    }
+	} else {
+
+/*           Compute inverse of lower triangular matrix */
+
+	    nn = (*n - 1) / nb * nb + 1;
+	    i__3 = -nb;
+	    for (j = nn; i__3 < 0 ? j >= 1 : j <= 1; j += i__3) {
+/* Computing MIN */
+		i__1 = nb, i__4 = *n - j + 1;
+		jb = min(i__1,i__4);
+		if (j + jb <= *n) {
+
+/*                 Compute rows j+jb:n of current block column */
+
+		    i__1 = *n - j - jb + 1;
+		    strmm_("Left", "Lower", "No transpose", diag, &i__1, &jb,
+			    &c_b1011, &a[j + jb + (j + jb) * a_dim1], lda, &a[
+			    j + jb + j * a_dim1], lda);
+		    i__1 = *n - j - jb + 1;
+		    strsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
+			     &c_b1290, &a[j + j * a_dim1], lda, &a[j + jb + j
+			    * a_dim1], lda);
+		}
+
+/*              Compute inverse of current diagonal block */
+
+		strti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L30: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of STRTRI */
+
+} /* strtri_ */
+
diff --git a/numpy/core/src/umath/lapack_lite/f2c.h b/numpy/core/src/umath/lapack_lite/f2c.h
new file mode 100644
index 000000000000..e27d7ae57733
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/f2c.h
@@ -0,0 +1,217 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+typedef int integer;
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+
+#define TRUE_ (1)
+#define FALSE_ (0)
+
+/* Extern is for use with -E */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff */
+
+#ifdef f2c_i2
+/* for -i2 */
+typedef short flag;
+typedef short ftnlen;
+typedef short ftnint;
+#else
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+#endif
+
+/*external read, write*/
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/*internal read, write*/
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/*open*/
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/*close*/
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/*rewind, backspace, endfile*/
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void
+
+union Multitype {	/* for multiple entry points */
+	shortint h;
+	integer i;
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+typedef long Long;	/* No longer used; formerly in Namelist */
+
+struct Vardesc {	/* for Namelist */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#ifndef abs
+#define abs(x) ((x) >= 0 ? (x) : -(x))
+#endif
+#define dabs(x) (doublereal)abs(x)
+#ifndef min
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+#endif
+#define dmin(a,b) (doublereal)min(a,b)
+#define dmax(a,b) (doublereal)max(a,b)
+
+/* procedure parameter types for -A and -C++ */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus
+typedef int /* Unknown procedure type */ (*U_fp)(...);
+typedef shortint (*J_fp)(...);
+typedef integer (*I_fp)(...);
+typedef real (*R_fp)(...);
+typedef doublereal (*D_fp)(...), (*E_fp)(...);
+typedef /* Complex */ VOID (*C_fp)(...);
+typedef /* Double Complex */ VOID (*Z_fp)(...);
+typedef logical (*L_fp)(...);
+typedef shortlogical (*K_fp)(...);
+typedef /* Character */ VOID (*H_fp)(...);
+typedef /* Subroutine */ int (*S_fp)(...);
+#else
+typedef int /* Unknown procedure type */ (*U_fp)(void);
+typedef shortint (*J_fp)(void);
+typedef integer (*I_fp)(void);
+typedef real (*R_fp)(void);
+typedef doublereal (*D_fp)(void), (*E_fp)(void);
+typedef /* Complex */ VOID (*C_fp)(void);
+typedef /* Double Complex */ VOID (*Z_fp)(void);
+typedef logical (*L_fp)(void);
+typedef shortlogical (*K_fp)(void);
+typedef /* Character */ VOID (*H_fp)(void);
+typedef /* Subroutine */ int (*S_fp)(void);
+#endif
+/* E_fp is for real functions when -R is not specified */
+typedef VOID C_f;	/* complex function */
+typedef VOID H_f;	/* character function */
+typedef VOID Z_f;	/* double complex function */
+typedef doublereal E_f;	/* real function with -R not specified */
+
+/* undef any lower-case symbols that your C compiler predefines, e.g.: */
+
+#ifndef Skip_f2c_Undefs
+#undef cray
+#undef gcos
+#undef mc68010
+#undef mc68020
+#undef mips
+#undef pdp11
+#undef sgi
+#undef sparc
+#undef sun
+#undef sun2
+#undef sun3
+#undef sun4
+#undef u370
+#undef u3b
+#undef u3b2
+#undef u3b5
+#undef unix
+#undef vax
+#endif
+#endif
diff --git a/numpy/core/src/umath/lapack_lite/f2c_lite.c b/numpy/core/src/umath/lapack_lite/f2c_lite.c
new file mode 100644
index 000000000000..c0814b3bf80c
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/f2c_lite.c
@@ -0,0 +1,671 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "f2c.h"
+
+
+extern void s_wsfe(cilist *f) {;}
+extern void e_wsfe(void) {;}
+extern void do_fio(integer *c, char *s, ftnlen l) {;}
+
+/* You'll want this if you redo the *_lite.c files with the -C option
+ * to f2c for checking array subscripts. (It's not suggested you do that
+ * for production use, of course.) */
+extern int
+s_rnge(char *var, int index, char *routine, int lineno)
+{
+    fprintf(stderr, "array index out-of-bounds for %s[%d] in routine %s:%d\n",
+            var, index, routine, lineno);
+    fflush(stderr);
+    abort();
+}
+
+#ifdef KR_headers
+extern float sqrtf();
+double f__cabsf(real, imag) float real, imag;
+#else
+#undef abs
+
+double f__cabsf(float real, float imag)
+#endif
+{
+float temp;
+
+if(real < 0.0f)
+	real = -real;
+if(imag < 0.0f)
+	imag = -imag;
+if(imag > real){
+	temp = real;
+	real = imag;
+	imag = temp;
+}
+if((imag+real) == real)
+	return((float)real);
+
+temp = imag/real;
+temp = real*sqrtf(1.0 + temp*temp);  /*overflow!!*/
+return(temp);
+}
+
+
+#ifdef KR_headers
+extern double sqrt();
+double f__cabs(real, imag) double real, imag;
+#else
+#undef abs
+
+double f__cabs(double real, double imag)
+#endif
+{
+double temp;
+
+if(real < 0)
+	real = -real;
+if(imag < 0)
+	imag = -imag;
+if(imag > real){
+	temp = real;
+	real = imag;
+	imag = temp;
+}
+if((imag+real) == real)
+	return((double)real);
+
+temp = imag/real;
+temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
+return(temp);
+}
+
+ VOID
+#ifdef KR_headers
+r_cnjg(r, z) complex *r, *z;
+#else
+r_cnjg(complex *r, complex *z)
+#endif
+{
+r->r = z->r;
+r->i = - z->i;
+}
+
+ VOID
+#ifdef KR_headers
+d_cnjg(r, z) doublecomplex *r, *z;
+#else
+d_cnjg(doublecomplex *r, doublecomplex *z)
+#endif
+{
+r->r = z->r;
+r->i = - z->i;
+}
+
+
+#ifdef KR_headers
+float r_imag(z) complex *z;
+#else
+float r_imag(complex *z)
+#endif
+{
+return(z->i);
+}
+
+#ifdef KR_headers
+double d_imag(z) doublecomplex *z;
+#else
+double d_imag(doublecomplex *z)
+#endif
+{
+return(z->i);
+}
+
+
+#define log10e 0.43429448190325182765
+
+#ifdef KR_headers
+float logf();
+float r_lg10(x) real *x;
+#else
+#undef abs
+
+float r_lg10(real *x)
+#endif
+{
+return( log10e * logf(*x) );
+}
+
+#ifdef KR_headers
+double log();
+double d_lg10(x) doublereal *x;
+#else
+#undef abs
+
+double d_lg10(doublereal *x)
+#endif
+{
+return( log10e * log(*x) );
+}
+
+#ifdef KR_headers
+double r_sign(a,b) real *a, *b;
+#else
+double r_sign(real *a, real *b)
+#endif
+{
+float x;
+x = (*a >= 0.0f ? *a : - *a);
+return( *b >= 0.0f ? x : -x);
+}
+
+#ifdef KR_headers
+double d_sign(a,b) doublereal *a, *b;
+#else
+double d_sign(doublereal *a, doublereal *b)
+#endif
+{
+double x;
+x = (*a >= 0 ? *a : - *a);
+return( *b >= 0 ? x : -x);
+}
+
+
+#ifdef KR_headers
+double floor();
+integer i_dnnt(x) doublereal *x;
+#else
+#undef abs
+
+integer i_dnnt(doublereal *x)
+#endif
+{
+return( (*x)>=0 ?
+	floor(*x + .5) : -floor(.5 - *x) );
+}
+
+
+#ifdef KR_headers
+double pow();
+double pow_dd(ap, bp) doublereal *ap, *bp;
+#else
+#undef abs
+
+double pow_dd(doublereal *ap, doublereal *bp)
+#endif
+{
+return(pow(*ap, *bp) );
+}
+
+
+#ifdef KR_headers
+double pow_ri(ap, bp) real *ap; integer *bp;
+#else
+double pow_ri(real *ap, integer *bp)
+#endif
+{
+float pow, x;
+integer n;
+unsigned long u;
+
+pow = 1;
+x = *ap;
+n = *bp;
+
+if(n != 0)
+	{
+	if(n < 0)
+		{
+		n = -n;
+		x = 1.0f/x;
+		}
+	for(u = n; ; )
+		{
+		if(u & 01)
+			pow *= x;
+		if(u >>= 1)
+			x *= x;
+		else
+			break;
+		}
+	}
+return(pow);
+}
+
+#ifdef KR_headers
+double pow_di(ap, bp) doublereal *ap; integer *bp;
+#else
+double pow_di(doublereal *ap, integer *bp)
+#endif
+{
+double pow, x;
+integer n;
+unsigned long u;
+
+pow = 1;
+x = *ap;
+n = *bp;
+
+if(n != 0)
+	{
+	if(n < 0)
+		{
+		n = -n;
+		x = 1/x;
+		}
+	for(u = n; ; )
+		{
+		if(u & 01)
+			pow *= x;
+		if(u >>= 1)
+			x *= x;
+		else
+			break;
+		}
+	}
+return(pow);
+}
+/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the
+ * target of a concatenation to appear on its right-hand side (contrary
+ * to the Fortran 77 Standard, but in accordance with Fortran 90).
+ */
+#define NO_OVERWRITE
+
+
+#ifndef NO_OVERWRITE
+
+#undef abs
+#ifdef KR_headers
+ extern char *F77_aloc();
+ extern void free();
+ extern void exit_();
+#else
+
+ extern char *F77_aloc(ftnlen, char*);
+#endif
+
+#endif /* NO_OVERWRITE */
+
+ VOID
+#ifdef KR_headers
+s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll;
+#else
+s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
+#endif
+{
+	ftnlen i, nc;
+	char *rp;
+	ftnlen n = *np;
+#ifndef NO_OVERWRITE
+	ftnlen L, m;
+	char *lp0, *lp1;
+
+	lp0 = 0;
+	lp1 = lp;
+	L = ll;
+	i = 0;
+	while(i < n) {
+		rp = rpp[i];
+		m = rnp[i++];
+		if (rp >= lp1 || rp + m <= lp) {
+			if ((L -= m) <= 0) {
+				n = i;
+				break;
+				}
+			lp1 += m;
+			continue;
+			}
+		lp0 = lp;
+		lp = lp1 = F77_aloc(L = ll, "s_cat");
+		break;
+		}
+	lp1 = lp;
+#endif /* NO_OVERWRITE */
+	for(i = 0 ; i < n ; ++i) {
+		nc = ll;
+		if(rnp[i] < nc)
+			nc = rnp[i];
+		ll -= nc;
+		rp = rpp[i];
+		while(--nc >= 0)
+			*lp++ = *rp++;
+		}
+	while(--ll >= 0)
+		*lp++ = ' ';
+#ifndef NO_OVERWRITE
+	if (lp0) {
+		memmove(lp0, lp1, L);
+		free(lp1);
+		}
+#endif
+	}
+
+
+/* compare two strings */
+
+#ifdef KR_headers
+integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb;
+#else
+integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb)
+#endif
+{
+register unsigned char *a, *aend, *b, *bend;
+a = (unsigned char *)a0;
+b = (unsigned char *)b0;
+aend = a + la;
+bend = b + lb;
+
+if(la <= lb)
+	{
+	while(a < aend)
+		if(*a != *b)
+			return( *a - *b );
+		else
+			{ ++a; ++b; }
+
+	while(b < bend)
+		if(*b != ' ')
+			return( ' ' - *b );
+		else	++b;
+	}
+
+else
+	{
+	while(b < bend)
+		if(*a == *b)
+			{ ++a; ++b; }
+		else
+			return( *a - *b );
+	while(a < aend)
+		if(*a != ' ')
+			return(*a - ' ');
+		else	++a;
+	}
+return(0);
+}
+/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the
+ * target of an assignment to appear on its right-hand side (contrary
+ * to the Fortran 77 Standard, but in accordance with Fortran 90),
+ * as in  a(2:5) = a(4:7) .
+ */
+
+
+
+/* assign strings:  a = b */
+
+#ifdef KR_headers
+VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb;
+#else
+void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
+#endif
+{
+	register char *aend, *bend;
+
+	aend = a + la;
+
+	if(la <= lb)
+#ifndef NO_OVERWRITE
+		if (a <= b || a >= b + la)
+#endif
+			while(a < aend)
+				*a++ = *b++;
+#ifndef NO_OVERWRITE
+		else
+			for(b += la; a < aend; )
+				*--aend = *--b;
+#endif
+
+	else {
+		bend = b + lb;
+#ifndef NO_OVERWRITE
+		if (a <= b || a >= bend)
+#endif
+			while(b < bend)
+				*a++ = *b++;
+#ifndef NO_OVERWRITE
+		else {
+			a += lb;
+			while(b < bend)
+				*--a = *--bend;
+			a += lb;
+			}
+#endif
+		while(a < aend)
+			*a++ = ' ';
+		}
+	}
+
+
+#ifdef KR_headers
+double f__cabsf();
+double c_abs(z) complex *z;
+#else
+double f__cabsf(float, float);
+double c_abs(complex *z)
+#endif
+{
+return( f__cabsf( z->r, z->i ) );
+}
+
+#ifdef KR_headers
+double f__cabs();
+double z_abs(z) doublecomplex *z;
+#else
+double f__cabs(double, double);
+double z_abs(doublecomplex *z)
+#endif
+{
+return( f__cabs( z->r, z->i ) );
+}
+
+
+#ifdef KR_headers
+extern void sig_die();
+VOID c_div(c, a, b) complex *a, *b, *c;
+#else
+extern void sig_die(char*, int);
+void c_div(complex *c, complex *a, complex *b)
+#endif
+{
+float ratio, den;
+float abr, abi;
+
+if( (abr = b->r) < 0.f)
+	abr = - abr;
+if( (abi = b->i) < 0.f)
+	abi = - abi;
+if( abr <= abi )
+	{
+	  /*Let IEEE Infinties handle this ;( */
+	  /*if(abi == 0)
+		sig_die("complex division by zero", 1);*/
+	ratio = b->r / b->i ;
+	den = b->i * (1 + ratio*ratio);
+	c->r = (a->r*ratio + a->i) / den;
+	c->i = (a->i*ratio - a->r) / den;
+	}
+
+else
+	{
+	ratio = b->i / b->r ;
+	den = b->r * (1.f + ratio*ratio);
+	c->r = (a->r + a->i*ratio) / den;
+	c->i = (a->i - a->r*ratio) / den;
+	}
+
+}
+
+#ifdef KR_headers
+extern void sig_die();
+VOID z_div(c, a, b) doublecomplex *a, *b, *c;
+#else
+extern void sig_die(char*, int);
+void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b)
+#endif
+{
+double ratio, den;
+double abr, abi;
+
+if( (abr = b->r) < 0.)
+	abr = - abr;
+if( (abi = b->i) < 0.)
+	abi = - abi;
+if( abr <= abi )
+	{
+	  /*Let IEEE Infinties handle this ;( */
+	  /*if(abi == 0)
+		sig_die("complex division by zero", 1);*/
+	ratio = b->r / b->i ;
+	den = b->i * (1 + ratio*ratio);
+	c->r = (a->r*ratio + a->i) / den;
+	c->i = (a->i*ratio - a->r) / den;
+	}
+
+else
+	{
+	ratio = b->i / b->r ;
+	den = b->r * (1 + ratio*ratio);
+	c->r = (a->r + a->i*ratio) / den;
+	c->i = (a->i - a->r*ratio) / den;
+	}
+
+}
+
+
+#ifdef KR_headers
+float sqrtf(), f__cabsf();
+VOID c_sqrt(r, z) complex *r, *z;
+#else
+#undef abs
+
+extern double f__cabsf(float, float);
+void c_sqrt(complex *r, complex *z)
+#endif
+{
+float mag;
+
+if( (mag = f__cabsf(z->r, z->i)) == 0.f)
+	r->r = r->i = 0.f;
+else if(z->r > 0.0f)
+	{
+	r->r = sqrtf(0.5f * (mag + z->r) );
+	r->i = z->i / r->r / 2.0f;
+	}
+else
+	{
+	r->i = sqrtf(0.5f * (mag - z->r) );
+	if(z->i < 0.0f)
+		r->i = - r->i;
+	r->r = z->i / r->i / 2.0f;
+	}
+}
+
+
+#ifdef KR_headers
+double sqrt(), f__cabs();
+VOID z_sqrt(r, z) doublecomplex *r, *z;
+#else
+#undef abs
+
+extern double f__cabs(double, double);
+void z_sqrt(doublecomplex *r, doublecomplex *z)
+#endif
+{
+double mag;
+
+if( (mag = f__cabs(z->r, z->i)) == 0.)
+	r->r = r->i = 0.;
+else if(z->r > 0)
+	{
+	r->r = sqrt(0.5 * (mag + z->r) );
+	r->i = z->i / r->r / 2;
+	}
+else
+	{
+	r->i = sqrt(0.5 * (mag - z->r) );
+	if(z->i < 0)
+		r->i = - r->i;
+	r->r = z->i / r->i / 2;
+	}
+}
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef KR_headers
+integer pow_ii(ap, bp) integer *ap, *bp;
+#else
+integer pow_ii(integer *ap, integer *bp)
+#endif
+{
+	integer pow, x, n;
+	unsigned long u;
+
+	x = *ap;
+	n = *bp;
+
+	if (n <= 0) {
+		if (n == 0 || x == 1)
+			return 1;
+		if (x != -1)
+			return x == 0 ? 1/x : 0;
+		n = -n;
+		}
+	u = n;
+	for(pow = 1; ; )
+		{
+		if(u & 01)
+			pow *= x;
+		if(u >>= 1)
+			x *= x;
+		else
+			break;
+		}
+	return(pow);
+	}
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef KR_headers
+extern void f_exit();
+VOID s_stop(s, n) char *s; ftnlen n;
+#else
+#undef abs
+#undef min
+#undef max
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+extern "C" {
+#endif
+void f_exit(void);
+
+int s_stop(char *s, ftnlen n)
+#endif
+{
+int i;
+
+if(n > 0)
+	{
+	fprintf(stderr, "STOP ");
+	for(i = 0; i<n ; ++i)
+		putc(*s++, stderr);
+	fprintf(stderr, " statement executed\n");
+	}
+#ifdef NO_ONEXIT
+f_exit();
+#endif
+exit(0);
+
+/* We cannot avoid (useless) compiler diagnostics here:		*/
+/* some compilers complain if there is no return statement,	*/
+/* and others complain that this one cannot be reached.		*/
+
+return 0; /* NOT REACHED */
+}
+#ifdef __cplusplus
+}
+#endif
+#ifdef __cplusplus
+}
+#endif
diff --git a/numpy/core/src/umath/lapack_lite/python_xerbla.c b/numpy/core/src/umath/lapack_lite/python_xerbla.c
new file mode 100644
index 000000000000..4e5a68413b6e
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/python_xerbla.c
@@ -0,0 +1,37 @@
+#include "Python.h"
+#include "f2c.h"
+
+/*
+  From the original manpage:
+  --------------------------
+  XERBLA is an error handler for the LAPACK routines.
+  It is called by an LAPACK routine if an input parameter has an invalid value.
+  A message is printed and execution stops.
+
+  Instead of printing a message and stopping the execution, a
+  ValueError is raised with the message.
+
+  Parameters:
+  -----------
+  srname: Subroutine name to use in error message, maximum six characters.
+          Spaces at the end are skipped.
+  info: Number of the invalid parameter.
+*/
+
+int xerbla_(char *srname, integer *info)
+{
+        const char* format = "On entry to %.*s" \
+                " parameter number %d had an illegal value";
+        char buf[57 + 6 + 4]; /* 57 for strlen(format),
+                                 6 for name, 4 for param. num. */
+
+        int len = 0; /* length of subroutine name*/
+        while( len<6 && srname[len]!='\0' )
+                len++;
+        while( len && srname[len-1]==' ' )
+                len--;
+
+        snprintf(buf, sizeof(buf), format, len, srname, *info);
+        PyErr_SetString(PyExc_ValueError, buf);
+        return 0;
+}
diff --git a/numpy/core/src/umath/lapack_lite/zlapack_lite.c b/numpy/core/src/umath/lapack_lite/zlapack_lite.c
new file mode 100644
index 000000000000..1b43c627047b
--- /dev/null
+++ b/numpy/core/src/umath/lapack_lite/zlapack_lite.c
@@ -0,0 +1,27006 @@
+/*
+NOTE: This is generated code. Look in Misc/lapack_lite for information on
+      remaking this file.
+*/
+#include "f2c.h"
+
+#ifdef HAVE_CONFIG
+#include "config.h"
+#else
+extern doublereal dlamch_(char *);
+#define EPSILON dlamch_("Epsilon")
+#define SAFEMINIMUM dlamch_("Safe minimum")
+#define PRECISION dlamch_("Precision")
+#define BASE dlamch_("Base")
+#endif
+
+extern doublereal dlapy2_(doublereal *x, doublereal *y);
+
+
+
+/* Table of constant values */
+
+static integer c__1 = 1;
+static doublecomplex c_b59 = {0.,0.};
+static doublecomplex c_b60 = {1.,0.};
+static integer c_n1 = -1;
+static integer c__3 = 3;
+static integer c__2 = 2;
+static integer c__0 = 0;
+static integer c__8 = 8;
+static integer c__4 = 4;
+static integer c__65 = 65;
+static integer c__6 = 6;
+static integer c__9 = 9;
+static doublereal c_b324 = 0.;
+static doublereal c_b1015 = 1.;
+static integer c__15 = 15;
+static logical c_false = FALSE_;
+static doublereal c_b1294 = -1.;
+static doublereal c_b2210 = .5;
+
+/* Subroutine */ int zdrot_(integer *n, doublecomplex *cx, integer *incx,
+	doublecomplex *cy, integer *incy, doublereal *c__, doublereal *s)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3, i__4;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Local variables */
+    static integer i__, ix, iy;
+    static doublecomplex ctemp;
+
+
+/*
+       applies a plane rotation, where the cos and sin (c and s) are real
+       and the vectors cx and cy are complex.
+       jack dongarra, linpack, 3/11/78.
+
+
+   =====================================================================
+*/
+
+    /* Parameter adjustments */
+    --cy;
+    --cx;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+    if (*incx == 1 && *incy == 1) {
+	goto L20;
+    }
+
+/*
+          code for unequal increments or equal increments not equal
+            to 1
+*/
+
+    ix = 1;
+    iy = 1;
+    if (*incx < 0) {
+	ix = (-(*n) + 1) * *incx + 1;
+    }
+    if (*incy < 0) {
+	iy = (-(*n) + 1) * *incy + 1;
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = ix;
+	z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i;
+	i__3 = iy;
+	z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i;
+	z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+	ctemp.r = z__1.r, ctemp.i = z__1.i;
+	i__2 = iy;
+	i__3 = iy;
+	z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i;
+	i__4 = ix;
+	z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i;
+	z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
+	cy[i__2].r = z__1.r, cy[i__2].i = z__1.i;
+	i__2 = ix;
+	cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
+	ix += *incx;
+	iy += *incy;
+/* L10: */
+    }
+    return 0;
+
+/*        code for both increments equal to 1 */
+
+L20:
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	z__2.r = *c__ * cx[i__2].r, z__2.i = *c__ * cx[i__2].i;
+	i__3 = i__;
+	z__3.r = *s * cy[i__3].r, z__3.i = *s * cy[i__3].i;
+	z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+	ctemp.r = z__1.r, ctemp.i = z__1.i;
+	i__2 = i__;
+	i__3 = i__;
+	z__2.r = *c__ * cy[i__3].r, z__2.i = *c__ * cy[i__3].i;
+	i__4 = i__;
+	z__3.r = *s * cx[i__4].r, z__3.i = *s * cx[i__4].i;
+	z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
+	cy[i__2].r = z__1.r, cy[i__2].i = z__1.i;
+	i__2 = i__;
+	cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
+/* L30: */
+    }
+    return 0;
+} /* zdrot_ */
+
+/* Subroutine */ int zgebak_(char *job, char *side, integer *n, integer *ilo,
+	integer *ihi, doublereal *scale, integer *m, doublecomplex *v,
+	integer *ldv, integer *info)
+{
+    /* System generated locals */
+    integer v_dim1, v_offset, i__1;
+
+    /* Local variables */
+    static integer i__, k;
+    static doublereal s;
+    static integer ii;
+    extern logical lsame_(char *, char *);
+    static logical leftv;
+    extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), xerbla_(char *, integer *),
+	    zdscal_(integer *, doublereal *, doublecomplex *, integer *);
+    static logical rightv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGEBAK forms the right or left eigenvectors of a complex general
+    matrix by backward transformation on the computed eigenvectors of the
+    balanced matrix output by ZGEBAL.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the type of backward transformation required:
+            = 'N', do nothing, return immediately;
+            = 'P', do backward transformation for permutation only;
+            = 'S', do backward transformation for scaling only;
+            = 'B', do backward transformations for both permutation and
+                   scaling.
+            JOB must be the same as the argument JOB supplied to ZGEBAL.
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  V contains right eigenvectors;
+            = 'L':  V contains left eigenvectors.
+
+    N       (input) INTEGER
+            The number of rows of the matrix V.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            The integers ILO and IHI determined by ZGEBAL.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    SCALE   (input) DOUBLE PRECISION array, dimension (N)
+            Details of the permutation and scaling factors, as returned
+            by ZGEBAL.
+
+    M       (input) INTEGER
+            The number of columns of the matrix V.  M >= 0.
+
+    V       (input/output) COMPLEX*16 array, dimension (LDV,M)
+            On entry, the matrix of right or left eigenvectors to be
+            transformed, as returned by ZHSEIN or ZTREVC.
+            On exit, V is overwritten by the transformed eigenvectors.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V. LDV >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Decode and Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --scale;
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+
+    /* Function Body */
+    rightv = lsame_(side, "R");
+    leftv = lsame_(side, "L");
+
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (! rightv && ! leftv) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*m < 0) {
+	*info = -7;
+    } else if (*ldv < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEBAK", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*m == 0) {
+	return 0;
+    }
+    if (lsame_(job, "N")) {
+	return 0;
+    }
+
+    if (*ilo == *ihi) {
+	goto L30;
+    }
+
+/*     Backward balance */
+
+    if ((lsame_(job, "S")) || (lsame_(job, "B"))) {
+
+	if (rightv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = scale[i__];
+		zdscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L10: */
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *ihi;
+	    for (i__ = *ilo; i__ <= i__1; ++i__) {
+		s = 1. / scale[i__];
+		zdscal_(m, &s, &v[i__ + v_dim1], ldv);
+/* L20: */
+	    }
+	}
+
+    }
+
+/*
+       Backward permutation
+
+       For  I = ILO-1 step -1 until 1,
+                IHI+1 step 1 until N do --
+*/
+
+L30:
+    if ((lsame_(job, "P")) || (lsame_(job, "B"))) {
+	if (rightv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L40;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = (integer) scale[i__];
+		if (k == i__) {
+		    goto L40;
+		}
+		zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L40:
+		;
+	    }
+	}
+
+	if (leftv) {
+	    i__1 = *n;
+	    for (ii = 1; ii <= i__1; ++ii) {
+		i__ = ii;
+		if (i__ >= *ilo && i__ <= *ihi) {
+		    goto L50;
+		}
+		if (i__ < *ilo) {
+		    i__ = *ilo - ii;
+		}
+		k = (integer) scale[i__];
+		if (k == i__) {
+		    goto L50;
+		}
+		zswap_(m, &v[i__ + v_dim1], ldv, &v[k + v_dim1], ldv);
+L50:
+		;
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZGEBAK */
+
+} /* zgebak_ */
+
+/* Subroutine */ int zgebal_(char *job, integer *n, doublecomplex *a, integer
+	*lda, integer *ilo, integer *ihi, doublereal *scale, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *), z_abs(doublecomplex *);
+
+    /* Local variables */
+    static doublereal c__, f, g;
+    static integer i__, j, k, l, m;
+    static doublereal r__, s, ca, ra;
+    static integer ica, ira, iexc;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static doublereal sfmin1, sfmin2, sfmax1, sfmax2;
+
+    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
+	    integer *, doublereal *, doublecomplex *, integer *);
+    extern integer izamax_(integer *, doublecomplex *, integer *);
+    static logical noconv;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZGEBAL balances a general complex matrix A.  This involves, first,
+    permuting A by a similarity transformation to isolate eigenvalues
+    in the first 1 to ILO-1 and last IHI+1 to N elements on the
+    diagonal; and second, applying a diagonal similarity transformation
+    to rows and columns ILO to IHI to make the rows and columns as
+    close in norm as possible.  Both steps are optional.
+
+    Balancing may reduce the 1-norm of the matrix, and improve the
+    accuracy of the computed eigenvalues and/or eigenvectors.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            Specifies the operations to be performed on A:
+            = 'N':  none:  simply set ILO = 1, IHI = N, SCALE(I) = 1.0
+                    for i = 1,...,N;
+            = 'P':  permute only;
+            = 'S':  scale only;
+            = 'B':  both permute and scale.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the input matrix A.
+            On exit,  A is overwritten by the balanced matrix.
+            If JOB = 'N', A is not referenced.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    ILO     (output) INTEGER
+    IHI     (output) INTEGER
+            ILO and IHI are set to integers such that on exit
+            A(i,j) = 0 if i > j and j = 1,...,ILO-1 or I = IHI+1,...,N.
+            If JOB = 'N' or 'S', ILO = 1 and IHI = N.
+
+    SCALE   (output) DOUBLE PRECISION array, dimension (N)
+            Details of the permutations and scaling factors applied to
+            A.  If P(j) is the index of the row and column interchanged
+            with row and column j and D(j) is the scaling factor
+            applied to row and column j, then
+            SCALE(j) = P(j)    for j = 1,...,ILO-1
+                     = D(j)    for j = ILO,...,IHI
+                     = P(j)    for j = IHI+1,...,N.
+            The order in which the interchanges are made is N to IHI+1,
+            then 1 to ILO-1.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The permutations consist of row and column interchanges which put
+    the matrix in the form
+
+               ( T1   X   Y  )
+       P A P = (  0   B   Z  )
+               (  0   0   T2 )
+
+    where T1 and T2 are upper triangular matrices whose eigenvalues lie
+    along the diagonal.  The column indices ILO and IHI mark the starting
+    and ending columns of the submatrix B. Balancing consists of applying
+    a diagonal similarity transformation inv(D) * B * D to make the
+    1-norms of each row of B and its corresponding column nearly equal.
+    The output matrix is
+
+       ( T1     X*D          Y    )
+       (  0  inv(D)*B*D  inv(D)*Z ).
+       (  0      0           T2   )
+
+    Information about the permutations P and the diagonal matrix D is
+    returned in the vector SCALE.
+
+    This subroutine is based on the EISPACK routine CBAL.
+
+    Modified by Tzu-Yi Chen, Computer Science Division, University of
+      California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --scale;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(job, "N") && ! lsame_(job, "P") && ! lsame_(job, "S")
+	    && ! lsame_(job, "B")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEBAL", &i__1);
+	return 0;
+    }
+
+    k = 1;
+    l = *n;
+
+    if (*n == 0) {
+	goto L210;
+    }
+
+    if (lsame_(job, "N")) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scale[i__] = 1.;
+/* L10: */
+	}
+	goto L210;
+    }
+
+    if (lsame_(job, "S")) {
+	goto L120;
+    }
+
+/*     Permutation to isolate eigenvalues if possible */
+
+    goto L50;
+
+/*     Row and column exchange. */
+
+L20:
+    scale[m] = (doublereal) j;
+    if (j == m) {
+	goto L30;
+    }
+
+    zswap_(&l, &a[j * a_dim1 + 1], &c__1, &a[m * a_dim1 + 1], &c__1);
+    i__1 = *n - k + 1;
+    zswap_(&i__1, &a[j + k * a_dim1], lda, &a[m + k * a_dim1], lda);
+
+L30:
+    switch (iexc) {
+	case 1:  goto L40;
+	case 2:  goto L80;
+    }
+
+/*     Search for rows isolating an eigenvalue and push them down. */
+
+L40:
+    if (l == 1) {
+	goto L210;
+    }
+    --l;
+
+L50:
+    for (j = l; j >= 1; --j) {
+
+	i__1 = l;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (i__ == j) {
+		goto L60;
+	    }
+	    i__2 = j + i__ * a_dim1;
+	    if ((a[i__2].r != 0.) || (d_imag(&a[j + i__ * a_dim1]) != 0.)) {
+		goto L70;
+	    }
+L60:
+	    ;
+	}
+
+	m = l;
+	iexc = 1;
+	goto L20;
+L70:
+	;
+    }
+
+    goto L90;
+
+/*     Search for columns isolating an eigenvalue and push them left. */
+
+L80:
+    ++k;
+
+L90:
+    i__1 = l;
+    for (j = k; j <= i__1; ++j) {
+
+	i__2 = l;
+	for (i__ = k; i__ <= i__2; ++i__) {
+	    if (i__ == j) {
+		goto L100;
+	    }
+	    i__3 = i__ + j * a_dim1;
+	    if ((a[i__3].r != 0.) || (d_imag(&a[i__ + j * a_dim1]) != 0.)) {
+		goto L110;
+	    }
+L100:
+	    ;
+	}
+
+	m = k;
+	iexc = 2;
+	goto L20;
+L110:
+	;
+    }
+
+L120:
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	scale[i__] = 1.;
+/* L130: */
+    }
+
+    if (lsame_(job, "P")) {
+	goto L210;
+    }
+
+/*
+       Balance the submatrix in rows K to L.
+
+       Iterative loop for norm reduction
+*/
+
+    sfmin1 = SAFEMINIMUM / PRECISION;
+    sfmax1 = 1. / sfmin1;
+    sfmin2 = sfmin1 * 8.;
+    sfmax2 = 1. / sfmin2;
+L140:
+    noconv = FALSE_;
+
+    i__1 = l;
+    for (i__ = k; i__ <= i__1; ++i__) {
+	c__ = 0.;
+	r__ = 0.;
+
+	i__2 = l;
+	for (j = k; j <= i__2; ++j) {
+	    if (j == i__) {
+		goto L150;
+	    }
+	    i__3 = j + i__ * a_dim1;
+	    c__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[j + i__ *
+		     a_dim1]), abs(d__2));
+	    i__3 = i__ + j * a_dim1;
+	    r__ += (d__1 = a[i__3].r, abs(d__1)) + (d__2 = d_imag(&a[i__ + j *
+		     a_dim1]), abs(d__2));
+L150:
+	    ;
+	}
+	ica = izamax_(&l, &a[i__ * a_dim1 + 1], &c__1);
+	ca = z_abs(&a[ica + i__ * a_dim1]);
+	i__2 = *n - k + 1;
+	ira = izamax_(&i__2, &a[i__ + k * a_dim1], lda);
+	ra = z_abs(&a[i__ + (ira + k - 1) * a_dim1]);
+
+/*        Guard against zero C or R due to underflow. */
+
+	if ((c__ == 0.) || (r__ == 0.)) {
+	    goto L200;
+	}
+	g = r__ / 8.;
+	f = 1.;
+	s = c__ + r__;
+L160:
+/* Computing MAX */
+	d__1 = max(f,c__);
+/* Computing MIN */
+	d__2 = min(r__,g);
+	if (((c__ >= g) || (max(d__1,ca) >= sfmax2)) || (min(d__2,ra) <=
+		sfmin2)) {
+	    goto L170;
+	}
+	f *= 8.;
+	c__ *= 8.;
+	ca *= 8.;
+	r__ /= 8.;
+	g /= 8.;
+	ra /= 8.;
+	goto L160;
+
+L170:
+	g = c__ / 8.;
+L180:
+/* Computing MIN */
+	d__1 = min(f,c__), d__1 = min(d__1,g);
+	if (((g < r__) || (max(r__,ra) >= sfmax2)) || (min(d__1,ca) <= sfmin2)
+		) {
+	    goto L190;
+	}
+	f /= 8.;
+	c__ /= 8.;
+	g /= 8.;
+	ca /= 8.;
+	r__ *= 8.;
+	ra *= 8.;
+	goto L180;
+
+/*        Now balance. */
+
+L190:
+	if (c__ + r__ >= s * .95) {
+	    goto L200;
+	}
+	if (f < 1. && scale[i__] < 1.) {
+	    if (f * scale[i__] <= sfmin1) {
+		goto L200;
+	    }
+	}
+	if (f > 1. && scale[i__] > 1.) {
+	    if (scale[i__] >= sfmax1 / f) {
+		goto L200;
+	    }
+	}
+	g = 1. / f;
+	scale[i__] *= f;
+	noconv = TRUE_;
+
+	i__2 = *n - k + 1;
+	zdscal_(&i__2, &g, &a[i__ + k * a_dim1], lda);
+	zdscal_(&l, &f, &a[i__ * a_dim1 + 1], &c__1);
+
+L200:
+	;
+    }
+
+    if (noconv) {
+	goto L140;
+    }
+
+L210:
+    *ilo = k;
+    *ihi = l;
+
+    return 0;
+
+/*     End of ZGEBAL */
+
+} /* zgebal_ */
+
+/* Subroutine */ int zgebd2_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq,
+	doublecomplex *taup, doublecomplex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex alpha;
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGEBD2 reduces a complex general m by n matrix A to upper or lower
+    real bidiagonal form B by a unitary transformation: Q' * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the unitary matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the unitary matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) COMPLEX*16 array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix Q. See Further Details.
+
+    TAUP    (output) COMPLEX*16 array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix P. See Further Details.
+
+    WORK    (workspace) COMPLEX*16 array, dimension (max(M,N))
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+    A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+    A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, v and u are complex vectors;
+    v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in A(i+2:m,i);
+    u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in A(i,i+1:n);
+    tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEBD2", &i__1);
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, &
+		    tauq[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1., a[i__2].i = 0.;
+
+/*           Apply H(i)' to A(i:m,i+1:n) from the left */
+
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    d_cnjg(&z__1, &tauq[i__]);
+	    zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1,
+		     &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__;
+	    a[i__2].r = d__[i__3], a[i__2].i = 0.;
+
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector G(i) to annihilate
+                A(i,i+2:n)
+*/
+
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+			taup[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Apply G(i) to A(i+1:m,i+1:n) from the right */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		zlarf_("Right", &i__2, &i__3, &a[i__ + (i__ + 1) * a_dim1],
+			lda, &taup[i__], &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &work[1]);
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		i__3 = i__;
+		a[i__2].r = e[i__3], a[i__2].i = 0.;
+	    } else {
+		i__2 = i__;
+		taup[i__2].r = 0., taup[i__2].i = 0.;
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Generate elementary reflector G(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = *n - i__ + 1;
+	    zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+		    taup[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1., a[i__2].i = 0.;
+
+/*           Apply G(i) to A(i+1:m,i:n) from the right */
+
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+/* Computing MIN */
+	    i__4 = i__ + 1;
+	    zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &taup[
+		    i__], &a[min(i__4,*m) + i__ * a_dim1], lda, &work[1]);
+	    i__2 = *n - i__ + 1;
+	    zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__;
+	    a[i__2].r = d__[i__3], a[i__2].i = 0.;
+
+	    if (i__ < *m) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:m,i)
+*/
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1,
+			 &tauq[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Apply H(i)' to A(i+1:m,i+1:n) from the left */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		d_cnjg(&z__1, &tauq[i__]);
+		zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &
+			c__1, &z__1, &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &
+			work[1]);
+		i__2 = i__ + 1 + i__ * a_dim1;
+		i__3 = i__;
+		a[i__2].r = e[i__3], a[i__2].i = 0.;
+	    } else {
+		i__2 = i__;
+		tauq[i__2].r = 0., tauq[i__2].i = 0.;
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of ZGEBD2 */
+
+} /* zgebd2_ */
+
+/* Subroutine */ int zgebrd_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublereal *d__, doublereal *e, doublecomplex *tauq,
+	doublecomplex *taup, doublecomplex *work, integer *lwork, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, j, nb, nx;
+    static doublereal ws;
+    static integer nbmin, iinfo, minmn;
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), zgebd2_(integer *, integer *,
+	    doublecomplex *, integer *, doublereal *, doublereal *,
+	    doublecomplex *, doublecomplex *, doublecomplex *, integer *),
+	    xerbla_(char *, integer *), zlabrd_(integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublereal *, doublereal *,
+	     doublecomplex *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer ldwrkx, ldwrky, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZGEBRD reduces a general complex M-by-N matrix A to upper or lower
+    bidiagonal form B by a unitary transformation: Q**H * A * P = B.
+
+    If m >= n, B is upper bidiagonal; if m < n, B is lower bidiagonal.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the M-by-N general matrix to be reduced.
+            On exit,
+            if m >= n, the diagonal and the first superdiagonal are
+              overwritten with the upper bidiagonal matrix B; the
+              elements below the diagonal, with the array TAUQ, represent
+              the unitary matrix Q as a product of elementary
+              reflectors, and the elements above the first superdiagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors;
+            if m < n, the diagonal and the first subdiagonal are
+              overwritten with the lower bidiagonal matrix B; the
+              elements below the first subdiagonal, with the array TAUQ,
+              represent the unitary matrix Q as a product of
+              elementary reflectors, and the elements above the diagonal,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The diagonal elements of the bidiagonal matrix B:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (min(M,N)-1)
+            The off-diagonal elements of the bidiagonal matrix B:
+            if m >= n, E(i) = A(i,i+1) for i = 1,2,...,n-1;
+            if m < n, E(i) = A(i+1,i) for i = 1,2,...,m-1.
+
+    TAUQ    (output) COMPLEX*16 array dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix Q. See Further Details.
+
+    TAUP    (output) COMPLEX*16 array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix P. See Further Details.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,M,N).
+            For optimum performance LWORK >= (M+N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+    If m >= n,
+
+       Q = H(1) H(2) . . . H(n)  and  P = G(1) G(2) . . . G(n-1)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors; v(1:i-1) = 0, v(i) = 1, and v(i+1:m) is stored on exit in
+    A(i+1:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+2:n) is stored on exit in
+    A(i,i+2:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n,
+
+       Q = H(1) H(2) . . . H(m-1)  and  P = G(1) G(2) . . . G(m)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors; v(1:i) = 0, v(i+1) = 1, and v(i+2:m) is stored on exit in
+    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i+1:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The contents of A on exit are illustrated by the following examples:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
+      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
+      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
+      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
+      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
+      (  v1  v2  v3  v4  v5 )
+
+    where d and e denote diagonal and off-diagonal elements of B, vi
+    denotes an element of the vector defining H(i), and ui an element of
+    the vector defining G(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MAX */
+    i__1 = 1, i__2 = ilaenv_(&c__1, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = max(i__1,i__2);
+    lwkopt = (*m + *n) * nb;
+    d__1 = (doublereal) lwkopt;
+    work[1].r = d__1, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = max(1,*m);
+	if (*lwork < max(i__1,*n) && ! lquery) {
+	    *info = -10;
+	}
+    }
+    if (*info < 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEBRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    minmn = min(*m,*n);
+    if (minmn == 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    ws = (doublereal) max(*m,*n);
+    ldwrkx = *m;
+    ldwrky = *n;
+
+    if (nb > 1 && nb < minmn) {
+
+/*
+          Set the crossover point NX.
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+
+/*        Determine when to switch from blocked to unblocked code. */
+
+	if (nx < minmn) {
+	    ws = (doublereal) ((*m + *n) * nb);
+	    if ((doublereal) (*lwork) < ws) {
+
+/*
+                Not enough work space for the optimal NB, consider using
+                a smaller block size.
+*/
+
+		nbmin = ilaenv_(&c__2, "ZGEBRD", " ", m, n, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		if (*lwork >= (*m + *n) * nbmin) {
+		    nb = *lwork / (*m + *n);
+		} else {
+		    nb = 1;
+		    nx = minmn;
+		}
+	    }
+	}
+    } else {
+	nx = minmn;
+    }
+
+    i__1 = minmn - nx;
+    i__2 = nb;
+    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+
+/*
+          Reduce rows and columns i:i+ib-1 to bidiagonal form and return
+          the matrices X and Y which are needed to update the unreduced
+          part of the matrix
+*/
+
+	i__3 = *m - i__ + 1;
+	i__4 = *n - i__ + 1;
+	zlabrd_(&i__3, &i__4, &nb, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[
+		i__], &tauq[i__], &taup[i__], &work[1], &ldwrkx, &work[ldwrkx
+		* nb + 1], &ldwrky);
+
+/*
+          Update the trailing submatrix A(i+ib:m,i+ib:n), using
+          an update of the form  A := A - V*Y' - X*U'
+*/
+
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	z__1.r = -1., z__1.i = -0.;
+	zgemm_("No transpose", "Conjugate transpose", &i__3, &i__4, &nb, &
+		z__1, &a[i__ + nb + i__ * a_dim1], lda, &work[ldwrkx * nb +
+		nb + 1], &ldwrky, &c_b60, &a[i__ + nb + (i__ + nb) * a_dim1],
+		lda);
+	i__3 = *m - i__ - nb + 1;
+	i__4 = *n - i__ - nb + 1;
+	z__1.r = -1., z__1.i = -0.;
+	zgemm_("No transpose", "No transpose", &i__3, &i__4, &nb, &z__1, &
+		work[nb + 1], &ldwrkx, &a[i__ + (i__ + nb) * a_dim1], lda, &
+		c_b60, &a[i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*        Copy diagonal and off-diagonal elements of B back into A */
+
+	if (*m >= *n) {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = d__[i__5], a[i__4].i = 0.;
+		i__4 = j + (j + 1) * a_dim1;
+		i__5 = j;
+		a[i__4].r = e[i__5], a[i__4].i = 0.;
+/* L10: */
+	    }
+	} else {
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = d__[i__5], a[i__4].i = 0.;
+		i__4 = j + 1 + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = e[i__5], a[i__4].i = 0.;
+/* L20: */
+	    }
+	}
+/* L30: */
+    }
+
+/*     Use unblocked code to reduce the remainder of the matrix */
+
+    i__2 = *m - i__ + 1;
+    i__1 = *n - i__ + 1;
+    zgebd2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__], &
+	    tauq[i__], &taup[i__], &work[1], &iinfo);
+    work[1].r = ws, work[1].i = 0.;
+    return 0;
+
+/*     End of ZGEBRD */
+
+} /* zgebrd_ */
+
+/* Subroutine */ int zgeev_(char *jobvl, char *jobvr, integer *n,
+	doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl,
+	integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work,
+	integer *lwork, doublereal *rwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3, i__4;
+    doublereal d__1, d__2;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_imag(doublecomplex *);
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, k, ihi;
+    static doublereal scl;
+    static integer ilo;
+    static doublereal dum[1], eps;
+    static doublecomplex tmp;
+    static integer ibal;
+    static char side[1];
+    static integer maxb;
+    static doublereal anrm;
+    static integer ierr, itau, iwrk, nout;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *), dlabad_(doublereal *, doublereal *);
+    extern doublereal dznrm2_(integer *, doublecomplex *, integer *);
+    static logical scalea;
+
+    static doublereal cscale;
+    extern /* Subroutine */ int zgebak_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublecomplex *, integer *,
+	    integer *), zgebal_(char *, integer *,
+	    doublecomplex *, integer *, integer *, integer *, doublereal *,
+	    integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical select[1];
+    extern /* Subroutine */ int zdscal_(integer *, doublereal *,
+	    doublecomplex *, integer *);
+    static doublereal bignum;
+    extern doublereal zlange_(char *, integer *, integer *, doublecomplex *,
+	    integer *, doublereal *);
+    extern /* Subroutine */ int zgehrd_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *), zlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublecomplex *,
+	     integer *, integer *), zlacpy_(char *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *);
+    static integer minwrk, maxwrk;
+    static logical wantvl;
+    static doublereal smlnum;
+    static integer hswork, irwork;
+    extern /* Subroutine */ int zhseqr_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), ztrevc_(char *, char *, logical *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, integer *, integer *, doublecomplex *,
+	     doublereal *, integer *);
+    static logical lquery, wantvr;
+    extern /* Subroutine */ int zunghr_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the
+    eigenvalues and, optionally, the left and/or right eigenvectors.
+
+    The right eigenvector v(j) of A satisfies
+                     A * v(j) = lambda(j) * v(j)
+    where lambda(j) is its eigenvalue.
+    The left eigenvector u(j) of A satisfies
+                  u(j)**H * A = lambda(j) * u(j)**H
+    where u(j)**H denotes the conjugate transpose of u(j).
+
+    The computed eigenvectors are normalized to have Euclidean norm
+    equal to 1 and largest component real.
+
+    Arguments
+    =========
+
+    JOBVL   (input) CHARACTER*1
+            = 'N': left eigenvectors of A are not computed;
+            = 'V': left eigenvectors of are computed.
+
+    JOBVR   (input) CHARACTER*1
+            = 'N': right eigenvectors of A are not computed;
+            = 'V': right eigenvectors of A are computed.
+
+    N       (input) INTEGER
+            The order of the matrix A. N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the N-by-N matrix A.
+            On exit, A has been overwritten.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    W       (output) COMPLEX*16 array, dimension (N)
+            W contains the computed eigenvalues.
+
+    VL      (output) COMPLEX*16 array, dimension (LDVL,N)
+            If JOBVL = 'V', the left eigenvectors u(j) are stored one
+            after another in the columns of VL, in the same order
+            as their eigenvalues.
+            If JOBVL = 'N', VL is not referenced.
+            u(j) = VL(:,j), the j-th column of VL.
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= 1; if
+            JOBVL = 'V', LDVL >= N.
+
+    VR      (output) COMPLEX*16 array, dimension (LDVR,N)
+            If JOBVR = 'V', the right eigenvectors v(j) are stored one
+            after another in the columns of VR, in the same order
+            as their eigenvalues.
+            If JOBVR = 'N', VR is not referenced.
+            v(j) = VR(:,j), the j-th column of VR.
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= 1; if
+            JOBVR = 'V', LDVR >= N.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,2*N).
+            For good performance, LWORK must generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = i, the QR algorithm failed to compute all the
+                  eigenvalues, and no eigenvectors have been computed;
+                  elements and i+1:N of W contain eigenvalues which have
+                  converged.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --w;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+    --rwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = *lwork == -1;
+    wantvl = lsame_(jobvl, "V");
+    wantvr = lsame_(jobvr, "V");
+    if (! wantvl && ! lsame_(jobvl, "N")) {
+	*info = -1;
+    } else if (! wantvr && ! lsame_(jobvr, "N")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if ((*ldvl < 1) || (wantvl && *ldvl < *n)) {
+	*info = -8;
+    } else if ((*ldvr < 1) || (wantvr && *ldvr < *n)) {
+	*info = -10;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         CWorkspace refers to complex workspace, and RWorkspace to real
+         workspace. NB refers to the optimal block size for the
+         immediately following subroutine, as returned by ILAENV.
+         HSWORK refers to the workspace preferred by ZHSEQR, as
+         calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
+         the worst case.)
+*/
+
+    minwrk = 1;
+    if (*info == 0 && ((*lwork >= 1) || (lquery))) {
+	maxwrk = *n + *n * ilaenv_(&c__1, "ZGEHRD", " ", n, &c__1, n, &c__0, (
+		ftnlen)6, (ftnlen)1);
+	if (! wantvl && ! wantvr) {
+/* Computing MAX */
+	    i__1 = 1, i__2 = (*n) << (1);
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "ZHSEQR", "EN", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "EN", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+	    maxwrk = max(maxwrk,hswork);
+	} else {
+/* Computing MAX */
+	    i__1 = 1, i__2 = (*n) << (1);
+	    minwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n + (*n - 1) * ilaenv_(&c__1, "ZUNGHR",
+		    " ", n, &c__1, n, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ilaenv_(&c__8, "ZHSEQR", "SV", n, &c__1, n, &c_n1, (ftnlen)
+		    6, (ftnlen)2);
+	    maxb = max(i__1,2);
+/*
+   Computing MIN
+   Computing MAX
+*/
+	    i__3 = 2, i__4 = ilaenv_(&c__4, "ZHSEQR", "SV", n, &c__1, n, &
+		    c_n1, (ftnlen)6, (ftnlen)2);
+	    i__1 = min(maxb,*n), i__2 = max(i__3,i__4);
+	    k = min(i__1,i__2);
+/* Computing MAX */
+	    i__1 = k * (k + 2), i__2 = (*n) << (1);
+	    hswork = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = max(maxwrk,hswork), i__2 = (*n) << (1);
+	    maxwrk = max(i__1,i__2);
+	}
+	work[1].r = (doublereal) maxwrk, work[1].i = 0.;
+    }
+    if (*lwork < minwrk && ! lquery) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEEV ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = PRECISION;
+    smlnum = SAFEMINIMUM;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum = sqrt(smlnum) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = zlange_("M", n, n, &a[a_offset], lda, dum);
+    scalea = FALSE_;
+    if (anrm > 0. && anrm < smlnum) {
+	scalea = TRUE_;
+	cscale = smlnum;
+    } else if (anrm > bignum) {
+	scalea = TRUE_;
+	cscale = bignum;
+    }
+    if (scalea) {
+	zlascl_("G", &c__0, &c__0, &anrm, &cscale, n, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+/*
+       Balance the matrix
+       (CWorkspace: none)
+       (RWorkspace: need N)
+*/
+
+    ibal = 1;
+    zgebal_("B", n, &a[a_offset], lda, &ilo, &ihi, &rwork[ibal], &ierr);
+
+/*
+       Reduce to upper Hessenberg form
+       (CWorkspace: need 2*N, prefer N+N*NB)
+       (RWorkspace: none)
+*/
+
+    itau = 1;
+    iwrk = itau + *n;
+    i__1 = *lwork - iwrk + 1;
+    zgehrd_(n, &ilo, &ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1,
+	     &ierr);
+
+    if (wantvl) {
+
+/*
+          Want left eigenvectors
+          Copy Householder vectors to VL
+*/
+
+	*(unsigned char *)side = 'L';
+	zlacpy_("L", n, n, &a[a_offset], lda, &vl[vl_offset], ldvl)
+		;
+
+/*
+          Generate unitary matrix in VL
+          (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+          (RWorkspace: none)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	zunghr_(n, &ilo, &ihi, &vl[vl_offset], ldvl, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VL
+          (CWorkspace: need 1, prefer HSWORK (see comments) )
+          (RWorkspace: none)
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vl[
+		vl_offset], ldvl, &work[iwrk], &i__1, info);
+
+	if (wantvr) {
+
+/*
+             Want left and right eigenvectors
+             Copy Schur vectors to VR
+*/
+
+	    *(unsigned char *)side = 'B';
+	    zlacpy_("F", n, n, &vl[vl_offset], ldvl, &vr[vr_offset], ldvr);
+	}
+
+    } else if (wantvr) {
+
+/*
+          Want right eigenvectors
+          Copy Householder vectors to VR
+*/
+
+	*(unsigned char *)side = 'R';
+	zlacpy_("L", n, n, &a[a_offset], lda, &vr[vr_offset], ldvr)
+		;
+
+/*
+          Generate unitary matrix in VR
+          (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)
+          (RWorkspace: none)
+*/
+
+	i__1 = *lwork - iwrk + 1;
+	zunghr_(n, &ilo, &ihi, &vr[vr_offset], ldvr, &work[itau], &work[iwrk],
+		 &i__1, &ierr);
+
+/*
+          Perform QR iteration, accumulating Schur vectors in VR
+          (CWorkspace: need 1, prefer HSWORK (see comments) )
+          (RWorkspace: none)
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	zhseqr_("S", "V", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+
+    } else {
+
+/*
+          Compute eigenvalues only
+          (CWorkspace: need 1, prefer HSWORK (see comments) )
+          (RWorkspace: none)
+*/
+
+	iwrk = itau;
+	i__1 = *lwork - iwrk + 1;
+	zhseqr_("E", "N", n, &ilo, &ihi, &a[a_offset], lda, &w[1], &vr[
+		vr_offset], ldvr, &work[iwrk], &i__1, info);
+    }
+
+/*     If INFO > 0 from ZHSEQR, then quit */
+
+    if (*info > 0) {
+	goto L50;
+    }
+
+    if ((wantvl) || (wantvr)) {
+
+/*
+          Compute left and/or right eigenvectors
+          (CWorkspace: need 2*N)
+          (RWorkspace: need 2*N)
+*/
+
+	irwork = ibal + *n;
+	ztrevc_(side, "B", select, n, &a[a_offset], lda, &vl[vl_offset], ldvl,
+		 &vr[vr_offset], ldvr, n, &nout, &work[iwrk], &rwork[irwork],
+		&ierr);
+    }
+
+    if (wantvl) {
+
+/*
+          Undo balancing of left eigenvectors
+          (CWorkspace: none)
+          (RWorkspace: need N)
+*/
+
+	zgebak_("B", "L", n, &ilo, &ihi, &rwork[ibal], n, &vl[vl_offset],
+		ldvl, &ierr);
+
+/*        Normalize left eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scl = 1. / dznrm2_(n, &vl[i__ * vl_dim1 + 1], &c__1);
+	    zdscal_(n, &scl, &vl[i__ * vl_dim1 + 1], &c__1);
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		i__3 = k + i__ * vl_dim1;
+/* Computing 2nd power */
+		d__1 = vl[i__3].r;
+/* Computing 2nd power */
+		d__2 = d_imag(&vl[k + i__ * vl_dim1]);
+		rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2;
+/* L10: */
+	    }
+	    k = idamax_(n, &rwork[irwork], &c__1);
+	    d_cnjg(&z__2, &vl[k + i__ * vl_dim1]);
+	    d__1 = sqrt(rwork[irwork + k - 1]);
+	    z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1;
+	    tmp.r = z__1.r, tmp.i = z__1.i;
+	    zscal_(n, &tmp, &vl[i__ * vl_dim1 + 1], &c__1);
+	    i__2 = k + i__ * vl_dim1;
+	    i__3 = k + i__ * vl_dim1;
+	    d__1 = vl[i__3].r;
+	    z__1.r = d__1, z__1.i = 0.;
+	    vl[i__2].r = z__1.r, vl[i__2].i = z__1.i;
+/* L20: */
+	}
+    }
+
+    if (wantvr) {
+
+/*
+          Undo balancing of right eigenvectors
+          (CWorkspace: none)
+          (RWorkspace: need N)
+*/
+
+	zgebak_("B", "R", n, &ilo, &ihi, &rwork[ibal], n, &vr[vr_offset],
+		ldvr, &ierr);
+
+/*        Normalize right eigenvectors and make largest component real */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    scl = 1. / dznrm2_(n, &vr[i__ * vr_dim1 + 1], &c__1);
+	    zdscal_(n, &scl, &vr[i__ * vr_dim1 + 1], &c__1);
+	    i__2 = *n;
+	    for (k = 1; k <= i__2; ++k) {
+		i__3 = k + i__ * vr_dim1;
+/* Computing 2nd power */
+		d__1 = vr[i__3].r;
+/* Computing 2nd power */
+		d__2 = d_imag(&vr[k + i__ * vr_dim1]);
+		rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2;
+/* L30: */
+	    }
+	    k = idamax_(n, &rwork[irwork], &c__1);
+	    d_cnjg(&z__2, &vr[k + i__ * vr_dim1]);
+	    d__1 = sqrt(rwork[irwork + k - 1]);
+	    z__1.r = z__2.r / d__1, z__1.i = z__2.i / d__1;
+	    tmp.r = z__1.r, tmp.i = z__1.i;
+	    zscal_(n, &tmp, &vr[i__ * vr_dim1 + 1], &c__1);
+	    i__2 = k + i__ * vr_dim1;
+	    i__3 = k + i__ * vr_dim1;
+	    d__1 = vr[i__3].r;
+	    z__1.r = d__1, z__1.i = 0.;
+	    vr[i__2].r = z__1.r, vr[i__2].i = z__1.i;
+/* L40: */
+	}
+    }
+
+/*     Undo scaling if necessary */
+
+L50:
+    if (scalea) {
+	i__1 = *n - *info;
+/* Computing MAX */
+	i__3 = *n - *info;
+	i__2 = max(i__3,1);
+	zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[*info + 1]
+		, &i__2, &ierr);
+	if (*info > 0) {
+	    i__1 = ilo - 1;
+	    zlascl_("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, &w[1], n,
+		     &ierr);
+	}
+    }
+
+    work[1].r = (doublereal) maxwrk, work[1].i = 0.;
+    return 0;
+
+/*     End of ZGEEV */
+
+} /* zgeev_ */
+
+/* Subroutine */ int zgehd2_(integer *n, integer *ilo, integer *ihi,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex alpha;
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGEHD2 reduces a complex general matrix A to upper Hessenberg form H
+    by a unitary similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to ZGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= max(1,N).
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the n by n general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the unitary matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) COMPLEX*16 array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) COMPLEX*16 array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEHD2", &i__1);
+	return 0;
+    }
+
+    i__1 = *ihi - 1;
+    for (i__ = *ilo; i__ <= i__1; ++i__) {
+
+/*        Compute elementary reflector H(i) to annihilate A(i+2:ihi,i) */
+
+	i__2 = i__ + 1 + i__ * a_dim1;
+	alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	i__2 = *ihi - i__;
+/* Computing MIN */
+	i__3 = i__ + 2;
+	zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[
+		i__]);
+	i__2 = i__ + 1 + i__ * a_dim1;
+	a[i__2].r = 1., a[i__2].i = 0.;
+
+/*        Apply H(i) to A(1:ihi,i+1:ihi) from the right */
+
+	i__2 = *ihi - i__;
+	zlarf_("Right", ihi, &i__2, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+		i__], &a[(i__ + 1) * a_dim1 + 1], lda, &work[1]);
+
+/*        Apply H(i)' to A(i+1:ihi,i+1:n) from the left */
+
+	i__2 = *ihi - i__;
+	i__3 = *n - i__;
+	d_cnjg(&z__1, &tau[i__]);
+	zlarf_("Left", &i__2, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &z__1,
+		 &a[i__ + 1 + (i__ + 1) * a_dim1], lda, &work[1]);
+
+	i__2 = i__ + 1 + i__ * a_dim1;
+	a[i__2].r = alpha.r, a[i__2].i = alpha.i;
+/* L10: */
+    }
+
+    return 0;
+
+/*     End of ZGEHD2 */
+
+} /* zgehd2_ */
+
+/* Subroutine */ int zgehrd_(integer *n, integer *ilo, integer *ihi,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex t[4160]	/* was [65][64] */;
+    static integer ib;
+    static doublecomplex ei;
+    static integer nb, nh, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), zgehd2_(integer *, integer *, integer
+	    *, doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *),
+	    zlahrd_(integer *, integer *, integer *, doublecomplex *, integer
+	    *, doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZGEHRD reduces a complex general matrix A to upper Hessenberg form H
+    by a unitary similarity transformation:  Q' * A * Q = H .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that A is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to ZGEBAL; otherwise they should be
+            set to 1 and N respectively. See Further Details.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the N-by-N general matrix to be reduced.
+            On exit, the upper triangle and the first subdiagonal of A
+            are overwritten with the upper Hessenberg matrix H, and the
+            elements below the first subdiagonal, with the array TAU,
+            represent the unitary matrix Q as a product of elementary
+            reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) COMPLEX*16 array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
+            zero.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of (ihi-ilo) elementary
+    reflectors
+
+       Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
+    exit in A(i+2:ihi,i), and tau in TAU(i).
+
+    The contents of A are illustrated by the following example, with
+    n = 7, ilo = 2 and ihi = 6:
+
+    on entry,                        on exit,
+
+    ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
+    (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
+    (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
+    (                         a )    (                          a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+/* Computing MIN */
+    i__1 = 64, i__2 = ilaenv_(&c__1, "ZGEHRD", " ", n, ilo, ihi, &c_n1, (
+	    ftnlen)6, (ftnlen)1);
+    nb = min(i__1,i__2);
+    lwkopt = *n * nb;
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEHRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	tau[i__2].r = 0., tau[i__2].i = 0.;
+/* L10: */
+    }
+    i__1 = *n - 1;
+    for (i__ = max(1,*ihi); i__ <= i__1; ++i__) {
+	i__2 = i__;
+	tau[i__2].r = 0., tau[i__2].i = 0.;
+/* L20: */
+    }
+
+/*     Quick return if possible */
+
+    nh = *ihi - *ilo + 1;
+    if (nh <= 1) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    iws = 1;
+    if (nb > 1 && nb < nh) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "ZGEHRD", " ", n, ilo, ihi, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < nh) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    iws = *n * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code.
+
+   Computing MAX
+*/
+		i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEHRD", " ", n, ilo, ihi, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+		if (*lwork >= *n * nbmin) {
+		    nb = *lwork / *n;
+		} else {
+		    nb = 1;
+		}
+	    }
+	}
+    }
+    ldwork = *n;
+
+    if ((nb < nbmin) || (nb >= nh)) {
+
+/*        Use unblocked code below */
+
+	i__ = *ilo;
+
+    } else {
+
+/*        Use blocked code */
+
+	i__1 = *ihi - 1 - nx;
+	i__2 = nb;
+	for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = nb, i__4 = *ihi - i__;
+	    ib = min(i__3,i__4);
+
+/*
+             Reduce columns i:i+ib-1 to Hessenberg form, returning the
+             matrices V and T of the block reflector H = I - V*T*V'
+             which performs the reduction, and also the matrix Y = A*V*T
+*/
+
+	    zlahrd_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
+		    c__65, &work[1], &ldwork);
+
+/*
+             Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
+             right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set
+             to 1.
+*/
+
+	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
+	    ei.r = a[i__3].r, ei.i = a[i__3].i;
+	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
+	    a[i__3].r = 1., a[i__3].i = 0.;
+	    i__3 = *ihi - i__ - ib + 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemm_("No transpose", "Conjugate transpose", ihi, &i__3, &ib, &
+		    z__1, &work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda,
+		     &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda);
+	    i__3 = i__ + ib + (i__ + ib - 1) * a_dim1;
+	    a[i__3].r = ei.r, a[i__3].i = ei.i;
+
+/*
+             Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
+             left
+*/
+
+	    i__3 = *ihi - i__;
+	    i__4 = *n - i__ - ib + 1;
+	    zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise", &
+		    i__3, &i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &
+		    c__65, &a[i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &
+		    ldwork);
+/* L30: */
+	}
+    }
+
+/*     Use unblocked code to reduce the rest of the matrix */
+
+    zgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
+    work[1].r = (doublereal) iws, work[1].i = 0.;
+
+    return 0;
+
+/*     End of ZGEHRD */
+
+} /* zgehrd_ */
+
+/* Subroutine */ int zgelq2_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublecomplex *tau, doublecomplex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, k;
+    static doublecomplex alpha;
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGELQ2 computes an LQ factorization of a complex m by n matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m by min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) COMPLEX*16 array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+    A(i,i+1:n), and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGELQ2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i,i+1:n) */
+
+	i__2 = *n - i__ + 1;
+	zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	i__2 = i__ + i__ * a_dim1;
+	alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	i__2 = *n - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &tau[i__]
+		);
+	if (i__ < *m) {
+
+/*           Apply H(i) to A(i+1:m,i:n) from the right */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1., a[i__2].i = 0.;
+	    i__2 = *m - i__;
+	    i__3 = *n - i__ + 1;
+	    zlarf_("Right", &i__2, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[
+		    i__], &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	}
+	i__2 = i__ + i__ * a_dim1;
+	a[i__2].r = alpha.r, a[i__2].i = alpha.i;
+	i__2 = *n - i__ + 1;
+	zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+/* L10: */
+    }
+    return 0;
+
+/*     End of ZGELQ2 */
+
+} /* zgelq2_ */
+
+/* Subroutine */ int zgelqf_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int zgelq2_(integer *, integer *, doublecomplex *,
+	     integer *, doublecomplex *, doublecomplex *, integer *), xerbla_(
+	    char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZGELQF computes an LQ factorization of a complex M-by-N matrix A:
+    A = L * Q.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and below the diagonal of the array
+            contain the m-by-min(m,n) lower trapezoidal matrix L (L is
+            lower triangular if m <= n); the elements above the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(k)' . . . H(2)' H(1)', where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
+    A(i,i+1:n), and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *m * nb;
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGELQF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "ZGELQF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "ZGELQF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the LQ factorization of the current block
+             A(i:i+ib-1,i:n)
+*/
+
+	    i__3 = *n - i__ + 1;
+	    zgelq2_(&ib, &i__3, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *n - i__ + 1;
+		zlarft_("Forward", "Rowwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i+ib:m,i:n) from the right */
+
+		i__3 = *m - i__ - ib + 1;
+		i__4 = *n - i__ + 1;
+		zlarfb_("Right", "No transpose", "Forward", "Rowwise", &i__3,
+			&i__4, &ib, &a[i__ + i__ * a_dim1], lda, &work[1], &
+			ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[ib +
+			1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	zgelq2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1].r = (doublereal) iws, work[1].i = 0.;
+    return 0;
+
+/*     End of ZGELQF */
+
+} /* zgelqf_ */
+
+/* Subroutine */ int zgelsd_(integer *m, integer *n, integer *nrhs,
+	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
+	doublereal *s, doublereal *rcond, integer *rank, doublecomplex *work,
+	integer *lwork, doublereal *rwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer ie, il, mm;
+    static doublereal eps, anrm, bnrm;
+    static integer itau, iascl, ibscl;
+    static doublereal sfmin;
+    static integer minmn, maxmn, itaup, itauq, mnthr, nwork;
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dlaset_(char *, integer *, integer
+	    *, doublereal *, doublereal *, doublereal *, integer *),
+	    xerbla_(char *, integer *), zgebrd_(integer *, integer *,
+	    doublecomplex *, integer *, doublereal *, doublereal *,
+	    doublecomplex *, doublecomplex *, doublecomplex *, integer *,
+	    integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern doublereal zlange_(char *, integer *, integer *, doublecomplex *,
+	    integer *, doublereal *);
+    static doublereal bignum;
+    extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *,
+	     integer *, doublecomplex *, doublecomplex *, integer *, integer *
+	    ), zlalsd_(char *, integer *, integer *, integer *, doublereal *,
+	    doublereal *, doublecomplex *, integer *, doublereal *, integer *,
+	     doublecomplex *, doublereal *, integer *, integer *),
+	    zlascl_(char *, integer *, integer *, doublereal *, doublereal *,
+	    integer *, integer *, doublecomplex *, integer *, integer *), zgeqrf_(integer *, integer *, doublecomplex *, integer *,
+	     doublecomplex *, doublecomplex *, integer *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *),
+	    zlaset_(char *, integer *, integer *, doublecomplex *,
+	    doublecomplex *, doublecomplex *, integer *);
+    static integer minwrk, maxwrk;
+    static doublereal smlnum;
+    extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *,
+	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
+	     doublecomplex *, integer *, doublecomplex *, integer *, integer *
+	    );
+    static logical lquery;
+    static integer nrwork, smlsiz;
+    extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    ZGELSD computes the minimum-norm solution to a real linear least
+    squares problem:
+        minimize 2-norm(| b - A*x |)
+    using the singular value decomposition (SVD) of A. A is an M-by-N
+    matrix which may be rank-deficient.
+
+    Several right hand side vectors b and solution vectors x can be
+    handled in a single call; they are stored as the columns of the
+    M-by-NRHS right hand side matrix B and the N-by-NRHS solution
+    matrix X.
+
+    The problem is solved in three steps:
+    (1) Reduce the coefficient matrix A to bidiagonal form with
+        Householder tranformations, reducing the original problem
+        into a "bidiagonal least squares problem" (BLS)
+    (2) Solve the BLS using a divide and conquer approach.
+    (3) Apply back all the Householder tranformations to solve
+        the original least squares problem.
+
+    The effective rank of A is determined by treating as zero those
+    singular values which are less than RCOND times the largest singular
+    value.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A. N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrices B and X. NRHS >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, A has been destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,M).
+
+    B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+            On entry, the M-by-NRHS right hand side matrix B.
+            On exit, B is overwritten by the N-by-NRHS solution matrix X.
+            If m >= n and RANK = n, the residual sum-of-squares for
+            the solution in the i-th column is given by the sum of
+            squares of elements n+1:m in that column.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M,N).
+
+    S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The singular values of A in decreasing order.
+            The condition number of A in the 2-norm = S(1)/S(min(m,n)).
+
+    RCOND   (input) DOUBLE PRECISION
+            RCOND is used to determine the effective rank of A.
+            Singular values S(i) <= RCOND*S(1) are treated as zero.
+            If RCOND < 0, machine precision is used instead.
+
+    RANK    (output) INTEGER
+            The effective rank of A, i.e., the number of singular values
+            which are greater than RCOND*S(1).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK must be at least 1.
+            The exact minimum amount of workspace needed depends on M,
+            N and NRHS. As long as LWORK is at least
+                2 * N + N * NRHS
+            if M is greater than or equal to N or
+                2 * M + M * NRHS
+            if M is less than N, the code will execute correctly.
+            For good performance, LWORK should generally be larger.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace) DOUBLE PRECISION array, dimension at least
+               10*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS +
+               (SMLSIZ+1)**2
+            if M is greater than or equal to N or
+               10*M + 2*M*SMLSIZ + 8*M*NLVL + 3*SMLSIZ*NRHS +
+               (SMLSIZ+1)**2
+            if M is less than N, the code will execute correctly.
+            SMLSIZ is returned by ILAENV and is equal to the maximum
+            size of the subproblems at the bottom of the computation
+            tree (usually about 25), and
+               NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+
+    IWORK   (workspace) INTEGER array, dimension (LIWORK)
+            LIWORK >= 3 * MINMN * NLVL + 11 * MINMN,
+            where MINMN = MIN( M,N ).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value.
+            > 0:  the algorithm for computing the SVD failed to converge;
+                  if INFO = i, i off-diagonal elements of an intermediate
+                  bidiagonal form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input arguments.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --s;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    maxmn = max(*m,*n);
+    mnthr = ilaenv_(&c__6, "ZGELSD", " ", m, n, nrhs, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*ldb < max(1,maxmn)) {
+	*info = -7;
+    }
+
+    smlsiz = ilaenv_(&c__9, "ZGELSD", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Compute workspace.
+       (Note: Comments in the code beginning "Workspace:" describe the
+       minimal amount of workspace needed at that point in the code,
+       as well as the preferred amount for good performance.
+       NB refers to the optimal block size for the immediately
+       following subroutine, as returned by ILAENV.)
+*/
+
+    minwrk = 1;
+    if (*info == 0) {
+	maxwrk = 0;
+	mm = *m;
+	if (*m >= *n && *m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns. */
+
+	    mm = *n;
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, &
+		    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = *nrhs * ilaenv_(&c__1, "ZUNMQR", "LC", m,
+		    nrhs, n, &c_n1, (ftnlen)6, (ftnlen)2);
+	    maxwrk = max(i__1,i__2);
+	}
+	if (*m >= *n) {
+
+/*
+             Path 1 - overdetermined or exactly determined.
+
+   Computing MAX
+*/
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + (mm + *n) * ilaenv_(&c__1,
+		    "ZGEBRD", " ", &mm, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1)
+		    ;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + *nrhs * ilaenv_(&c__1,
+		    "ZUNMBR", "QLC", &mm, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)
+		    3);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + (*n - 1) * ilaenv_(&c__1,
+		    "ZUNMBR", "PLN", n, nrhs, n, &c_n1, (ftnlen)6, (ftnlen)3);
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * *nrhs;
+	    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+	    i__1 = ((*n) << (1)) + mm, i__2 = ((*n) << (1)) + *n * *nrhs;
+	    minwrk = max(i__1,i__2);
+	}
+	if (*n > *m) {
+	    if (*n >= mnthr) {
+
+/*
+                Path 2a - underdetermined, with many more columns
+                than rows.
+*/
+
+		maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &c_n1,
+			&c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + ((*m) << (1))
+			* ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1, (
+			ftnlen)6, (ftnlen)1);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *nrhs *
+			ilaenv_(&c__1, "ZUNMBR", "QLC", m, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + (*m - 1) *
+			ilaenv_(&c__1, "ZUNMLQ", "LC", n, nrhs, m, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+		maxwrk = max(i__1,i__2);
+		if (*nrhs > 1) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + *m + *m * *nrhs;
+		    maxwrk = max(i__1,i__2);
+		} else {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = *m * *m + ((*m) << (1));
+		    maxwrk = max(i__1,i__2);
+		}
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = *m * *m + ((*m) << (2)) + *m * *nrhs;
+		maxwrk = max(i__1,i__2);
+	    } else {
+
+/*              Path 2 - underdetermined. */
+
+		maxwrk = ((*m) << (1)) + (*n + *m) * ilaenv_(&c__1, "ZGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = ((*m) << (1)) + *nrhs * ilaenv_(&c__1,
+			"ZUNMBR", "QLC", m, nrhs, m, &c_n1, (ftnlen)6, (
+			ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			"ZUNMBR", "PLN", n, nrhs, m, &c_n1, (ftnlen)6, (
+			ftnlen)3);
+		maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * *nrhs;
+		maxwrk = max(i__1,i__2);
+	    }
+/* Computing MAX */
+	    i__1 = ((*m) << (1)) + *n, i__2 = ((*m) << (1)) + *m * *nrhs;
+	    minwrk = max(i__1,i__2);
+	}
+	minwrk = min(minwrk,maxwrk);
+	d__1 = (doublereal) maxwrk;
+	z__1.r = d__1, z__1.i = 0.;
+	work[1].r = z__1.r, work[1].i = z__1.i;
+	if (*lwork < minwrk && ! lquery) {
+	    *info = -12;
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGELSD", &i__1);
+	return 0;
+    } else if (lquery) {
+	goto L10;
+    }
+
+/*     Quick return if possible. */
+
+    if ((*m == 0) || (*n == 0)) {
+	*rank = 0;
+	return 0;
+    }
+
+/*     Get machine parameters. */
+
+    eps = PRECISION;
+    sfmin = SAFEMINIMUM;
+    smlnum = sfmin / eps;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+
+/*     Scale A if max entry outside range [SMLNUM,BIGNUM]. */
+
+    anrm = zlange_("M", m, n, &a[a_offset], lda, &rwork[1]);
+    iascl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM */
+
+	zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 1;
+    } else if (anrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
+		info);
+	iascl = 2;
+    } else if (anrm == 0.) {
+
+/*        Matrix all zero. Return zero solution. */
+
+	i__1 = max(*m,*n);
+	zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb);
+	dlaset_("F", &minmn, &c__1, &c_b324, &c_b324, &s[1], &c__1)
+		;
+	*rank = 0;
+	goto L10;
+    }
+
+/*     Scale B if max entry outside range [SMLNUM,BIGNUM]. */
+
+    bnrm = zlange_("M", m, nrhs, &b[b_offset], ldb, &rwork[1]);
+    ibscl = 0;
+    if (bnrm > 0. && bnrm < smlnum) {
+
+/*        Scale matrix norm up to SMLNUM. */
+
+	zlascl_("G", &c__0, &c__0, &bnrm, &smlnum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 1;
+    } else if (bnrm > bignum) {
+
+/*        Scale matrix norm down to BIGNUM. */
+
+	zlascl_("G", &c__0, &c__0, &bnrm, &bignum, m, nrhs, &b[b_offset], ldb,
+		 info);
+	ibscl = 2;
+    }
+
+/*     If M < N make sure B(M+1:N,:) = 0 */
+
+    if (*m < *n) {
+	i__1 = *n - *m;
+	zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1], ldb);
+    }
+
+/*     Overdetermined case. */
+
+    if (*m >= *n) {
+
+/*        Path 1 - overdetermined or exactly determined. */
+
+	mm = *m;
+	if (*m >= mnthr) {
+
+/*           Path 1a - overdetermined, with many more rows than columns */
+
+	    mm = *n;
+	    itau = 1;
+	    nwork = itau + *n;
+
+/*
+             Compute A=Q*R.
+             (RWorkspace: need N)
+             (CWorkspace: need N, prefer N*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+
+/*
+             Multiply B by transpose(Q).
+             (RWorkspace: need N)
+             (CWorkspace: need NRHS, prefer NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zunmqr_("L", "C", m, nrhs, n, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+/*           Zero out below R. */
+
+	    if (*n > 1) {
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2],
+			lda);
+	    }
+	}
+
+	itauq = 1;
+	itaup = itauq + *n;
+	nwork = itaup + *n;
+	ie = 1;
+	nrwork = ie + *n;
+
+/*
+          Bidiagonalize R in A.
+          (RWorkspace: need N)
+          (CWorkspace: need 2*N+MM, prefer 2*N+(MM+N)*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	zgebrd_(&mm, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq], &
+		work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of R.
+          (CWorkspace: need 2*N+NRHS, prefer 2*N+NRHS*NB)
+*/
+
+	i__1 = *lwork - nwork + 1;
+	zunmbr_("Q", "L", "C", &mm, nrhs, n, &a[a_offset], lda, &work[itauq],
+		&b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	zlalsd_("U", &smlsiz, n, nrhs, &s[1], &rwork[ie], &b[b_offset], ldb,
+		rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1], info);
+	if (*info != 0) {
+	    goto L10;
+	}
+
+/*        Multiply B by right bidiagonalizing vectors of R. */
+
+	i__1 = *lwork - nwork + 1;
+	zunmbr_("P", "L", "N", n, nrhs, n, &a[a_offset], lda, &work[itaup], &
+		b[b_offset], ldb, &work[nwork], &i__1, info);
+
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = *m, i__2 = ((*m) << (1)) - 4, i__1 = max(i__1,i__2), i__1 =
+		max(i__1,*nrhs), i__2 = *n - *m * 3;
+	if (*n >= mnthr && *lwork >= ((*m) << (2)) + *m * *m + max(i__1,i__2))
+		 {
+
+/*
+          Path 2a - underdetermined, with many more columns than rows
+          and sufficient workspace for an efficient algorithm.
+*/
+
+	    ldwork = *m;
+/*
+   Computing MAX
+   Computing MAX
+*/
+	    i__3 = *m, i__4 = ((*m) << (1)) - 4, i__3 = max(i__3,i__4), i__3 =
+		     max(i__3,*nrhs), i__4 = *n - *m * 3;
+	    i__1 = ((*m) << (2)) + *m * *lda + max(i__3,i__4), i__2 = *m * *
+		    lda + *m + *m * *nrhs;
+	    if (*lwork >= max(i__1,i__2)) {
+		ldwork = *lda;
+	    }
+	    itau = 1;
+	    nwork = *m + 1;
+
+/*
+          Compute A=L*Q.
+          (CWorkspace: need 2*M, prefer M+M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &i__1,
+		     info);
+	    il = nwork;
+
+/*        Copy L to WORK(IL), zeroing out above its diagonal. */
+
+	    zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwork);
+	    i__1 = *m - 1;
+	    i__2 = *m - 1;
+	    zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwork], &
+		    ldwork);
+	    itauq = il + ldwork * *m;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+	    ie = 1;
+	    nrwork = ie + *m;
+
+/*
+          Bidiagonalize L in WORK(IL).
+          (RWorkspace: need M)
+          (CWorkspace: need M*M+4*M, prefer M*M+4*M+2*M*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zgebrd_(m, m, &work[il], &ldwork, &s[1], &rwork[ie], &work[itauq],
+		     &work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors of L.
+          (CWorkspace: need M*M+4*M+NRHS, prefer M*M+4*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zunmbr_("Q", "L", "C", m, nrhs, m, &work[il], &ldwork, &work[
+		    itauq], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    zlalsd_("U", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1],
+		     info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of L. */
+
+	    i__1 = *lwork - nwork + 1;
+	    zunmbr_("P", "L", "N", m, nrhs, m, &work[il], &ldwork, &work[
+		    itaup], &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Zero out below first M rows of B. */
+
+	    i__1 = *n - *m;
+	    zlaset_("F", &i__1, nrhs, &c_b59, &c_b59, &b[*m + 1 + b_dim1],
+		    ldb);
+	    nwork = itau + *m;
+
+/*
+          Multiply transpose(Q) by B.
+          (CWorkspace: need NRHS, prefer NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zunmlq_("L", "C", n, nrhs, m, &a[a_offset], lda, &work[itau], &b[
+		    b_offset], ldb, &work[nwork], &i__1, info);
+
+	} else {
+
+/*        Path 2 - remaining underdetermined cases. */
+
+	    itauq = 1;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+	    ie = 1;
+	    nrwork = ie + *m;
+
+/*
+          Bidiagonalize A.
+          (RWorkspace: need M)
+          (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, info);
+
+/*
+          Multiply B by transpose of left bidiagonalizing vectors.
+          (CWorkspace: need 2*M+NRHS, prefer 2*M+NRHS*NB)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zunmbr_("Q", "L", "C", m, nrhs, n, &a[a_offset], lda, &work[itauq]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+/*        Solve the bidiagonal least squares problem. */
+
+	    zlalsd_("L", &smlsiz, m, nrhs, &s[1], &rwork[ie], &b[b_offset],
+		    ldb, rcond, rank, &work[nwork], &rwork[nrwork], &iwork[1],
+		     info);
+	    if (*info != 0) {
+		goto L10;
+	    }
+
+/*        Multiply B by right bidiagonalizing vectors of A. */
+
+	    i__1 = *lwork - nwork + 1;
+	    zunmbr_("P", "L", "N", n, nrhs, m, &a[a_offset], lda, &work[itaup]
+		    , &b[b_offset], ldb, &work[nwork], &i__1, info);
+
+	}
+    }
+
+/*     Undo scaling. */
+
+    if (iascl == 1) {
+	zlascl_("G", &c__0, &c__0, &anrm, &smlnum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    } else if (iascl == 2) {
+	zlascl_("G", &c__0, &c__0, &anrm, &bignum, n, nrhs, &b[b_offset], ldb,
+		 info);
+	dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		minmn, info);
+    }
+    if (ibscl == 1) {
+	zlascl_("G", &c__0, &c__0, &smlnum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    } else if (ibscl == 2) {
+	zlascl_("G", &c__0, &c__0, &bignum, &bnrm, n, nrhs, &b[b_offset], ldb,
+		 info);
+    }
+
+L10:
+    d__1 = (doublereal) maxwrk;
+    z__1.r = d__1, z__1.i = 0.;
+    work[1].r = z__1.r, work[1].i = z__1.i;
+    return 0;
+
+/*     End of ZGELSD */
+
+} /* zgelsd_ */
+
+/* Subroutine */ int zgeqr2_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublecomplex *tau, doublecomplex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, k;
+    static doublecomplex alpha;
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGEQR2 computes a QR factorization of a complex m by n matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(m,n) by n upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of elementary reflectors (see Further Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace) COMPLEX*16 array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEQR2", &i__1);
+	return 0;
+    }
+
+    k = min(*m,*n);
+
+    i__1 = k;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*        Generate elementary reflector H(i) to annihilate A(i+1:m,i) */
+
+	i__2 = *m - i__ + 1;
+/* Computing MIN */
+	i__3 = i__ + 1;
+	zlarfg_(&i__2, &a[i__ + i__ * a_dim1], &a[min(i__3,*m) + i__ * a_dim1]
+		, &c__1, &tau[i__]);
+	if (i__ < *n) {
+
+/*           Apply H(i)' to A(i:m,i+1:n) from the left */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = 1., a[i__2].i = 0.;
+	    i__2 = *m - i__ + 1;
+	    i__3 = *n - i__;
+	    d_cnjg(&z__1, &tau[i__]);
+	    zlarf_("Left", &i__2, &i__3, &a[i__ + i__ * a_dim1], &c__1, &z__1,
+		     &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = alpha.r, a[i__2].i = alpha.i;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of ZGEQR2 */
+
+} /* zgeqr2_ */
+
+/* Subroutine */ int zgeqrf_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublecomplex *tau, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, k, ib, nb, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int zgeqr2_(integer *, integer *, doublecomplex *,
+	     integer *, doublecomplex *, doublecomplex *, integer *), xerbla_(
+	    char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZGEQRF computes a QR factorization of a complex M-by-N matrix A:
+    A = Q * R.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit, the elements on and above the diagonal of the array
+            contain the min(M,N)-by-N upper trapezoidal matrix R (R is
+            upper triangular if m >= n); the elements below the diagonal,
+            with the array TAU, represent the unitary matrix Q as a
+            product of min(m,n) elementary reflectors (see Further
+            Details).
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    TAU     (output) COMPLEX*16 array, dimension (min(M,N))
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of elementary reflectors
+
+       Q = H(1) H(2) . . . H(k), where k = min(m,n).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
+    and tau in TAU(i).
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    lwkopt = *n * nb;
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGEQRF", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    k = min(*m,*n);
+    if (k == 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "ZGEQRF", " ", m, n, &c_n1, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "ZGEQRF", " ", m, n, &c_n1, &
+			c_n1, (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < k && nx < k) {
+
+/*        Use blocked code initially */
+
+	i__1 = k - nx;
+	i__2 = nb;
+	for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__3 = k - i__ + 1;
+	    ib = min(i__3,nb);
+
+/*
+             Compute the QR factorization of the current block
+             A(i:m,i:i+ib-1)
+*/
+
+	    i__3 = *m - i__ + 1;
+	    zgeqr2_(&i__3, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[
+		    1], &iinfo);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__3 = *m - i__ + 1;
+		zlarft_("Forward", "Columnwise", &i__3, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i:m,i+ib:n) from the left */
+
+		i__3 = *m - i__ + 1;
+		i__4 = *n - i__ - ib + 1;
+		zlarfb_("Left", "Conjugate transpose", "Forward", "Columnwise"
+			, &i__3, &i__4, &ib, &a[i__ + i__ * a_dim1], lda, &
+			work[1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda,
+			&work[ib + 1], &ldwork);
+	    }
+/* L10: */
+	}
+    } else {
+	i__ = 1;
+    }
+
+/*     Use unblocked code to factor the last or only block. */
+
+    if (i__ <= k) {
+	i__2 = *m - i__ + 1;
+	i__1 = *n - i__ + 1;
+	zgeqr2_(&i__2, &i__1, &a[i__ + i__ * a_dim1], lda, &tau[i__], &work[1]
+		, &iinfo);
+    }
+
+    work[1].r = (doublereal) iws, work[1].i = 0.;
+    return 0;
+
+/*     End of ZGEQRF */
+
+} /* zgeqrf_ */
+
+/* Subroutine */ int zgesdd_(char *jobz, integer *m, integer *n,
+	doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u,
+	integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work,
+	integer *lwork, doublereal *rwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, u_dim1, u_offset, vt_dim1, vt_offset, i__1,
+	    i__2, i__3;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, ie, il, ir, iu, blk;
+    static doublereal dum[1], eps;
+    static integer iru, ivt, iscl;
+    static doublereal anrm;
+    static integer idum[1], ierr, itau, irvt;
+    extern logical lsame_(char *, char *);
+    static integer chunk, minmn;
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static integer wrkbl, itaup, itauq;
+    static logical wntqa;
+    static integer nwork;
+    static logical wntqn, wntqo, wntqs;
+    extern /* Subroutine */ int zlacp2_(char *, integer *, integer *,
+	    doublereal *, integer *, doublecomplex *, integer *);
+    static integer mnthr1, mnthr2;
+    extern /* Subroutine */ int dbdsdc_(char *, char *, integer *, doublereal
+	    *, doublereal *, doublereal *, integer *, doublereal *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, integer *);
+
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), xerbla_(char *, integer *),
+	     zgebrd_(integer *, integer *, doublecomplex *, integer *,
+	    doublereal *, doublereal *, doublecomplex *, doublecomplex *,
+	    doublecomplex *, integer *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static doublereal bignum;
+    extern doublereal zlange_(char *, integer *, integer *, doublecomplex *,
+	    integer *, doublereal *);
+    extern /* Subroutine */ int zgelqf_(integer *, integer *, doublecomplex *,
+	     integer *, doublecomplex *, doublecomplex *, integer *, integer *
+	    ), zlacrm_(integer *, integer *, doublecomplex *, integer *,
+	    doublereal *, integer *, doublecomplex *, integer *, doublereal *)
+	    , zlarcm_(integer *, integer *, doublereal *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublereal *), zlascl_(char *, integer *, integer *, doublereal *,
+	     doublereal *, integer *, integer *, doublecomplex *, integer *,
+	    integer *), zgeqrf_(integer *, integer *, doublecomplex *,
+	     integer *, doublecomplex *, doublecomplex *, integer *, integer *
+	    );
+    static integer ldwrkl;
+    extern /* Subroutine */ int zlacpy_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *),
+	    zlaset_(char *, integer *, integer *, doublecomplex *,
+	    doublecomplex *, doublecomplex *, integer *);
+    static integer ldwrkr, minwrk, ldwrku, maxwrk;
+    extern /* Subroutine */ int zungbr_(char *, integer *, integer *, integer
+	    *, doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *);
+    static integer ldwkvt;
+    static doublereal smlnum;
+    static logical wntqas;
+    extern /* Subroutine */ int zunmbr_(char *, char *, char *, integer *,
+	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
+	     doublecomplex *, integer *, doublecomplex *, integer *, integer *
+	    ), zunglq_(integer *, integer *, integer *
+	    , doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *);
+    static logical lquery;
+    static integer nrwork;
+    extern /* Subroutine */ int zungqr_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    ZGESDD computes the singular value decomposition (SVD) of a complex
+    M-by-N matrix A, optionally computing the left and/or right singular
+    vectors, by using divide-and-conquer method. The SVD is written
+
+         A = U * SIGMA * conjugate-transpose(V)
+
+    where SIGMA is an M-by-N matrix which is zero except for its
+    min(m,n) diagonal elements, U is an M-by-M unitary matrix, and
+    V is an N-by-N unitary matrix.  The diagonal elements of SIGMA
+    are the singular values of A; they are real and non-negative, and
+    are returned in descending order.  The first min(m,n) columns of
+    U and V are the left and right singular vectors of A.
+
+    Note that the routine returns VT = V**H, not V.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            Specifies options for computing all or part of the matrix U:
+            = 'A':  all M columns of U and all N rows of V**H are
+                    returned in the arrays U and VT;
+            = 'S':  the first min(M,N) columns of U and the first
+                    min(M,N) rows of V**H are returned in the arrays U
+                    and VT;
+            = 'O':  If M >= N, the first N columns of U are overwritten
+                    on the array A and all rows of V**H are returned in
+                    the array VT;
+                    otherwise, all columns of U are returned in the
+                    array U and the first M rows of V**H are overwritten
+                    in the array VT;
+            = 'N':  no columns of U or rows of V**H are computed.
+
+    M       (input) INTEGER
+            The number of rows of the input matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the input matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the M-by-N matrix A.
+            On exit,
+            if JOBZ = 'O',  A is overwritten with the first N columns
+                            of U (the left singular vectors, stored
+                            columnwise) if M >= N;
+                            A is overwritten with the first M rows
+                            of V**H (the right singular vectors, stored
+                            rowwise) otherwise.
+            if JOBZ .ne. 'O', the contents of A are destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    S       (output) DOUBLE PRECISION array, dimension (min(M,N))
+            The singular values of A, sorted so that S(i) >= S(i+1).
+
+    U       (output) COMPLEX*16 array, dimension (LDU,UCOL)
+            UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
+            UCOL = min(M,N) if JOBZ = 'S'.
+            If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
+            unitary matrix U;
+            if JOBZ = 'S', U contains the first min(M,N) columns of U
+            (the left singular vectors, stored columnwise);
+            if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
+
+    LDU     (input) INTEGER
+            The leading dimension of the array U.  LDU >= 1; if
+            JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
+
+    VT      (output) COMPLEX*16 array, dimension (LDVT,N)
+            If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
+            N-by-N unitary matrix V**H;
+            if JOBZ = 'S', VT contains the first min(M,N) rows of
+            V**H (the right singular vectors, stored rowwise);
+            if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
+
+    LDVT    (input) INTEGER
+            The leading dimension of the array VT.  LDVT >= 1; if
+            JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
+            if JOBZ = 'S', LDVT >= min(M,N).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= 1.
+            if JOBZ = 'N', LWORK >= 2*min(M,N)+max(M,N).
+            if JOBZ = 'O',
+                  LWORK >= 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+            if JOBZ = 'S' or 'A',
+                  LWORK >= min(M,N)*min(M,N)+2*min(M,N)+max(M,N).
+            For good performance, LWORK should generally be larger.
+            If LWORK < 0 but other input arguments are legal, WORK(1)
+            returns the optimal LWORK.
+
+    RWORK   (workspace) DOUBLE PRECISION array, dimension (LRWORK)
+            If JOBZ = 'N', LRWORK >= 7*min(M,N).
+            Otherwise, LRWORK >= 5*min(M,N)*min(M,N) + 5*min(M,N)
+
+    IWORK   (workspace) INTEGER array, dimension (8*min(M,N))
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The updating process of DBDSDC did not converge.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Huan Ren, Computer Science Division, University of
+       California at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --s;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    vt_dim1 = *ldvt;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    minmn = min(*m,*n);
+    mnthr1 = (integer) (minmn * 17. / 9.);
+    mnthr2 = (integer) (minmn * 5. / 3.);
+    wntqa = lsame_(jobz, "A");
+    wntqs = lsame_(jobz, "S");
+    wntqas = (wntqa) || (wntqs);
+    wntqo = lsame_(jobz, "O");
+    wntqn = lsame_(jobz, "N");
+    minwrk = 1;
+    maxwrk = 1;
+    lquery = *lwork == -1;
+
+    if (! ((((wntqa) || (wntqs)) || (wntqo)) || (wntqn))) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (((*ldu < 1) || (wntqas && *ldu < *m)) || (wntqo && *m < *n && *
+	    ldu < *m)) {
+	*info = -8;
+    } else if ((((*ldvt < 1) || (wntqa && *ldvt < *n)) || (wntqs && *ldvt <
+	    minmn)) || (wntqo && *m >= *n && *ldvt < *n)) {
+	*info = -10;
+    }
+
+/*
+       Compute workspace
+        (Note: Comments in the code beginning "Workspace:" describe the
+         minimal amount of workspace needed at that point in the code,
+         as well as the preferred amount for good performance.
+         CWorkspace refers to complex workspace, and RWorkspace to
+         real workspace. NB refers to the optimal block size for the
+         immediately following subroutine, as returned by ILAENV.)
+*/
+
+    if (*info == 0 && *m > 0 && *n > 0) {
+	if (*m >= *n) {
+
+/*
+             There is no complex work space needed for bidiagonal SVD
+             The real work space needed for bidiagonal SVD is BDSPAC,
+             BDSPAC = 3*N*N + 4*N
+*/
+
+	    if (*m >= mnthr1) {
+		if (wntqn) {
+
+/*                 Path 1 (M much larger than N, JOBZ='N') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = wrkbl;
+		    minwrk = *n * 3;
+		} else if (wntqo) {
+
+/*                 Path 2 (M much larger than N, JOBZ='O') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *n + *n * *n + wrkbl;
+		    minwrk = ((*n) << (1)) * *n + *n * 3;
+		} else if (wntqs) {
+
+/*                 Path 3 (M much larger than N, JOBZ='S') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *n * ilaenv_(&c__1, "ZUNGQR",
+			    " ", m, n, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *n * *n + wrkbl;
+		    minwrk = *n * *n + *n * 3;
+		} else if (wntqa) {
+
+/*                 Path 4 (M much larger than N, JOBZ='A') */
+
+		    wrkbl = *n + *n * ilaenv_(&c__1, "ZGEQRF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *n + *m * ilaenv_(&c__1, "ZUNGQR",
+			    " ", m, m, n, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + ((*n) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", n, n, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *n * *n + wrkbl;
+		    minwrk = *n * *n + ((*n) << (1)) + *m;
+		}
+	    } else if (*m >= mnthr2) {
+
+/*              Path 5 (M much larger than N, but not as much as MNTHR1) */
+
+		maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*n) << (1)) + *m;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *n * *n;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "Q", m, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "P", n, n, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 6 (M at least N, but not much larger) */
+
+		maxwrk = ((*n) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*n) << (1)) + *m;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *n * *n;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", m, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "PRC", n, n, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*n) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		}
+	    }
+	} else {
+
+/*
+             There is no complex work space needed for bidiagonal SVD
+             The real work space needed for bidiagonal SVD is BDSPAC,
+             BDSPAC = 3*M*M + 4*M
+*/
+
+	    if (*n >= mnthr1) {
+		if (wntqn) {
+
+/*                 Path 1t (N much larger than M, JOBZ='N') */
+
+		    maxwrk = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    maxwrk = max(i__1,i__2);
+		    minwrk = *m * 3;
+		} else if (wntqo) {
+
+/*                 Path 2t (N much larger than M, JOBZ='O') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *n + *m * *m + wrkbl;
+		    minwrk = ((*m) << (1)) * *m + *m * 3;
+		} else if (wntqs) {
+
+/*                 Path 3t (N much larger than M, JOBZ='S') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *m * ilaenv_(&c__1, "ZUNGLQ",
+			    " ", m, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *m + wrkbl;
+		    minwrk = *m * *m + *m * 3;
+		} else if (wntqa) {
+
+/*                 Path 4t (N much larger than M, JOBZ='A') */
+
+		    wrkbl = *m + *m * ilaenv_(&c__1, "ZGELQF", " ", m, n, &
+			    c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = *m + *n * ilaenv_(&c__1, "ZUNGLQ",
+			    " ", n, n, m, &c_n1, (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + ((*m) << (1)) *
+			    ilaenv_(&c__1, "ZGEBRD", " ", m, m, &c_n1, &c_n1,
+			    (ftnlen)6, (ftnlen)1);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = wrkbl, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", m, m, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    wrkbl = max(i__1,i__2);
+		    maxwrk = *m * *m + wrkbl;
+		    minwrk = *m * *m + ((*m) << (1)) + *n;
+		}
+	    } else if (*n >= mnthr2) {
+
+/*              Path 5t (N much larger than M, but not as much as MNTHR1) */
+
+		maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*m) << (1)) + *n;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *m * *m;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "P", m, n, m, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "P", n, n, m, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "Q", m, m, n, &c_n1, (ftnlen)6, (ftnlen)
+			    1);
+		    maxwrk = max(i__1,i__2);
+		}
+	    } else {
+
+/*              Path 6t (N greater than M, but not much larger) */
+
+		maxwrk = ((*m) << (1)) + (*m + *n) * ilaenv_(&c__1, "ZGEBRD",
+			" ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)1);
+		minwrk = ((*m) << (1)) + *n;
+		if (wntqo) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "PRC", m, n, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNMBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		    maxwrk += *m * *n;
+		    minwrk += *m * *m;
+		} else if (wntqs) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "PRC", m, n, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		} else if (wntqa) {
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *n * ilaenv_(&c__1,
+			    "ZUNGBR", "PRC", n, n, m, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+/* Computing MAX */
+		    i__1 = maxwrk, i__2 = ((*m) << (1)) + *m * ilaenv_(&c__1,
+			    "ZUNGBR", "QLN", m, m, n, &c_n1, (ftnlen)6, (
+			    ftnlen)3);
+		    maxwrk = max(i__1,i__2);
+		}
+	    }
+	}
+	maxwrk = max(maxwrk,minwrk);
+	work[1].r = (doublereal) maxwrk, work[1].i = 0.;
+    }
+
+    if (*lwork < minwrk && ! lquery) {
+	*info = -13;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGESDD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	if (*lwork >= 1) {
+	    work[1].r = 1., work[1].i = 0.;
+	}
+	return 0;
+    }
+
+/*     Get machine constants */
+
+    eps = PRECISION;
+    smlnum = sqrt(SAFEMINIMUM) / eps;
+    bignum = 1. / smlnum;
+
+/*     Scale A if max element outside range [SMLNUM,BIGNUM] */
+
+    anrm = zlange_("M", m, n, &a[a_offset], lda, dum);
+    iscl = 0;
+    if (anrm > 0. && anrm < smlnum) {
+	iscl = 1;
+	zlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda, &
+		ierr);
+    } else if (anrm > bignum) {
+	iscl = 1;
+	zlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda, &
+		ierr);
+    }
+
+    if (*m >= *n) {
+
+/*
+          A has at least as many rows as columns. If A has sufficiently
+          more rows than columns, first reduce using the QR
+          decomposition (if sufficient workspace available)
+*/
+
+	if (*m >= mnthr1) {
+
+	    if (wntqn) {
+
+/*
+                Path 1 (M much larger than N, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: need 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Zero out below R */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &a[a_dim1 + 2],
+			lda);
+		ie = 1;
+		itauq = 1;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+		nrwork = ie + *n;
+
+/*
+                Perform bidiagonal SVD, compute singular values only
+                (CWorkspace: 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2 (M much larger than N, JOBZ='O')
+                N left singular vectors to be overwritten on A and
+                N right singular vectors to be computed in VT
+*/
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		ir = iu + ldwrku * *n;
+		if (*lwork >= *m * *n + *n * *n + *n * 3) {
+
+/*                 WORK(IR) is M by N */
+
+		    ldwrkr = *m;
+		} else {
+		    ldwrkr = (*lwork - *n * *n - *n * 3) / *n;
+		}
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (CWorkspace: need N*N+2*N, prefer M*N+N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy R to WORK( IR ), zeroing out below it */
+
+		zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		zlaset_("L", &i__1, &i__2, &c_b59, &c_b59, &work[ir + 1], &
+			ldwrkr);
+
+/*
+                Generate Q in A
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in WORK(IR)
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of R in WORK(IRU) and computing right singular vectors
+                of R in WORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *n;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by the left singular vectors of R
+                (CWorkspace: need 2*N*N+3*N, prefer M*N+N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by the right singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IU), storing result in WORK(IR) and copying to A
+                (CWorkspace: need 2*N*N, prefer N*N+M*N)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *m;
+		i__2 = ldwrkr;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = min(i__3,ldwrkr);
+		    zgemm_("N", "N", &chunk, n, n, &c_b60, &a[i__ + a_dim1],
+			    lda, &work[iu], &ldwrku, &c_b59, &work[ir], &
+			    ldwrkr);
+		    zlacpy_("F", &chunk, n, &work[ir], &ldwrkr, &a[i__ +
+			    a_dim1], lda);
+/* L10: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Path 3 (M much larger than N, JOBZ='S')
+                N left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		ir = 1;
+
+/*              WORK(IR) is N by N */
+
+		ldwrkr = *n;
+		itau = ir + ldwrkr * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R
+                (CWorkspace: need N*N+2*N, prefer N*N+N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy R to WORK(IR), zeroing out below it */
+
+		zlacpy_("U", n, n, &a[a_offset], lda, &work[ir], &ldwrkr);
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &work[ir + 1], &
+			ldwrkr);
+
+/*
+                Generate Q in A
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zungqr_(m, n, n, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in WORK(IR)
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgebrd_(n, n, &work[ir], &ldwrkr, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *n;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", n, n, n, &work[ir], &ldwrkr, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, n, &work[ir], &ldwrkr, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in A by left singular vectors of R in
+                WORK(IR), storing result in U
+                (CWorkspace: need N*N)
+                (RWorkspace: 0)
+*/
+
+		zlacpy_("F", n, n, &u[u_offset], ldu, &work[ir], &ldwrkr);
+		zgemm_("N", "N", m, n, n, &c_b60, &a[a_offset], lda, &work[ir]
+			, &ldwrkr, &c_b59, &u[u_offset], ldu);
+
+	    } else if (wntqa) {
+
+/*
+                Path 4 (M much larger than N, JOBZ='A')
+                M left singular vectors to be computed in U and
+                N right singular vectors to be computed in VT
+*/
+
+		iu = 1;
+
+/*              WORK(IU) is N by N */
+
+		ldwrku = *n;
+		itau = iu + ldwrku * *n;
+		nwork = itau + *n;
+
+/*
+                Compute A=Q*R, copying result to U
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgeqrf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+		zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Generate Q in U
+                (CWorkspace: need N+M, prefer N+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zungqr_(m, m, n, &u[u_offset], ldu, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+
+/*              Produce R in A, zeroing out below it */
+
+		i__2 = *n - 1;
+		i__1 = *n - 1;
+		zlaset_("L", &i__2, &i__1, &c_b59, &c_b59, &a[a_dim1 + 2],
+			lda);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *n;
+		nwork = itaup + *n;
+
+/*
+                Bidiagonalize R in A
+                (CWorkspace: need 3*N, prefer 2*N+2*N*NB)
+                (RWorkspace: need N)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgebrd_(n, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+		iru = ie + *n;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by left singular vectors of R
+                (CWorkspace: need N*N+3*N, prefer N*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", n, n, n, &a[a_offset], lda, &work[
+			itauq], &work[iu], &ldwrku, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of R
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply Q in U by left singular vectors of R in
+                WORK(IU), storing result in A
+                (CWorkspace: need N*N)
+                (RWorkspace: 0)
+*/
+
+		zgemm_("N", "N", m, n, n, &c_b60, &u[u_offset], ldu, &work[iu]
+			, &ldwrku, &c_b59, &a[a_offset], lda);
+
+/*              Copy left singular vectors of A from A to U */
+
+		zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+
+	    }
+
+	} else if (*m >= mnthr2) {
+
+/*
+             MNTHR2 <= M < MNTHR1
+
+             Path 5 (M much larger than N, but not as much as MNTHR1)
+             Reduce to bidiagonal form without QR decomposition, use
+             ZUNGBR and matrix multiplication to compute singular vectors
+*/
+
+	    ie = 1;
+	    nrwork = ie + *n;
+	    itauq = 1;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+             (RWorkspace: need N)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		iu = nwork;
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__2, &ierr);
+
+/*
+                Generate Q in A
+                (CWorkspace: need 2*N, prefer N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &work[
+			nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *n * 3) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		} else {
+
+/*                 WORK(IU) is LDWRKU by N */
+
+		    ldwrku = (*lwork - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in WORK(IU), copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &work[iu]
+			, &ldwrku, &rwork[nrwork]);
+		zlacpy_("F", n, n, &work[iu], &ldwrku, &vt[vt_offset], ldvt);
+
+/*
+                Multiply Q in A by real matrix RWORK(IRU), storing the
+                result in WORK(IU), copying to A
+                (CWorkspace: need N*N, prefer M*N)
+                (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*/
+
+		nrwork = irvt;
+		i__2 = *m;
+		i__1 = ldwrku;
+		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			i__1) {
+/* Computing MIN */
+		    i__3 = *m - i__ + 1;
+		    chunk = min(i__3,ldwrku);
+		    zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru], n,
+			    &work[iu], &ldwrku, &rwork[nrwork]);
+		    zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ +
+			    a_dim1], lda);
+/* L20: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__1, &ierr);
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zungbr_("Q", m, n, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: need 0)
+                (Rworkspace: need N*N+2*M*N)
+*/
+
+		nrwork = irvt;
+		zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+	    } else {
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("U", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zungbr_("P", n, n, n, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__1, &ierr);
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("L", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		zlarcm_(n, n, &rwork[irvt], n, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		zlacpy_("F", n, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: 0)
+                (Rworkspace: need 3*N*N)
+*/
+
+		nrwork = irvt;
+		zlacrm_(m, n, &u[u_offset], ldu, &rwork[iru], n, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		zlacpy_("F", m, n, &a[a_offset], lda, &u[u_offset], ldu);
+	    }
+
+	} else {
+
+/*
+             M .LT. MNTHR2
+
+             Path 6 (M at least N, but not much larger)
+             Reduce to bidiagonal form without QR decomposition
+             Use ZUNMBR to compute singular vectors
+*/
+
+	    ie = 1;
+	    nrwork = ie + *n;
+	    itauq = 1;
+	    itaup = itauq + *n;
+	    nwork = itaup + *n;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*N+M, prefer 2*N+(M+N)*NB)
+             (RWorkspace: need N)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, &ierr);
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "N", n, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		iu = nwork;
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		if (*lwork >= *m * *n + *n * 3) {
+
+/*                 WORK( IU ) is M by N */
+
+		    ldwrku = *m;
+		} else {
+
+/*                 WORK( IU ) is LDWRKU by N */
+
+		    ldwrku = (*lwork - *n * 3) / *n;
+		}
+		nwork = iu + ldwrku * *n;
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (Cworkspace: need 2*N, prefer N+N*NB)
+                (Rworkspace: need 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+		if (*lwork >= *m * *n + *n * 3) {
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by left singular vectors of A, copying
+                to A
+                (Cworkspace: need M*N+2*N, prefer M*N+N+N*NB)
+                (Rworkspace: need 0)
+*/
+
+		    zlaset_("F", m, n, &c_b59, &c_b59, &work[iu], &ldwrku);
+		    zlacp2_("F", n, n, &rwork[iru], n, &work[iu], &ldwrku);
+		    i__1 = *lwork - nwork + 1;
+		    zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			    itauq], &work[iu], &ldwrku, &work[nwork], &i__1, &
+			    ierr);
+		    zlacpy_("F", m, n, &work[iu], &ldwrku, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate Q in A
+                   (Cworkspace: need 2*N, prefer N+N*NB)
+                   (Rworkspace: need 0)
+*/
+
+		    i__1 = *lwork - nwork + 1;
+		    zungbr_("Q", m, n, n, &a[a_offset], lda, &work[itauq], &
+			    work[nwork], &i__1, &ierr);
+
+/*
+                   Multiply Q in A by real matrix RWORK(IRU), storing the
+                   result in WORK(IU), copying to A
+                   (CWorkspace: need N*N, prefer M*N)
+                   (Rworkspace: need 3*N*N, prefer N*N+2*M*N)
+*/
+
+		    nrwork = irvt;
+		    i__1 = *m;
+		    i__2 = ldwrku;
+		    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			     i__2) {
+/* Computing MIN */
+			i__3 = *m - i__ + 1;
+			chunk = min(i__3,ldwrku);
+			zlacrm_(&chunk, n, &a[i__ + a_dim1], lda, &rwork[iru],
+				 n, &work[iu], &ldwrku, &rwork[nrwork]);
+			zlacpy_("F", &chunk, n, &work[iu], &ldwrku, &a[i__ +
+				a_dim1], lda);
+/* L30: */
+		    }
+		}
+
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlaset_("F", m, n, &c_b59, &c_b59, &u[u_offset], ldu);
+		zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, n, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+	    } else {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = nrwork;
+		irvt = iru + *n * *n;
+		nrwork = irvt + *n * *n;
+		dbdsdc_("U", "I", n, &s[1], &rwork[ie], &rwork[iru], n, &
+			rwork[irvt], n, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*              Set the right corner of U to identity matrix */
+
+		zlaset_("F", m, m, &c_b59, &c_b59, &u[u_offset], ldu);
+		i__2 = *m - *n;
+		i__1 = *m - *n;
+		zlaset_("F", &i__2, &i__1, &c_b59, &c_b60, &u[*n + 1 + (*n +
+			1) * u_dim1], ldu);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 2*N+M, prefer 2*N+M*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[iru], n, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 3*N, prefer 2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", n, n, &rwork[irvt], n, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, n, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__2, &
+			ierr);
+	    }
+
+	}
+
+    } else {
+
+/*
+          A has more columns than rows. If A has sufficiently more
+          columns than rows, first reduce using the LQ decomposition
+          (if sufficient workspace available)
+*/
+
+	if (*n >= mnthr1) {
+
+	    if (wntqn) {
+
+/*
+                Path 1t (N much larger than M, JOBZ='N')
+                No singular vectors to be computed
+*/
+
+		itau = 1;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Zero out above L */
+
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &a[((a_dim1) << (1)
+			) + 1], lda);
+		ie = 1;
+		itauq = 1;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (CWorkspace: need 3*M, prefer 2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+		nrwork = ie + *m;
+
+/*
+                Perform bidiagonal SVD, compute singular values only
+                (CWorkspace: 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		dbdsdc_("U", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+
+	    } else if (wntqo) {
+
+/*
+                Path 2t (N much larger than M, JOBZ='O')
+                M right singular vectors to be overwritten on A and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+		ldwkvt = *m;
+
+/*              WORK(IVT) is M by M */
+
+		il = ivt + ldwkvt * *m;
+		if (*lwork >= *m * *n + *m * *m + *m * 3) {
+
+/*                 WORK(IL) M by N */
+
+		    ldwrkl = *m;
+		    chunk = *n;
+		} else {
+
+/*                 WORK(IL) is M by CHUNK */
+
+		    ldwrkl = *m;
+		    chunk = (*lwork - *m * *m - *m * 3) / *m;
+		}
+		itau = il + ldwrkl * chunk;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__2, &ierr);
+
+/*              Copy L to WORK(IL), zeroing about above it */
+
+		zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__2 = *m - 1;
+		i__1 = *m - 1;
+		zlaset_("U", &i__2, &i__1, &c_b59, &c_b59, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*
+                Generate Q in A
+                (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__2, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IL)
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__2 = *lwork - nwork + 1;
+		zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *m;
+		irvt = iru + *m * *m;
+		nrwork = irvt + *m * *m;
+		dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix WORK(IU)
+                Overwrite WORK(IU) by the left singular vectors of L
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+                Overwrite WORK(IVT) by the right singular vectors of L
+                (CWorkspace: need N*N+3*N, prefer M*N+2*N+N*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IL) by Q
+                in A, storing result in WORK(IL) and copying to A
+                (CWorkspace: need 2*M*M, prefer M*M+M*N))
+                (RWorkspace: 0)
+*/
+
+		i__2 = *n;
+		i__1 = chunk;
+		for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			i__1) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = min(i__3,chunk);
+		    zgemm_("N", "N", m, &blk, m, &c_b60, &work[ivt], m, &a[
+			    i__ * a_dim1 + 1], lda, &c_b59, &work[il], &
+			    ldwrkl);
+		    zlacpy_("F", m, &blk, &work[il], &ldwrkl, &a[i__ * a_dim1
+			    + 1], lda);
+/* L40: */
+		}
+
+	    } else if (wntqs) {
+
+/*
+               Path 3t (N much larger than M, JOBZ='S')
+               M right singular vectors to be computed in VT and
+               M left singular vectors to be computed in U
+*/
+
+		il = 1;
+
+/*              WORK(IL) is M by M */
+
+		ldwrkl = *m;
+		itau = il + ldwrkl * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+
+/*              Copy L to WORK(IL), zeroing out above it */
+
+		zlacpy_("L", m, m, &a[a_offset], lda, &work[il], &ldwrkl);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &work[il + ldwrkl],
+			 &ldwrkl);
+
+/*
+                Generate Q in A
+                (CWorkspace: need M*M+2*M, prefer M*M+M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zunglq_(m, n, m, &a[a_offset], lda, &work[itau], &work[nwork],
+			 &i__1, &ierr);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in WORK(IL)
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgebrd_(m, m, &work[il], &ldwrkl, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *m;
+		irvt = iru + *m * *m;
+		nrwork = irvt + *m * *m;
+		dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of L
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, m, &work[il], &ldwrkl, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by left singular vectors of L
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", m, m, m, &work[il], &ldwrkl, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Copy VT to WORK(IL), multiply right singular vectors of L
+                in WORK(IL) by Q in A, storing result in VT
+                (CWorkspace: need M*M)
+                (RWorkspace: 0)
+*/
+
+		zlacpy_("F", m, m, &vt[vt_offset], ldvt, &work[il], &ldwrkl);
+		zgemm_("N", "N", m, n, m, &c_b60, &work[il], &ldwrkl, &a[
+			a_offset], lda, &c_b59, &vt[vt_offset], ldvt);
+
+	    } else if (wntqa) {
+
+/*
+                Path 9t (N much larger than M, JOBZ='A')
+                N right singular vectors to be computed in VT and
+                M left singular vectors to be computed in U
+*/
+
+		ivt = 1;
+
+/*              WORK(IVT) is M by M */
+
+		ldwkvt = *m;
+		itau = ivt + ldwkvt * *m;
+		nwork = itau + *m;
+
+/*
+                Compute A=L*Q, copying result to VT
+                (CWorkspace: need 2*M, prefer M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgelqf_(m, n, &a[a_offset], lda, &work[itau], &work[nwork], &
+			i__1, &ierr);
+		zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+/*
+                Generate Q in VT
+                (CWorkspace: need M+N, prefer M+N*NB)
+                (RWorkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zunglq_(n, n, m, &vt[vt_offset], ldvt, &work[itau], &work[
+			nwork], &i__1, &ierr);
+
+/*              Produce L in A, zeroing out above it */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		zlaset_("U", &i__1, &i__2, &c_b59, &c_b59, &a[((a_dim1) << (1)
+			) + 1], lda);
+		ie = 1;
+		itauq = itau;
+		itaup = itauq + *m;
+		nwork = itaup + *m;
+
+/*
+                Bidiagonalize L in A
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+2*M*NB)
+                (RWorkspace: need M)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zgebrd_(m, m, &a[a_offset], lda, &s[1], &rwork[ie], &work[
+			itauq], &work[itaup], &work[nwork], &i__1, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		iru = ie + *m;
+		irvt = iru + *m * *m;
+		nrwork = irvt + *m * *m;
+		dbdsdc_("U", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of L
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, m, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+                Overwrite WORK(IVT) by right singular vectors of L
+                (CWorkspace: need M*M+3*M, prefer M*M+2*M+M*NB)
+                (RWorkspace: 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", m, m, m, &a[a_offset], lda, &work[
+			itaup], &work[ivt], &ldwkvt, &work[nwork], &i__1, &
+			ierr);
+
+/*
+                Multiply right singular vectors of L in WORK(IVT) by
+                Q in VT, storing result in A
+                (CWorkspace: need M*M)
+                (RWorkspace: 0)
+*/
+
+		zgemm_("N", "N", m, n, m, &c_b60, &work[ivt], &ldwkvt, &vt[
+			vt_offset], ldvt, &c_b59, &a[a_offset], lda);
+
+/*              Copy right singular vectors of A from A to VT */
+
+		zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+
+	    }
+
+	} else if (*n >= mnthr2) {
+
+/*
+             MNTHR2 <= N < MNTHR1
+
+             Path 5t (N much larger than M, but not as much as MNTHR1)
+             Reduce to bidiagonal form without QR decomposition, use
+             ZUNGBR and matrix multiplication to compute singular vectors
+*/
+
+
+	    ie = 1;
+	    nrwork = ie + *m;
+	    itauq = 1;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+             (RWorkspace: M)
+*/
+
+	    i__1 = *lwork - nwork + 1;
+	    zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__1, &ierr);
+
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		ivt = nwork;
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__1, &ierr);
+
+/*
+                Generate P**H in A
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		i__1 = *lwork - nwork + 1;
+		zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &work[
+			nwork], &i__1, &ierr);
+
+		ldwkvt = *m;
+		if (*lwork >= *m * *n + *m * 3) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    nwork = ivt + ldwkvt * *n;
+		    chunk = *n;
+		} else {
+
+/*                 WORK( IVT ) is M by CHUNK */
+
+		    chunk = (*lwork - *m * 3) / *m;
+		    nwork = ivt + ldwkvt * chunk;
+		}
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRVT)
+                storing the result in WORK(IVT), copying to U
+                (Cworkspace: need 0)
+                (Rworkspace: need 2*M*M)
+*/
+
+		zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &work[ivt], &
+			ldwkvt, &rwork[nrwork]);
+		zlacpy_("F", m, m, &work[ivt], &ldwkvt, &u[u_offset], ldu);
+
+/*
+                Multiply RWORK(IRVT) by P**H in A, storing the
+                result in WORK(IVT), copying to A
+                (CWorkspace: need M*M, prefer M*N)
+                (Rworkspace: need 2*M*M, prefer 2*M*N)
+*/
+
+		nrwork = iru;
+		i__1 = *n;
+		i__2 = chunk;
+		for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+			i__2) {
+/* Computing MIN */
+		    i__3 = *n - i__ + 1;
+		    blk = min(i__3,chunk);
+		    zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1],
+			    lda, &work[ivt], &ldwkvt, &rwork[nrwork]);
+		    zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ *
+			    a_dim1 + 1], lda);
+/* L50: */
+		}
+	    } else if (wntqs) {
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__2, &ierr);
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zungbr_("P", m, n, m, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: need 0)
+                (Rworkspace: need 3*M*M)
+*/
+
+		zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need M*M+2*M*N)
+*/
+
+		nrwork = iru;
+		zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+	    } else {
+
+/*
+                Copy A to U, generate Q
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("L", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zungbr_("Q", m, m, n, &u[u_offset], ldu, &work[itauq], &work[
+			nwork], &i__2, &ierr);
+
+/*
+                Copy A to VT, generate P**H
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: 0)
+*/
+
+		zlacpy_("U", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+		i__2 = *lwork - nwork + 1;
+		zungbr_("P", n, n, m, &vt[vt_offset], ldvt, &work[itaup], &
+			work[nwork], &i__2, &ierr);
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Multiply Q in U by real matrix RWORK(IRU), storing the
+                result in A, copying to U
+                (CWorkspace: need 0)
+                (Rworkspace: need 3*M*M)
+*/
+
+		zlacrm_(m, m, &u[u_offset], ldu, &rwork[iru], m, &a[a_offset],
+			 lda, &rwork[nrwork]);
+		zlacpy_("F", m, m, &a[a_offset], lda, &u[u_offset], ldu);
+
+/*
+                Multiply real matrix RWORK(IRVT) by P**H in VT,
+                storing the result in A, copying to VT
+                (Cworkspace: need 0)
+                (Rworkspace: need M*M+2*M*N)
+*/
+
+		zlarcm_(m, n, &rwork[irvt], m, &vt[vt_offset], ldvt, &a[
+			a_offset], lda, &rwork[nrwork]);
+		zlacpy_("F", m, n, &a[a_offset], lda, &vt[vt_offset], ldvt);
+	    }
+
+	} else {
+
+/*
+             N .LT. MNTHR2
+
+             Path 6t (N greater than M, but not much larger)
+             Reduce to bidiagonal form without LQ decomposition
+             Use ZUNMBR to compute singular vectors
+*/
+
+	    ie = 1;
+	    nrwork = ie + *m;
+	    itauq = 1;
+	    itaup = itauq + *m;
+	    nwork = itaup + *m;
+
+/*
+             Bidiagonalize A
+             (CWorkspace: need 2*M+N, prefer 2*M+(M+N)*NB)
+             (RWorkspace: M)
+*/
+
+	    i__2 = *lwork - nwork + 1;
+	    zgebrd_(m, n, &a[a_offset], lda, &s[1], &rwork[ie], &work[itauq],
+		    &work[itaup], &work[nwork], &i__2, &ierr);
+	    if (wntqn) {
+
+/*
+                Compute singular values only
+                (Cworkspace: 0)
+                (Rworkspace: need BDSPAC)
+*/
+
+		dbdsdc_("L", "N", m, &s[1], &rwork[ie], dum, &c__1, dum, &
+			c__1, dum, idum, &rwork[nrwork], &iwork[1], info);
+	    } else if (wntqo) {
+		ldwkvt = *m;
+		ivt = nwork;
+		if (*lwork >= *m * *n + *m * 3) {
+
+/*                 WORK( IVT ) is M by N */
+
+		    zlaset_("F", m, n, &c_b59, &c_b59, &work[ivt], &ldwkvt);
+		    nwork = ivt + ldwkvt * *n;
+		} else {
+
+/*                 WORK( IVT ) is M by CHUNK */
+
+		    chunk = (*lwork - *m * 3) / *m;
+		    nwork = ivt + ldwkvt * chunk;
+		}
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (Cworkspace: need 2*M, prefer M+M*NB)
+                (Rworkspace: need 0)
+*/
+
+		zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__2 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__2, &ierr);
+
+		if (*lwork >= *m * *n + *m * 3) {
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix WORK(IVT)
+                Overwrite WORK(IVT) by right singular vectors of A,
+                copying to A
+                (Cworkspace: need M*N+2*M, prefer M*N+M+M*NB)
+                (Rworkspace: need 0)
+*/
+
+		    zlacp2_("F", m, m, &rwork[irvt], m, &work[ivt], &ldwkvt);
+		    i__2 = *lwork - nwork + 1;
+		    zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[
+			    itaup], &work[ivt], &ldwkvt, &work[nwork], &i__2,
+			    &ierr);
+		    zlacpy_("F", m, n, &work[ivt], &ldwkvt, &a[a_offset], lda);
+		} else {
+
+/*
+                   Generate P**H in A
+                   (Cworkspace: need 2*M, prefer M+M*NB)
+                   (Rworkspace: need 0)
+*/
+
+		    i__2 = *lwork - nwork + 1;
+		    zungbr_("P", m, n, m, &a[a_offset], lda, &work[itaup], &
+			    work[nwork], &i__2, &ierr);
+
+/*
+                   Multiply Q in A by real matrix RWORK(IRU), storing the
+                   result in WORK(IU), copying to A
+                   (CWorkspace: need M*M, prefer M*N)
+                   (Rworkspace: need 3*M*M, prefer M*M+2*M*N)
+*/
+
+		    nrwork = iru;
+		    i__2 = *n;
+		    i__1 = chunk;
+		    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ +=
+			     i__1) {
+/* Computing MIN */
+			i__3 = *n - i__ + 1;
+			blk = min(i__3,chunk);
+			zlarcm_(m, &blk, &rwork[irvt], m, &a[i__ * a_dim1 + 1]
+				, lda, &work[ivt], &ldwkvt, &rwork[nrwork]);
+			zlacpy_("F", m, &blk, &work[ivt], &ldwkvt, &a[i__ *
+				a_dim1 + 1], lda);
+/* L60: */
+		    }
+		}
+	    } else if (wntqs) {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+		dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: M*M)
+*/
+
+		zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: M*M)
+*/
+
+		zlaset_("F", m, n, &c_b59, &c_b59, &vt[vt_offset], ldvt);
+		zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", m, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    } else {
+
+/*
+                Perform bidiagonal SVD, computing left singular vectors
+                of bidiagonal matrix in RWORK(IRU) and computing right
+                singular vectors of bidiagonal matrix in RWORK(IRVT)
+                (CWorkspace: need 0)
+                (RWorkspace: need BDSPAC)
+*/
+
+		irvt = nrwork;
+		iru = irvt + *m * *m;
+		nrwork = iru + *m * *m;
+
+		dbdsdc_("L", "I", m, &s[1], &rwork[ie], &rwork[iru], m, &
+			rwork[irvt], m, dum, idum, &rwork[nrwork], &iwork[1],
+			info);
+
+/*
+                Copy real matrix RWORK(IRU) to complex matrix U
+                Overwrite U by left singular vectors of A
+                (CWorkspace: need 3*M, prefer 2*M+M*NB)
+                (RWorkspace: M*M)
+*/
+
+		zlacp2_("F", m, m, &rwork[iru], m, &u[u_offset], ldu);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("Q", "L", "N", m, m, n, &a[a_offset], lda, &work[
+			itauq], &u[u_offset], ldu, &work[nwork], &i__1, &ierr);
+
+/*              Set the right corner of VT to identity matrix */
+
+		i__1 = *n - *m;
+		i__2 = *n - *m;
+		zlaset_("F", &i__1, &i__2, &c_b59, &c_b60, &vt[*m + 1 + (*m +
+			1) * vt_dim1], ldvt);
+
+/*
+                Copy real matrix RWORK(IRVT) to complex matrix VT
+                Overwrite VT by right singular vectors of A
+                (CWorkspace: need 2*M+N, prefer 2*M+N*NB)
+                (RWorkspace: M*M)
+*/
+
+		zlaset_("F", n, n, &c_b59, &c_b59, &vt[vt_offset], ldvt);
+		zlacp2_("F", m, m, &rwork[irvt], m, &vt[vt_offset], ldvt);
+		i__1 = *lwork - nwork + 1;
+		zunmbr_("P", "R", "C", n, n, m, &a[a_offset], lda, &work[
+			itaup], &vt[vt_offset], ldvt, &work[nwork], &i__1, &
+			ierr);
+	    }
+
+	}
+
+    }
+
+/*     Undo scaling if necessary */
+
+    if (iscl == 1) {
+	if (anrm > bignum) {
+	    dlascl_("G", &c__0, &c__0, &bignum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+	if (anrm < smlnum) {
+	    dlascl_("G", &c__0, &c__0, &smlnum, &anrm, &minmn, &c__1, &s[1], &
+		    minmn, &ierr);
+	}
+    }
+
+/*     Return optimal workspace in WORK(1) */
+
+    work[1].r = (doublereal) maxwrk, work[1].i = 0.;
+
+    return 0;
+
+/*     End of ZGESDD */
+
+} /* zgesdd_ */
+
+/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a,
+	integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *
+	info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern /* Subroutine */ int xerbla_(char *, integer *), zgetrf_(
+	    integer *, integer *, doublecomplex *, integer *, integer *,
+	    integer *), zgetrs_(char *, integer *, integer *, doublecomplex *,
+	     integer *, integer *, doublecomplex *, integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    ZGESV computes the solution to a complex system of linear equations
+       A * X = B,
+    where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
+
+    The LU decomposition with partial pivoting and row interchanges is
+    used to factor A as
+       A = P * L * U,
+    where P is a permutation matrix, L is unit lower triangular, and U is
+    upper triangular.  The factored form of A is then used to solve the
+    system of equations A * X = B.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of linear equations, i.e., the order of the
+            matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the N-by-N coefficient matrix A.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (output) INTEGER array, dimension (N)
+            The pivot indices that define the permutation matrix P;
+            row i of the matrix was interchanged with row IPIV(i).
+
+    B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+            On entry, the N-by-NRHS matrix of right hand side matrix B.
+            On exit, if INFO = 0, the N-by-NRHS solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero.  The factorization
+                  has been completed, but the factor U is exactly
+                  singular, so the solution could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (*n < 0) {
+	*info = -1;
+    } else if (*nrhs < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGESV ", &i__1);
+	return 0;
+    }
+
+/*     Compute the LU factorization of A. */
+
+    zgetrf_(n, n, &a[a_offset], lda, &ipiv[1], info);
+    if (*info == 0) {
+
+/*        Solve the system A*X = B, overwriting B with X. */
+
+	zgetrs_("No transpose", n, nrhs, &a[a_offset], lda, &ipiv[1], &b[
+		b_offset], ldb, info);
+    }
+    return 0;
+
+/*     End of ZGESV */
+
+} /* zgesv_ */
+
+/* Subroutine */ int zgetf2_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer j, jp;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *), zgeru_(integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, integer *), zswap_(integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(
+	    char *, integer *);
+    extern integer izamax_(integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGETF2 computes an LU factorization of a general m-by-n matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the m by n matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, U(k,k) is exactly zero. The factorization
+                 has been completed, but the factor U is exactly
+                 singular, and division by zero will occur if it is used
+                 to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGETF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = min(*m,*n);
+    for (j = 1; j <= i__1; ++j) {
+
+/*        Find pivot and test for singularity. */
+
+	i__2 = *m - j + 1;
+	jp = j - 1 + izamax_(&i__2, &a[j + j * a_dim1], &c__1);
+	ipiv[j] = jp;
+	i__2 = jp + j * a_dim1;
+	if ((a[i__2].r != 0.) || (a[i__2].i != 0.)) {
+
+/*           Apply the interchange to columns 1:N. */
+
+	    if (jp != j) {
+		zswap_(n, &a[j + a_dim1], lda, &a[jp + a_dim1], lda);
+	    }
+
+/*           Compute elements J+1:M of J-th column. */
+
+	    if (j < *m) {
+		i__2 = *m - j;
+		z_div(&z__1, &c_b60, &a[j + j * a_dim1]);
+		zscal_(&i__2, &z__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+
+	} else if (*info == 0) {
+
+	    *info = j;
+	}
+
+	if (j < min(*m,*n)) {
+
+/*           Update trailing submatrix. */
+
+	    i__2 = *m - j;
+	    i__3 = *n - j;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgeru_(&i__2, &i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &a[j +
+		    (j + 1) * a_dim1], lda, &a[j + 1 + (j + 1) * a_dim1], lda)
+		    ;
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of ZGETF2 */
+
+} /* zgetf2_ */
+
+/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, integer *ipiv, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, j, jb, nb, iinfo;
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), ztrsm_(char *, char *, char *, char *,
+	     integer *, integer *, doublecomplex *, doublecomplex *, integer *
+	    , doublecomplex *, integer *),
+	    zgetf2_(integer *, integer *, doublecomplex *, integer *, integer
+	    *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *,
+	     integer *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGETRF computes an LU factorization of a general M-by-N matrix A
+    using partial pivoting with row interchanges.
+
+    The factorization has the form
+       A = P * L * U
+    where P is a permutation matrix, L is lower triangular with unit
+    diagonal elements (lower trapezoidal if m > n), and U is upper
+    triangular (upper trapezoidal if m < n).
+
+    This is the right-looking Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the M-by-N matrix to be factored.
+            On exit, the factors L and U from the factorization
+            A = P*L*U; the unit diagonal elements of L are not stored.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    IPIV    (output) INTEGER array, dimension (min(M,N))
+            The pivot indices; for 1 <= i <= min(M,N), row i of the
+            matrix was interchanged with row IPIV(i).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
+                  has been completed, but the factor U is exactly
+                  singular, and division by zero will occur if it is used
+                  to solve a system of equations.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*m)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGETRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "ZGETRF", " ", m, n, &c_n1, &c_n1, (ftnlen)6, (ftnlen)
+	    1);
+    if ((nb <= 1) || (nb >= min(*m,*n))) {
+
+/*        Use unblocked code. */
+
+	zgetf2_(m, n, &a[a_offset], lda, &ipiv[1], info);
+    } else {
+
+/*        Use blocked code. */
+
+	i__1 = min(*m,*n);
+	i__2 = nb;
+	for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+	    i__3 = min(*m,*n) - j + 1;
+	    jb = min(i__3,nb);
+
+/*
+             Factor diagonal and subdiagonal blocks and test for exact
+             singularity.
+*/
+
+	    i__3 = *m - j + 1;
+	    zgetf2_(&i__3, &jb, &a[j + j * a_dim1], lda, &ipiv[j], &iinfo);
+
+/*           Adjust INFO and the pivot indices. */
+
+	    if (*info == 0 && iinfo > 0) {
+		*info = iinfo + j - 1;
+	    }
+/* Computing MIN */
+	    i__4 = *m, i__5 = j + jb - 1;
+	    i__3 = min(i__4,i__5);
+	    for (i__ = j; i__ <= i__3; ++i__) {
+		ipiv[i__] = j - 1 + ipiv[i__];
+/* L10: */
+	    }
+
+/*           Apply interchanges to columns 1:J-1. */
+
+	    i__3 = j - 1;
+	    i__4 = j + jb - 1;
+	    zlaswp_(&i__3, &a[a_offset], lda, &j, &i__4, &ipiv[1], &c__1);
+
+	    if (j + jb <= *n) {
+
+/*              Apply interchanges to columns J+JB:N. */
+
+		i__3 = *n - j - jb + 1;
+		i__4 = j + jb - 1;
+		zlaswp_(&i__3, &a[(j + jb) * a_dim1 + 1], lda, &j, &i__4, &
+			ipiv[1], &c__1);
+
+/*              Compute block row of U. */
+
+		i__3 = *n - j - jb + 1;
+		ztrsm_("Left", "Lower", "No transpose", "Unit", &jb, &i__3, &
+			c_b60, &a[j + j * a_dim1], lda, &a[j + (j + jb) *
+			a_dim1], lda);
+		if (j + jb <= *m) {
+
+/*                 Update trailing submatrix. */
+
+		    i__3 = *m - j - jb + 1;
+		    i__4 = *n - j - jb + 1;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "No transpose", &i__3, &i__4, &jb,
+			    &z__1, &a[j + jb + j * a_dim1], lda, &a[j + (j +
+			    jb) * a_dim1], lda, &c_b60, &a[j + jb + (j + jb) *
+			     a_dim1], lda);
+		}
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of ZGETRF */
+
+} /* zgetrf_ */
+
+/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs,
+	doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b,
+	integer *ldb, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
+	     doublecomplex *, integer *),
+	    xerbla_(char *, integer *);
+    static logical notran;
+    extern /* Subroutine */ int zlaswp_(integer *, doublecomplex *, integer *,
+	     integer *, integer *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZGETRS solves a system of linear equations
+       A * X = B,  A**T * X = B,  or  A**H * X = B
+    with a general N-by-N matrix A using the LU factorization computed
+    by ZGETRF.
+
+    Arguments
+    =========
+
+    TRANS   (input) CHARACTER*1
+            Specifies the form of the system of equations:
+            = 'N':  A * X = B     (No transpose)
+            = 'T':  A**T * X = B  (Transpose)
+            = 'C':  A**H * X = B  (Conjugate transpose)
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The factors L and U from the factorization A = P*L*U
+            as computed by ZGETRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    IPIV    (input) INTEGER array, dimension (N)
+            The pivot indices from ZGETRF; for 1<=i<=N, row i of the
+            matrix was interchanged with row IPIV(i).
+
+    B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    notran = lsame_(trans, "N");
+    if (! notran && ! lsame_(trans, "T") && ! lsame_(
+	    trans, "C")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZGETRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (notran) {
+
+/*
+          Solve A * X = B.
+
+          Apply row interchanges to the right hand sides.
+*/
+
+	zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c__1);
+
+/*        Solve L*X = B, overwriting B with X. */
+
+	ztrsm_("Left", "Lower", "No transpose", "Unit", n, nrhs, &c_b60, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A**T * X = B  or A**H * X = B.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	ztrsm_("Left", "Upper", trans, "Non-unit", n, nrhs, &c_b60, &a[
+		a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	ztrsm_("Left", "Lower", trans, "Unit", n, nrhs, &c_b60, &a[a_offset],
+		lda, &b[b_offset], ldb);
+
+/*        Apply row interchanges to the solution vectors. */
+
+	zlaswp_(nrhs, &b[b_offset], ldb, &c__1, n, &ipiv[1], &c_n1);
+    }
+
+    return 0;
+
+/*     End of ZGETRS */
+
+} /* zgetrs_ */
+
+/* Subroutine */ int zheevd_(char *jobz, char *uplo, integer *n,
+	doublecomplex *a, integer *lda, doublereal *w, doublecomplex *work,
+	integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork,
+	integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal eps;
+    static integer inde;
+    static doublereal anrm;
+    static integer imax;
+    static doublereal rmin, rmax;
+    static integer lopt;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    static doublereal sigma;
+    extern logical lsame_(char *, char *);
+    static integer iinfo, lwmin, liopt;
+    static logical lower;
+    static integer llrwk, lropt;
+    static logical wantz;
+    static integer indwk2, llwrk2;
+
+    static integer iscale;
+    static doublereal safmin;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal bignum;
+    extern doublereal zlanhe_(char *, char *, integer *, doublecomplex *,
+	    integer *, doublereal *);
+    static integer indtau;
+    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
+	     integer *), zlascl_(char *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, integer *, doublecomplex *, integer *,
+	    integer *), zstedc_(char *, integer *, doublereal *,
+	    doublereal *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublereal *, integer *, integer *, integer *, integer
+	    *);
+    static integer indrwk, indwrk, liwmin;
+    extern /* Subroutine */ int zhetrd_(char *, integer *, doublecomplex *,
+	    integer *, doublereal *, doublereal *, doublecomplex *,
+	    doublecomplex *, integer *, integer *), zlacpy_(char *,
+	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
+	     integer *);
+    static integer lrwmin, llwork;
+    static doublereal smlnum;
+    static logical lquery;
+    extern /* Subroutine */ int zunmtr_(char *, char *, char *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
+
+
+/*
+    -- LAPACK driver routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZHEEVD computes all eigenvalues and, optionally, eigenvectors of a
+    complex Hermitian matrix A.  If eigenvectors are desired, it uses a
+    divide and conquer algorithm.
+
+    The divide and conquer algorithm makes very mild assumptions about
+    floating point arithmetic. It will work on machines with a guard
+    digit in add/subtract, or on those binary machines without guard
+    digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
+    Cray-2. It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    JOBZ    (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only;
+            = 'V':  Compute eigenvalues and eigenvectors.
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA, N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of A contains the
+            upper triangular part of the matrix A.  If UPLO = 'L',
+            the leading N-by-N lower triangular part of A contains
+            the lower triangular part of the matrix A.
+            On exit, if JOBZ = 'V', then if INFO = 0, A contains the
+            orthonormal eigenvectors of the matrix A.
+            If JOBZ = 'N', then on exit the lower triangle (if UPLO='L')
+            or the upper triangle (if UPLO='U') of A, including the
+            diagonal, is destroyed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    W       (output) DOUBLE PRECISION array, dimension (N)
+            If INFO = 0, the eigenvalues in ascending order.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The length of the array WORK.
+            If N <= 1,                LWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LWORK must be at least N + 1.
+            If JOBZ  = 'V' and N > 1, LWORK must be at least 2*N + N**2.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace/output) DOUBLE PRECISION array,
+                                           dimension (LRWORK)
+            On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+
+    LRWORK  (input) INTEGER
+            The dimension of the array RWORK.
+            If N <= 1,                LRWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LRWORK must be at least N.
+            If JOBZ  = 'V' and N > 1, LRWORK must be at least
+                           1 + 5*N + 2*N**2.
+
+            If LRWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the RWORK array,
+            returns this value as the first entry of the RWORK array, and
+            no error message related to LRWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If N <= 1,                LIWORK must be at least 1.
+            If JOBZ  = 'N' and N > 1, LIWORK must be at least 1.
+            If JOBZ  = 'V' and N > 1, LIWORK must be at least 3 + 5*N.
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the algorithm failed to converge; i
+                  off-diagonal elements of an intermediate tridiagonal
+                  form did not converge to zero.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --w;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    wantz = lsame_(jobz, "V");
+    lower = lsame_(uplo, "L");
+    lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1);
+
+    *info = 0;
+    if (*n <= 1) {
+	lwmin = 1;
+	lrwmin = 1;
+	liwmin = 1;
+	lopt = lwmin;
+	lropt = lrwmin;
+	liopt = liwmin;
+    } else {
+	if (wantz) {
+	    lwmin = ((*n) << (1)) + *n * *n;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lrwmin = *n * 5 + 1 + ((i__1 * i__1) << (1));
+	    liwmin = *n * 5 + 3;
+	} else {
+	    lwmin = *n + 1;
+	    lrwmin = *n;
+	    liwmin = 1;
+	}
+	lopt = lwmin;
+	lropt = lrwmin;
+	liopt = liwmin;
+    }
+    if (! ((wantz) || (lsame_(jobz, "N")))) {
+	*info = -1;
+    } else if (! ((lower) || (lsame_(uplo, "U")))) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*lrwork < lrwmin && ! lquery) {
+	*info = -10;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	work[1].r = (doublereal) lopt, work[1].i = 0.;
+	rwork[1] = (doublereal) lropt;
+	iwork[1] = liopt;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZHEEVD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	i__1 = a_dim1 + 1;
+	w[1] = a[i__1].r;
+	if (wantz) {
+	    i__1 = a_dim1 + 1;
+	    a[i__1].r = 1., a[i__1].i = 0.;
+	}
+	return 0;
+    }
+
+/*     Get machine constants. */
+
+    safmin = SAFEMINIMUM;
+    eps = PRECISION;
+    smlnum = safmin / eps;
+    bignum = 1. / smlnum;
+    rmin = sqrt(smlnum);
+    rmax = sqrt(bignum);
+
+/*     Scale matrix to allowable range, if necessary. */
+
+    anrm = zlanhe_("M", uplo, n, &a[a_offset], lda, &rwork[1]);
+    iscale = 0;
+    if (anrm > 0. && anrm < rmin) {
+	iscale = 1;
+	sigma = rmin / anrm;
+    } else if (anrm > rmax) {
+	iscale = 1;
+	sigma = rmax / anrm;
+    }
+    if (iscale == 1) {
+	zlascl_(uplo, &c__0, &c__0, &c_b1015, &sigma, n, n, &a[a_offset], lda,
+		 info);
+    }
+
+/*     Call ZHETRD to reduce Hermitian matrix to tridiagonal form. */
+
+    inde = 1;
+    indtau = 1;
+    indwrk = indtau + *n;
+    indrwk = inde + *n;
+    indwk2 = indwrk + *n * *n;
+    llwork = *lwork - indwrk + 1;
+    llwrk2 = *lwork - indwk2 + 1;
+    llrwk = *lrwork - indrwk + 1;
+    zhetrd_(uplo, n, &a[a_offset], lda, &w[1], &rwork[inde], &work[indtau], &
+	    work[indwrk], &llwork, &iinfo);
+/* Computing MAX */
+    i__1 = indwrk;
+    d__1 = (doublereal) lopt, d__2 = (doublereal) (*n) + work[i__1].r;
+    lopt = (integer) max(d__1,d__2);
+
+/*
+       For eigenvalues only, call DSTERF.  For eigenvectors, first call
+       ZSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the
+       tridiagonal matrix, then call ZUNMTR to multiply it to the
+       Householder transformations represented as Householder vectors in
+       A.
+*/
+
+    if (! wantz) {
+	dsterf_(n, &w[1], &rwork[inde], info);
+    } else {
+	zstedc_("I", n, &w[1], &rwork[inde], &work[indwrk], n, &work[indwk2],
+		&llwrk2, &rwork[indrwk], &llrwk, &iwork[1], liwork, info);
+	zunmtr_("L", uplo, "N", n, n, &a[a_offset], lda, &work[indtau], &work[
+		indwrk], n, &work[indwk2], &llwrk2, &iinfo);
+	zlacpy_("A", n, n, &work[indwrk], n, &a[a_offset], lda);
+/*
+   Computing MAX
+   Computing 2nd power
+*/
+	i__3 = *n;
+	i__4 = indwk2;
+	i__1 = lopt, i__2 = *n + i__3 * i__3 + (integer) work[i__4].r;
+	lopt = max(i__1,i__2);
+    }
+
+/*     If matrix was scaled, then rescale eigenvalues appropriately. */
+
+    if (iscale == 1) {
+	if (*info == 0) {
+	    imax = *n;
+	} else {
+	    imax = *info - 1;
+	}
+	d__1 = 1. / sigma;
+	dscal_(&imax, &d__1, &w[1], &c__1);
+    }
+
+    work[1].r = (doublereal) lopt, work[1].i = 0.;
+    rwork[1] = (doublereal) lropt;
+    iwork[1] = liopt;
+
+    return 0;
+
+/*     End of ZHEEVD */
+
+} /* zheevd_ */
+
+/* Subroutine */ int zhetd2_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex taui;
+    extern /* Subroutine */ int zher2_(char *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static doublecomplex alpha;
+    extern logical lsame_(char *, char *);
+    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    extern /* Subroutine */ int zhemv_(char *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, doublecomplex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(
+	    char *, integer *), zlarfg_(integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    ZHETD2 reduces a complex Hermitian matrix A to real symmetric
+    tridiagonal form T by a unitary similarity transformation:
+    Q' * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            Hermitian matrix A is stored:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the unitary
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the unitary matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) DOUBLE PRECISION array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) COMPLEX*16 array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZHETD2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Reduce the upper triangle of A */
+
+	i__1 = *n + *n * a_dim1;
+	i__2 = *n + *n * a_dim1;
+	d__1 = a[i__2].r;
+	a[i__1].r = d__1, a[i__1].i = 0.;
+	for (i__ = *n - 1; i__ >= 1; --i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(1:i-1,i+1)
+*/
+
+	    i__1 = i__ + (i__ + 1) * a_dim1;
+	    alpha.r = a[i__1].r, alpha.i = a[i__1].i;
+	    zlarfg_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &taui);
+	    i__1 = i__;
+	    e[i__1] = alpha.r;
+
+	    if ((taui.r != 0.) || (taui.i != 0.)) {
+
+/*              Apply H(i) from both sides to A(1:i,1:i) */
+
+		i__1 = i__ + (i__ + 1) * a_dim1;
+		a[i__1].r = 1., a[i__1].i = 0.;
+
+/*              Compute  x := tau * A * v  storing x in TAU(1:i) */
+
+		zhemv_(uplo, &i__, &taui, &a[a_offset], lda, &a[(i__ + 1) *
+			a_dim1 + 1], &c__1, &c_b59, &tau[1], &c__1)
+			;
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		z__3.r = -.5, z__3.i = -0.;
+		z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r *
+			taui.i + z__3.i * taui.r;
+		zdotc_(&z__4, &i__, &tau[1], &c__1, &a[(i__ + 1) * a_dim1 + 1]
+			, &c__1);
+		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r *
+			z__4.i + z__2.i * z__4.r;
+		alpha.r = z__1.r, alpha.i = z__1.i;
+		zaxpy_(&i__, &alpha, &a[(i__ + 1) * a_dim1 + 1], &c__1, &tau[
+			1], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		z__1.r = -1., z__1.i = -0.;
+		zher2_(uplo, &i__, &z__1, &a[(i__ + 1) * a_dim1 + 1], &c__1, &
+			tau[1], &c__1, &a[a_offset], lda);
+
+	    } else {
+		i__1 = i__ + i__ * a_dim1;
+		i__2 = i__ + i__ * a_dim1;
+		d__1 = a[i__2].r;
+		a[i__1].r = d__1, a[i__1].i = 0.;
+	    }
+	    i__1 = i__ + (i__ + 1) * a_dim1;
+	    i__2 = i__;
+	    a[i__1].r = e[i__2], a[i__1].i = 0.;
+	    i__1 = i__ + 1;
+	    i__2 = i__ + 1 + (i__ + 1) * a_dim1;
+	    d__[i__1] = a[i__2].r;
+	    i__1 = i__;
+	    tau[i__1].r = taui.r, tau[i__1].i = taui.i;
+/* L10: */
+	}
+	i__1 = a_dim1 + 1;
+	d__[1] = a[i__1].r;
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__1 = a_dim1 + 1;
+	i__2 = a_dim1 + 1;
+	d__1 = a[i__2].r;
+	a[i__1].r = d__1, a[i__1].i = 0.;
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+             Generate elementary reflector H(i) = I - tau * v * v'
+             to annihilate A(i+2:n,i)
+*/
+
+	    i__2 = i__ + 1 + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *n - i__;
+/* Computing MIN */
+	    i__3 = i__ + 2;
+	    zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &
+		    taui);
+	    i__2 = i__;
+	    e[i__2] = alpha.r;
+
+	    if ((taui.r != 0.) || (taui.i != 0.)) {
+
+/*              Apply H(i) from both sides to A(i+1:n,i+1:n) */
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute  x := tau * A * v  storing y in TAU(i:n-1) */
+
+		i__2 = *n - i__;
+		zhemv_(uplo, &i__2, &taui, &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &c_b59, &tau[
+			i__], &c__1);
+
+/*              Compute  w := x - 1/2 * tau * (x'*v) * v */
+
+		z__3.r = -.5, z__3.i = -0.;
+		z__2.r = z__3.r * taui.r - z__3.i * taui.i, z__2.i = z__3.r *
+			taui.i + z__3.i * taui.r;
+		i__2 = *n - i__;
+		zdotc_(&z__4, &i__2, &tau[i__], &c__1, &a[i__ + 1 + i__ *
+			a_dim1], &c__1);
+		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r *
+			z__4.i + z__2.i * z__4.r;
+		alpha.r = z__1.r, alpha.i = z__1.i;
+		i__2 = *n - i__;
+		zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &tau[
+			i__], &c__1);
+
+/*
+                Apply the transformation as a rank-2 update:
+                   A := A - v * w' - w * v'
+*/
+
+		i__2 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zher2_(uplo, &i__2, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1,
+			&tau[i__], &c__1, &a[i__ + 1 + (i__ + 1) * a_dim1],
+			lda);
+
+	    } else {
+		i__2 = i__ + 1 + (i__ + 1) * a_dim1;
+		i__3 = i__ + 1 + (i__ + 1) * a_dim1;
+		d__1 = a[i__3].r;
+		a[i__2].r = d__1, a[i__2].i = 0.;
+	    }
+	    i__2 = i__ + 1 + i__ * a_dim1;
+	    i__3 = i__;
+	    a[i__2].r = e[i__3], a[i__2].i = 0.;
+	    i__2 = i__;
+	    i__3 = i__ + i__ * a_dim1;
+	    d__[i__2] = a[i__3].r;
+	    i__2 = i__;
+	    tau[i__2].r = taui.r, tau[i__2].i = taui.i;
+/* L20: */
+	}
+	i__1 = *n;
+	i__2 = *n + *n * a_dim1;
+	d__[i__1] = a[i__2].r;
+    }
+
+    return 0;
+
+/*     End of ZHETD2 */
+
+} /* zhetd2_ */
+
+/* Subroutine */ int zhetrd_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, doublereal *d__, doublereal *e, doublecomplex *tau,
+	doublecomplex *work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, j, nb, kk, nx, iws;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    static logical upper;
+    extern /* Subroutine */ int zhetd2_(char *, integer *, doublecomplex *,
+	    integer *, doublereal *, doublereal *, doublecomplex *, integer *), zher2k_(char *, char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublereal *, doublecomplex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlatrd_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublereal *, doublecomplex *,
+	    doublecomplex *, integer *);
+    static integer ldwork, lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZHETRD reduces a complex Hermitian matrix A to real symmetric
+    tridiagonal form T by a unitary similarity transformation:
+    Q**H * A * Q = T.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit, if UPLO = 'U', the diagonal and first superdiagonal
+            of A are overwritten by the corresponding elements of the
+            tridiagonal matrix T, and the elements above the first
+            superdiagonal, with the array TAU, represent the unitary
+            matrix Q as a product of elementary reflectors; if UPLO
+            = 'L', the diagonal and first subdiagonal of A are over-
+            written by the corresponding elements of the tridiagonal
+            matrix T, and the elements below the first subdiagonal, with
+            the array TAU, represent the unitary matrix Q as a product
+            of elementary reflectors. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    D       (output) DOUBLE PRECISION array, dimension (N)
+            The diagonal elements of the tridiagonal matrix T:
+            D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (N-1)
+            The off-diagonal elements of the tridiagonal matrix T:
+            E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
+
+    TAU     (output) COMPLEX*16 array, dimension (N-1)
+            The scalar factors of the elementary reflectors (see Further
+            Details).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= 1.
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n-1) . . . H(2) H(1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
+    A(1:i-1,i+1), and tau in TAU(i).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(n-1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
+    and tau in TAU(i).
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  d   e   v2  v3  v4 )              (  d                  )
+      (      d   e   v3  v4 )              (  e   d              )
+      (          d   e   v4 )              (  v1  e   d          )
+      (              d   e  )              (  v1  v2  e   d      )
+      (                  d  )              (  v1  v2  v3  e   d  )
+
+    where d and e denote diagonal and off-diagonal elements of T, and vi
+    denotes an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    } else if (*lwork < 1 && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+
+/*        Determine the block size. */
+
+	nb = ilaenv_(&c__1, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
+		 (ftnlen)1);
+	lwkopt = *n * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZHETRD", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nx = *n;
+    iws = 1;
+    if (nb > 1 && nb < *n) {
+
+/*
+          Determine when to cross over from blocked to unblocked code
+          (last block is always handled by unblocked code).
+
+   Computing MAX
+*/
+	i__1 = nb, i__2 = ilaenv_(&c__3, "ZHETRD", uplo, n, &c_n1, &c_n1, &
+		c_n1, (ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *n) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  determine the
+                minimum value of NB, and reduce NB or force use of
+                unblocked code by setting NX = N.
+
+   Computing MAX
+*/
+		i__1 = *lwork / ldwork;
+		nb = max(i__1,1);
+		nbmin = ilaenv_(&c__2, "ZHETRD", uplo, n, &c_n1, &c_n1, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		if (nb < nbmin) {
+		    nx = *n;
+		}
+	    }
+	} else {
+	    nx = *n;
+	}
+    } else {
+	nb = 1;
+    }
+
+    if (upper) {
+
+/*
+          Reduce the upper triangle of A.
+          Columns 1:kk are handled by the unblocked method.
+*/
+
+	kk = *n - (*n - nx + nb - 1) / nb * nb;
+	i__1 = kk + 1;
+	i__2 = -nb;
+	for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
+		i__2) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = i__ + nb - 1;
+	    zlatrd_(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
+		    work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(1:i-1,1:i-1), using an
+             update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ * a_dim1
+		    + 1], lda, &work[1], &ldwork, &c_b1015, &a[a_offset], lda);
+
+/*
+             Copy superdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j - 1 + j * a_dim1;
+		i__5 = j - 1;
+		a[i__4].r = e[i__5], a[i__4].i = 0.;
+		i__4 = j;
+		i__5 = j + j * a_dim1;
+		d__[i__4] = a[i__5].r;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	zhetd2_(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
+    } else {
+
+/*        Reduce the lower triangle of A */
+
+	i__2 = *n - nx;
+	i__1 = nb;
+	for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+
+/*
+             Reduce columns i:i+nb-1 to tridiagonal form and form the
+             matrix W which is needed to update the unreduced part of
+             the matrix
+*/
+
+	    i__3 = *n - i__ + 1;
+	    zlatrd_(uplo, &i__3, &nb, &a[i__ + i__ * a_dim1], lda, &e[i__], &
+		    tau[i__], &work[1], &ldwork);
+
+/*
+             Update the unreduced submatrix A(i+nb:n,i+nb:n), using
+             an update of the form:  A := A - V*W' - W*V'
+*/
+
+	    i__3 = *n - i__ - nb + 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zher2k_(uplo, "No transpose", &i__3, &nb, &z__1, &a[i__ + nb +
+		    i__ * a_dim1], lda, &work[nb + 1], &ldwork, &c_b1015, &a[
+		    i__ + nb + (i__ + nb) * a_dim1], lda);
+
+/*
+             Copy subdiagonal elements back into A, and diagonal
+             elements into D
+*/
+
+	    i__3 = i__ + nb - 1;
+	    for (j = i__; j <= i__3; ++j) {
+		i__4 = j + 1 + j * a_dim1;
+		i__5 = j;
+		a[i__4].r = e[i__5], a[i__4].i = 0.;
+		i__4 = j;
+		i__5 = j + j * a_dim1;
+		d__[i__4] = a[i__5].r;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+/*        Use unblocked code to reduce the last or only block */
+
+	i__1 = *n - i__ + 1;
+	zhetd2_(uplo, &i__1, &a[i__ + i__ * a_dim1], lda, &d__[i__], &e[i__],
+		&tau[i__], &iinfo);
+    }
+
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZHETRD */
+
+} /* zhetrd_ */
+
+/* Subroutine */ int zhseqr_(char *job, char *compz, integer *n, integer *ilo,
+	 integer *ihi, doublecomplex *h__, integer *ldh, doublecomplex *w,
+	doublecomplex *z__, integer *ldz, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4[2],
+	    i__5, i__6;
+    doublereal d__1, d__2, d__3, d__4;
+    doublecomplex z__1;
+    char ch__1[2];
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+    void d_cnjg(doublecomplex *, doublecomplex *);
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__, j, k, l;
+    static doublecomplex s[225]	/* was [15][15] */, v[16];
+    static integer i1, i2, ii, nh, nr, ns, nv;
+    static doublecomplex vv[16];
+    static integer itn;
+    static doublecomplex tau;
+    static integer its;
+    static doublereal ulp, tst1;
+    static integer maxb, ierr;
+    static doublereal unfl;
+    static doublecomplex temp;
+    static doublereal ovfl;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *);
+    static integer itemp;
+    static doublereal rtemp;
+    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+    static logical initz, wantt, wantz;
+    static doublereal rwork[1];
+    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    extern doublereal dlapy2_(doublereal *, doublereal *);
+    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
+
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zdscal_(integer *, doublereal *,
+	    doublecomplex *, integer *), zlarfg_(integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *);
+    extern integer izamax_(integer *, doublecomplex *, integer *);
+    extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *,
+	    doublereal *);
+    extern /* Subroutine */ int zlahqr_(logical *, logical *, integer *,
+	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
+	     integer *, integer *, doublecomplex *, integer *, integer *),
+	    zlacpy_(char *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), zlaset_(char *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, doublecomplex *,
+	    integer *), zlarfx_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *);
+    static doublereal smlnum;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZHSEQR computes the eigenvalues of a complex upper Hessenberg
+    matrix H, and, optionally, the matrices T and Z from the Schur
+    decomposition H = Z T Z**H, where T is an upper triangular matrix
+    (the Schur form), and Z is the unitary matrix of Schur vectors.
+
+    Optionally Z may be postmultiplied into an input unitary matrix Q,
+    so that this routine can give the Schur factorization of a matrix A
+    which has been reduced to the Hessenberg form H by the unitary
+    matrix Q:  A = Q*H*Q**H = (QZ)*T*(QZ)**H.
+
+    Arguments
+    =========
+
+    JOB     (input) CHARACTER*1
+            = 'E': compute eigenvalues only;
+            = 'S': compute eigenvalues and the Schur form T.
+
+    COMPZ   (input) CHARACTER*1
+            = 'N': no Schur vectors are computed;
+            = 'I': Z is initialized to the unit matrix and the matrix Z
+                   of Schur vectors of H is returned;
+            = 'V': Z must contain an unitary matrix Q on entry, and
+                   the product Q*Z is returned.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper triangular in rows
+            and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
+            set by a previous call to ZGEBAL, and then passed to CGEHRD
+            when the matrix output by ZGEBAL is reduced to Hessenberg
+            form. Otherwise ILO and IHI should be set to 1 and N
+            respectively.
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    H       (input/output) COMPLEX*16 array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if JOB = 'S', H contains the upper triangular matrix
+            T from the Schur decomposition (the Schur form). If
+            JOB = 'E', the contents of H are unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    W       (output) COMPLEX*16 array, dimension (N)
+            The computed eigenvalues. If JOB = 'S', the eigenvalues are
+            stored in the same order as on the diagonal of the Schur form
+            returned in H, with W(i) = H(i,i).
+
+    Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+            If COMPZ = 'N': Z is not referenced.
+            If COMPZ = 'I': on entry, Z need not be set, and on exit, Z
+            contains the unitary matrix Z of the Schur vectors of H.
+            If COMPZ = 'V': on entry Z must contain an N-by-N matrix Q,
+            which is assumed to be equal to the unit matrix except for
+            the submatrix Z(ILO:IHI,ILO:IHI); on exit Z contains Q*Z.
+            Normally Q is the unitary matrix generated by ZUNGHR after
+            the call to ZGEHRD which formed the Hessenberg matrix H.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.
+            LDZ >= max(1,N) if COMPZ = 'I' or 'V'; LDZ >= 1 otherwise.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.  LWORK >= max(1,N).
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, ZHSEQR failed to compute all the
+                  eigenvalues in a total of 30*(IHI-ILO+1) iterations;
+                  elements 1:ilo-1 and i+1:n of W contain those
+                  eigenvalues which have been successfully computed.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --w;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    wantt = lsame_(job, "S");
+    initz = lsame_(compz, "I");
+    wantz = (initz) || (lsame_(compz, "V"));
+
+    *info = 0;
+    i__1 = max(1,*n);
+    work[1].r = (doublereal) i__1, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (! lsame_(job, "E") && ! wantt) {
+	*info = -1;
+    } else if (! lsame_(compz, "N") && ! wantz) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -4;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -5;
+    } else if (*ldh < max(1,*n)) {
+	*info = -7;
+    } else if ((*ldz < 1) || (wantz && *ldz < max(1,*n))) {
+	*info = -10;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZHSEQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Initialize Z, if necessary */
+
+    if (initz) {
+	zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz);
+    }
+
+/*     Store the eigenvalues isolated by ZGEBAL. */
+
+    i__1 = *ilo - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__ + i__ * h_dim1;
+	w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i;
+/* L10: */
+    }
+    i__1 = *n;
+    for (i__ = *ihi + 1; i__ <= i__1; ++i__) {
+	i__2 = i__;
+	i__3 = i__ + i__ * h_dim1;
+	w[i__2].r = h__[i__3].r, w[i__2].i = h__[i__3].i;
+/* L20: */
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	i__1 = *ilo;
+	i__2 = *ilo + *ilo * h_dim1;
+	w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
+	return 0;
+    }
+
+/*
+       Set rows and columns ILO to IHI to zero below the first
+       subdiagonal.
+*/
+
+    i__1 = *ihi - 2;
+    for (j = *ilo; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = j + 2; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * h_dim1;
+	    h__[i__3].r = 0., h__[i__3].i = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    nh = *ihi - *ilo + 1;
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are re-set inside the main loop.
+*/
+
+    if (wantt) {
+	i1 = 1;
+	i2 = *n;
+    } else {
+	i1 = *ilo;
+	i2 = *ihi;
+    }
+
+/*     Ensure that the subdiagonal elements are real. */
+
+    i__1 = *ihi;
+    for (i__ = *ilo + 1; i__ <= i__1; ++i__) {
+	i__2 = i__ + (i__ - 1) * h_dim1;
+	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
+	if (d_imag(&temp) != 0.) {
+	    d__1 = temp.r;
+	    d__2 = d_imag(&temp);
+	    rtemp = dlapy2_(&d__1, &d__2);
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    h__[i__2].r = rtemp, h__[i__2].i = 0.;
+	    z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp;
+	    temp.r = z__1.r, temp.i = z__1.i;
+	    if (i2 > i__) {
+		i__2 = i2 - i__;
+		d_cnjg(&z__1, &temp);
+		zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh);
+	    }
+	    i__2 = i__ - i1;
+	    zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1);
+	    if (i__ < *ihi) {
+		i__2 = i__ + 1 + i__ * h_dim1;
+		i__3 = i__ + 1 + i__ * h_dim1;
+		z__1.r = temp.r * h__[i__3].r - temp.i * h__[i__3].i, z__1.i =
+			 temp.r * h__[i__3].i + temp.i * h__[i__3].r;
+		h__[i__2].r = z__1.r, h__[i__2].i = z__1.i;
+	    }
+	    if (wantz) {
+		zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1);
+	    }
+	}
+/* L50: */
+    }
+
+/*
+       Determine the order of the multi-shift QR algorithm to be used.
+
+   Writing concatenation
+*/
+    i__4[0] = 1, a__1[0] = job;
+    i__4[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2);
+    ns = ilaenv_(&c__4, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+/* Writing concatenation */
+    i__4[0] = 1, a__1[0] = job;
+    i__4[1] = 1, a__1[1] = compz;
+    s_cat(ch__1, a__1, i__4, &c__2, (ftnlen)2);
+    maxb = ilaenv_(&c__8, "ZHSEQR", ch__1, n, ilo, ihi, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if (((ns <= 1) || (ns > nh)) || (maxb >= nh)) {
+
+/*        Use the standard double-shift algorithm */
+
+	zlahqr_(&wantt, &wantz, n, ilo, ihi, &h__[h_offset], ldh, &w[1], ilo,
+		ihi, &z__[z_offset], ldz, info);
+	return 0;
+    }
+    maxb = max(2,maxb);
+/* Computing MIN */
+    i__1 = min(ns,maxb);
+    ns = min(i__1,15);
+
+/*
+       Now 1 < NS <= MAXB < NH.
+
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    unfl = SAFEMINIMUM;
+    ovfl = 1. / unfl;
+    dlabad_(&unfl, &ovfl);
+    ulp = PRECISION;
+    smlnum = unfl * (nh / ulp);
+
+/*     ITN is the total number of multiple-shift QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of at most MAXB. Each iteration of the loop
+       works with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L60:
+    if (i__ < *ilo) {
+	goto L180;
+    }
+
+/*
+       Perform multiple-shift QR iterations on rows and columns ILO to I
+       until a submatrix of order at most MAXB splits off at the bottom
+       because a subdiagonal element has become negligible.
+*/
+
+    l = *ilo;
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    i__3 = k - 1 + (k - 1) * h_dim1;
+	    i__5 = k + k * h_dim1;
+	    tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k -
+		    1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__5].r,
+		     abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs(
+		    d__4)));
+	    if (tst1 == 0.) {
+		i__3 = i__ - l + 1;
+		tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork);
+	    }
+	    i__3 = k + (k - 1) * h_dim1;
+/* Computing MAX */
+	    d__2 = ulp * tst1;
+	    if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) {
+		goto L80;
+	    }
+/* L70: */
+	}
+L80:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible. */
+
+	    i__2 = l + (l - 1) * h_dim1;
+	    h__[i__2].r = 0., h__[i__2].i = 0.;
+	}
+
+/*        Exit from loop if a submatrix of order <= MAXB has split off. */
+
+	if (l >= i__ - maxb + 1) {
+	    goto L170;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! wantt) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 20) || (its == 30)) {
+
+/*           Exceptional shifts. */
+
+	    i__2 = i__;
+	    for (ii = i__ - ns + 1; ii <= i__2; ++ii) {
+		i__3 = ii;
+		i__5 = ii + (ii - 1) * h_dim1;
+		i__6 = ii + ii * h_dim1;
+		d__3 = ((d__1 = h__[i__5].r, abs(d__1)) + (d__2 = h__[i__6].r,
+			 abs(d__2))) * 1.5;
+		w[i__3].r = d__3, w[i__3].i = 0.;
+/* L90: */
+	    }
+	} else {
+
+/*           Use eigenvalues of trailing submatrix of order NS as shifts. */
+
+	    zlacpy_("Full", &ns, &ns, &h__[i__ - ns + 1 + (i__ - ns + 1) *
+		    h_dim1], ldh, s, &c__15);
+	    zlahqr_(&c_false, &c_false, &ns, &c__1, &ns, s, &c__15, &w[i__ -
+		    ns + 1], &c__1, &ns, &z__[z_offset], ldz, &ierr);
+	    if (ierr > 0) {
+
+/*
+                If ZLAHQR failed to compute all NS eigenvalues, use the
+                unconverged diagonal elements as the remaining shifts.
+*/
+
+		i__2 = ierr;
+		for (ii = 1; ii <= i__2; ++ii) {
+		    i__3 = i__ - ns + ii;
+		    i__5 = ii + ii * 15 - 16;
+		    w[i__3].r = s[i__5].r, w[i__3].i = s[i__5].i;
+/* L100: */
+		}
+	    }
+	}
+
+/*
+          Form the first column of (G-w(1)) (G-w(2)) . . . (G-w(ns))
+          where G is the Hessenberg submatrix H(L:I,L:I) and w is
+          the vector of shifts (stored in W). The result is
+          stored in the local array V.
+*/
+
+	v[0].r = 1., v[0].i = 0.;
+	i__2 = ns + 1;
+	for (ii = 2; ii <= i__2; ++ii) {
+	    i__3 = ii - 1;
+	    v[i__3].r = 0., v[i__3].i = 0.;
+/* L110: */
+	}
+	nv = 1;
+	i__2 = i__;
+	for (j = i__ - ns + 1; j <= i__2; ++j) {
+	    i__3 = nv + 1;
+	    zcopy_(&i__3, v, &c__1, vv, &c__1);
+	    i__3 = nv + 1;
+	    i__5 = j;
+	    z__1.r = -w[i__5].r, z__1.i = -w[i__5].i;
+	    zgemv_("No transpose", &i__3, &nv, &c_b60, &h__[l + l * h_dim1],
+		    ldh, vv, &c__1, &z__1, v, &c__1);
+	    ++nv;
+
+/*
+             Scale V(1:NV) so that max(abs(V(i))) = 1. If V is zero,
+             reset it to the unit vector.
+*/
+
+	    itemp = izamax_(&nv, v, &c__1);
+	    i__3 = itemp - 1;
+	    rtemp = (d__1 = v[i__3].r, abs(d__1)) + (d__2 = d_imag(&v[itemp -
+		    1]), abs(d__2));
+	    if (rtemp == 0.) {
+		v[0].r = 1., v[0].i = 0.;
+		i__3 = nv;
+		for (ii = 2; ii <= i__3; ++ii) {
+		    i__5 = ii - 1;
+		    v[i__5].r = 0., v[i__5].i = 0.;
+/* L120: */
+		}
+	    } else {
+		rtemp = max(rtemp,smlnum);
+		d__1 = 1. / rtemp;
+		zdscal_(&nv, &d__1, v, &c__1);
+	    }
+/* L130: */
+	}
+
+/*        Multiple-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = l; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix. NR is the order of G.
+
+   Computing MIN
+*/
+	    i__3 = ns + 1, i__5 = i__ - k + 1;
+	    nr = min(i__3,i__5);
+	    if (k > l) {
+		zcopy_(&nr, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    zlarfg_(&nr, v, &v[1], &c__1, &tau);
+	    if (k > l) {
+		i__3 = k + (k - 1) * h_dim1;
+		h__[i__3].r = v[0].r, h__[i__3].i = v[0].i;
+		i__3 = i__;
+		for (ii = k + 1; ii <= i__3; ++ii) {
+		    i__5 = ii + (k - 1) * h_dim1;
+		    h__[i__5].r = 0., h__[i__5].i = 0.;
+/* L140: */
+		}
+	    }
+	    v[0].r = 1., v[0].i = 0.;
+
+/*
+             Apply G' from the left to transform the rows of the matrix
+             in columns K to I2.
+*/
+
+	    i__3 = i2 - k + 1;
+	    d_cnjg(&z__1, &tau);
+	    zlarfx_("Left", &nr, &i__3, v, &z__1, &h__[k + k * h_dim1], ldh, &
+		    work[1]);
+
+/*
+             Apply G from the right to transform the columns of the
+             matrix in rows I1 to min(K+NR,I).
+
+   Computing MIN
+*/
+	    i__5 = k + nr;
+	    i__3 = min(i__5,i__) - i1 + 1;
+	    zlarfx_("Right", &i__3, &nr, v, &tau, &h__[i1 + k * h_dim1], ldh,
+		    &work[1]);
+
+	    if (wantz) {
+
+/*              Accumulate transformations in the matrix Z */
+
+		zlarfx_("Right", &nh, &nr, v, &tau, &z__[*ilo + k * z_dim1],
+			ldz, &work[1]);
+	    }
+/* L150: */
+	}
+
+/*        Ensure that H(I,I-1) is real. */
+
+	i__2 = i__ + (i__ - 1) * h_dim1;
+	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
+	if (d_imag(&temp) != 0.) {
+	    d__1 = temp.r;
+	    d__2 = d_imag(&temp);
+	    rtemp = dlapy2_(&d__1, &d__2);
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    h__[i__2].r = rtemp, h__[i__2].i = 0.;
+	    z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp;
+	    temp.r = z__1.r, temp.i = z__1.i;
+	    if (i2 > i__) {
+		i__2 = i2 - i__;
+		d_cnjg(&z__1, &temp);
+		zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh);
+	    }
+	    i__2 = i__ - i1;
+	    zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1);
+	    if (wantz) {
+		zscal_(&nh, &temp, &z__[*ilo + i__ * z_dim1], &c__1);
+	    }
+	}
+
+/* L160: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L170:
+
+/*
+       A submatrix of order <= MAXB in rows and columns L to I has split
+       off. Use the double-shift QR algorithm to handle it.
+*/
+
+    zlahqr_(&wantt, &wantz, n, &l, &i__, &h__[h_offset], ldh, &w[1], ilo, ihi,
+	     &z__[z_offset], ldz, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with a new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L60;
+
+L180:
+    i__1 = max(1,*n);
+    work[1].r = (doublereal) i__1, work[1].i = 0.;
+    return 0;
+
+/*     End of ZHSEQR */
+
+} /* zhseqr_ */
+
+/* Subroutine */ int zlabrd_(integer *m, integer *n, integer *nb,
+	doublecomplex *a, integer *lda, doublereal *d__, doublereal *e,
+	doublecomplex *tauq, doublecomplex *taup, doublecomplex *x, integer *
+	ldx, doublecomplex *y, integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, x_dim1, x_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex alpha;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *),
+	    zlarfg_(integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *), zlacgv_(integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLABRD reduces the first NB rows and columns of a complex general
+    m by n matrix A to upper or lower real bidiagonal form by a unitary
+    transformation Q' * A * P, and returns the matrices X and Y which
+    are needed to apply the transformation to the unreduced part of A.
+
+    If m >= n, A is reduced to upper bidiagonal form; if m < n, to lower
+    bidiagonal form.
+
+    This is an auxiliary routine called by ZGEBRD
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows in the matrix A.
+
+    N       (input) INTEGER
+            The number of columns in the matrix A.
+
+    NB      (input) INTEGER
+            The number of leading rows and columns of A to be reduced.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the m by n general matrix to be reduced.
+            On exit, the first NB rows and columns of the matrix are
+            overwritten; the rest of the array is unchanged.
+            If m >= n, elements on and below the diagonal in the first NB
+              columns, with the array TAUQ, represent the unitary
+              matrix Q as a product of elementary reflectors; and
+              elements above the diagonal in the first NB rows, with the
+              array TAUP, represent the unitary matrix P as a product
+              of elementary reflectors.
+            If m < n, elements below the diagonal in the first NB
+              columns, with the array TAUQ, represent the unitary
+              matrix Q as a product of elementary reflectors, and
+              elements on and above the diagonal in the first NB rows,
+              with the array TAUP, represent the unitary matrix P as
+              a product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    D       (output) DOUBLE PRECISION array, dimension (NB)
+            The diagonal elements of the first NB rows and columns of
+            the reduced matrix.  D(i) = A(i,i).
+
+    E       (output) DOUBLE PRECISION array, dimension (NB)
+            The off-diagonal elements of the first NB rows and columns of
+            the reduced matrix.
+
+    TAUQ    (output) COMPLEX*16 array dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix Q. See Further Details.
+
+    TAUP    (output) COMPLEX*16 array, dimension (NB)
+            The scalar factors of the elementary reflectors which
+            represent the unitary matrix P. See Further Details.
+
+    X       (output) COMPLEX*16 array, dimension (LDX,NB)
+            The m-by-nb matrix X required to update the unreduced part
+            of A.
+
+    LDX     (input) INTEGER
+            The leading dimension of the array X. LDX >= max(1,M).
+
+    Y       (output) COMPLEX*16 array, dimension (LDY,NB)
+            The n-by-nb matrix Y required to update the unreduced part
+            of A.
+
+    LDY     (output) INTEGER
+            The leading dimension of the array Y. LDY >= max(1,N).
+
+    Further Details
+    ===============
+
+    The matrices Q and P are represented as products of elementary
+    reflectors:
+
+       Q = H(1) H(2) . . . H(nb)  and  P = G(1) G(2) . . . G(nb)
+
+    Each H(i) and G(i) has the form:
+
+       H(i) = I - tauq * v * v'  and G(i) = I - taup * u * u'
+
+    where tauq and taup are complex scalars, and v and u are complex
+    vectors.
+
+    If m >= n, v(1:i-1) = 0, v(i) = 1, and v(i:m) is stored on exit in
+    A(i:m,i); u(1:i) = 0, u(i+1) = 1, and u(i+1:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    If m < n, v(1:i) = 0, v(i+1) = 1, and v(i+1:m) is stored on exit in
+    A(i+2:m,i); u(1:i-1) = 0, u(i) = 1, and u(i:n) is stored on exit in
+    A(i,i+1:n); tauq is stored in TAUQ(i) and taup in TAUP(i).
+
+    The elements of the vectors v and u together form the m-by-nb matrix
+    V and the nb-by-n matrix U' which are needed, with X and Y, to apply
+    the transformation to the unreduced part of the matrix, using a block
+    update of the form:  A := A - V*Y' - X*U'.
+
+    The contents of A on exit are illustrated by the following examples
+    with nb = 2:
+
+    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):
+
+      (  1   1   u1  u1  u1 )           (  1   u1  u1  u1  u1  u1 )
+      (  v1  1   1   u2  u2 )           (  1   1   u2  u2  u2  u2 )
+      (  v1  v2  a   a   a  )           (  v1  1   a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )           (  v1  v2  a   a   a   a  )
+      (  v1  v2  a   a   a  )
+
+    where a denotes an element of the original matrix which is unchanged,
+    vi denotes an element of the vector defining H(i), and ui an element
+    of the vector defining G(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --d__;
+    --e;
+    --tauq;
+    --taup;
+    x_dim1 = *ldx;
+    x_offset = 1 + x_dim1;
+    x -= x_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (*m >= *n) {
+
+/*        Reduce to upper bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:m,i) */
+
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda,
+		     &y[i__ + y_dim1], ldy, &c_b60, &a[i__ + i__ * a_dim1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
+	    i__2 = *m - i__ + 1;
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + x_dim1], ldx,
+		     &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[i__ + i__ *
+		    a_dim1], &c__1);
+
+/*           Generate reflection Q(i) to annihilate A(i+1:m,i) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *m - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1, &
+		    tauq[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    if (i__ < *n) {
+		i__2 = i__ + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ + (
+			i__ + 1) * a_dim1], lda, &a[i__ + i__ * a_dim1], &
+			c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ +
+			a_dim1], lda, &a[i__ + i__ * a_dim1], &c__1, &c_b59, &
+			y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[
+			i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__ + 1;
+		i__3 = i__ - 1;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &x[i__ +
+			x_dim1], ldx, &a[i__ + i__ * a_dim1], &c__1, &c_b59, &
+			y[i__ * y_dim1 + 1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ +
+			1) * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *n - i__;
+		zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+
+/*              Update A(i,i+1:n) */
+
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		zlacgv_(&i__, &a[i__ + a_dim1], lda);
+		i__2 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__, &z__1, &y[i__ + 1 +
+			y_dim1], ldy, &a[i__ + a_dim1], lda, &c_b60, &a[i__ +
+			(i__ + 1) * a_dim1], lda);
+		zlacgv_(&i__, &a[i__ + a_dim1], lda);
+		i__2 = i__ - 1;
+		zlacgv_(&i__2, &x[i__ + x_dim1], ldx);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[(i__ +
+			1) * a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60,
+			&a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ - 1;
+		zlacgv_(&i__2, &x[i__ + x_dim1], ldx);
+
+/*              Generate reflection P(i) to annihilate A(i,i+2:n) */
+
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+			taup[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + (i__ + 1) * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + (
+			i__ + 1) * a_dim1], lda, &a[i__ + (i__ + 1) * a_dim1],
+			 lda, &c_b59, &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__;
+		zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &y[i__ + 1
+			+ y_dim1], ldy, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b59, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__, &z__1, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b59, &x[i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Reduce to lower bidiagonal form */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i,i:n) */
+
+	    i__2 = *n - i__ + 1;
+	    zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + y_dim1], ldy,
+		     &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1],
+		    lda);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &x[i__ + x_dim1], ldx);
+	    i__2 = i__ - 1;
+	    i__3 = *n - i__ + 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &a[i__ *
+		    a_dim1 + 1], lda, &x[i__ + x_dim1], ldx, &c_b60, &a[i__ +
+		    i__ * a_dim1], lda);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &x[i__ + x_dim1], ldx);
+
+/*           Generate reflection P(i) to annihilate A(i,i+1:n) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+	    i__2 = *n - i__ + 1;
+/* Computing MIN */
+	    i__3 = i__ + 1;
+	    zlarfg_(&i__2, &alpha, &a[i__ + min(i__3,*n) * a_dim1], lda, &
+		    taup[i__]);
+	    i__2 = i__;
+	    d__[i__2] = alpha.r;
+	    if (i__ < *m) {
+		i__2 = i__ + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute X(i+1:m,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__ + 1;
+		zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ + 1 + i__
+			* a_dim1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, &
+			x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__ + 1;
+		i__3 = i__ - 1;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &y[i__ +
+			y_dim1], ldy, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[
+			i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 +
+			a_dim1], lda, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = i__ - 1;
+		i__3 = *n - i__ + 1;
+		zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[i__ * a_dim1
+			+ 1], lda, &a[i__ + i__ * a_dim1], lda, &c_b59, &x[
+			i__ * x_dim1 + 1], &c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &x[i__ + 1 +
+			x_dim1], ldx, &x[i__ * x_dim1 + 1], &c__1, &c_b60, &x[
+			i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *m - i__;
+		zscal_(&i__2, &taup[i__], &x[i__ + 1 + i__ * x_dim1], &c__1);
+		i__2 = *n - i__ + 1;
+		zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+
+/*              Update A(i+1:m,i) */
+
+		i__2 = i__ - 1;
+		zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 +
+			a_dim1], lda, &y[i__ + y_dim1], ldy, &c_b60, &a[i__ +
+			1 + i__ * a_dim1], &c__1);
+		i__2 = i__ - 1;
+		zlacgv_(&i__2, &y[i__ + y_dim1], ldy);
+		i__2 = *m - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__, &z__1, &x[i__ + 1 +
+			x_dim1], ldx, &a[i__ * a_dim1 + 1], &c__1, &c_b60, &a[
+			i__ + 1 + i__ * a_dim1], &c__1);
+
+/*              Generate reflection Q(i) to annihilate A(i+2:m,i) */
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *m - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		zlarfg_(&i__2, &alpha, &a[min(i__3,*m) + i__ * a_dim1], &c__1,
+			 &tauq[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute Y(i+1:n,i) */
+
+		i__2 = *m - i__;
+		i__3 = *n - i__;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ +
+			1 + (i__ + 1) * a_dim1], lda, &a[i__ + 1 + i__ *
+			a_dim1], &c__1, &c_b59, &y[i__ + 1 + i__ * y_dim1], &
+			c__1);
+		i__2 = *m - i__;
+		i__3 = i__ - 1;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ +
+			1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b59, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &y[i__ + 1 +
+			y_dim1], ldy, &y[i__ * y_dim1 + 1], &c__1, &c_b60, &y[
+			i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *m - i__;
+		zgemv_("Conjugate transpose", &i__2, &i__, &c_b60, &x[i__ + 1
+			+ x_dim1], ldx, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b59, &y[i__ * y_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("Conjugate transpose", &i__, &i__2, &z__1, &a[(i__ + 1)
+			 * a_dim1 + 1], lda, &y[i__ * y_dim1 + 1], &c__1, &
+			c_b60, &y[i__ + 1 + i__ * y_dim1], &c__1);
+		i__2 = *n - i__;
+		zscal_(&i__2, &tauq[i__], &y[i__ + 1 + i__ * y_dim1], &c__1);
+	    } else {
+		i__2 = *n - i__ + 1;
+		zlacgv_(&i__2, &a[i__ + i__ * a_dim1], lda);
+	    }
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of ZLABRD */
+
+} /* zlabrd_ */
+
+/* Subroutine */ int zlacgv_(integer *n, doublecomplex *x, integer *incx)
+{
+    /* System generated locals */
+    integer i__1, i__2;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, ioff;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLACGV conjugates a complex vector of length N.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The length of the vector X.  N >= 0.
+
+    X       (input/output) COMPLEX*16 array, dimension
+                           (1+(N-1)*abs(INCX))
+            On entry, the vector of length N to be conjugated.
+            On exit, X is overwritten with conjg(X).
+
+    INCX    (input) INTEGER
+            The spacing between successive elements of X.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*incx == 1) {
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__;
+	    d_cnjg(&z__1, &x[i__]);
+	    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+/* L10: */
+	}
+    } else {
+	ioff = 1;
+	if (*incx < 0) {
+	    ioff = 1 - (*n - 1) * *incx;
+	}
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = ioff;
+	    d_cnjg(&z__1, &x[ioff]);
+	    x[i__2].r = z__1.r, x[i__2].i = z__1.i;
+	    ioff += *incx;
+/* L20: */
+	}
+    }
+    return 0;
+
+/*     End of ZLACGV */
+
+} /* zlacgv_ */
+
+/* Subroutine */ int zlacp2_(char *uplo, integer *m, integer *n, doublereal *
+	a, integer *lda, doublecomplex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLACP2 copies all or part of a real two-dimensional matrix A to a
+    complex matrix B.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be copied to B.
+            = 'U':      Upper triangular part
+            = 'L':      Lower triangular part
+            Otherwise:  All of the matrix A
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA,N)
+            The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+            is accessed; if UPLO = 'L', only the lower trapezium is
+            accessed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (output) COMPLEX*16 array, dimension (LDB,N)
+            On exit, B = A in the locations specified by UPLO.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4], b[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4], b[i__3].i = 0.;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4], b[i__3].i = 0.;
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLACP2 */
+
+} /* zlacp2_ */
+
+/* Subroutine */ int zlacpy_(char *uplo, integer *m, integer *n,
+	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    ZLACPY copies all or part of a two-dimensional matrix A to another
+    matrix B.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be copied to B.
+            = 'U':      Upper triangular part
+            = 'L':      Lower triangular part
+            Otherwise:  All of the matrix A
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The m by n matrix A.  If UPLO = 'U', only the upper trapezium
+            is accessed; if UPLO = 'L', only the lower trapezium is
+            accessed.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    B       (output) COMPLEX*16 array, dimension (LDB,N)
+            On exit, B = A in the locations specified by UPLO.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i;
+/* L10: */
+	    }
+/* L20: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i;
+/* L30: */
+	    }
+/* L40: */
+	}
+
+    } else {
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * b_dim1;
+		i__4 = i__ + j * a_dim1;
+		b[i__3].r = a[i__4].r, b[i__3].i = a[i__4].i;
+/* L50: */
+	    }
+/* L60: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLACPY */
+
+} /* zlacpy_ */
+
+/* Subroutine */ int zlacrm_(integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublereal *b, integer *ldb, doublecomplex *c__,
+	integer *ldc, doublereal *rwork)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, a_dim1, a_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLACRM performs a very simple matrix-matrix multiplication:
+             C := A * B,
+    where A is M by N and complex; B is N by N and real;
+    C is M by N and complex.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A and of the matrix C.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns and rows of the matrix B and
+            the number of columns of the matrix C.
+            N >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA, N)
+            A contains the M by N matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >=max(1,M).
+
+    B       (input) DOUBLE PRECISION array, dimension (LDB, N)
+            B contains the N by N matrix B.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B. LDB >=max(1,N).
+
+    C       (input) COMPLEX*16 array, dimension (LDC, N)
+            C contains the M by N matrix C.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >=max(1,N).
+
+    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*M*N)
+
+    =====================================================================
+
+
+       Quick return if possible.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --rwork;
+
+    /* Function Body */
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    rwork[(j - 1) * *m + i__] = a[i__3].r;
+/* L10: */
+	}
+/* L20: */
+    }
+
+    l = *m * *n + 1;
+    dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, &
+	    c_b324, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = l + (j - 1) * *m + i__ - 1;
+	    c__[i__3].r = rwork[i__4], c__[i__3].i = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    rwork[(j - 1) * *m + i__] = d_imag(&a[i__ + j * a_dim1]);
+/* L50: */
+	}
+/* L60: */
+    }
+    dgemm_("N", "N", m, n, n, &c_b1015, &rwork[1], m, &b[b_offset], ldb, &
+	    c_b324, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = i__ + j * c_dim1;
+	    d__1 = c__[i__4].r;
+	    i__5 = l + (j - 1) * *m + i__ - 1;
+	    z__1.r = d__1, z__1.i = rwork[i__5];
+	    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L70: */
+	}
+/* L80: */
+    }
+
+    return 0;
+
+/*     End of ZLACRM */
+
+} /* zlacrm_ */
+
+/* Double Complex */ VOID zladiv_(doublecomplex * ret_val, doublecomplex *x,
+	doublecomplex *y)
+{
+    /* System generated locals */
+    doublereal d__1, d__2, d__3, d__4;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+
+    /* Local variables */
+    static doublereal zi, zr;
+    extern /* Subroutine */ int dladiv_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLADIV := X / Y, where X and Y are complex.  The computation of X / Y
+    will not overflow on an intermediary step unless the results
+    overflows.
+
+    Arguments
+    =========
+
+    X       (input) COMPLEX*16
+    Y       (input) COMPLEX*16
+            The complex scalars X and Y.
+
+    =====================================================================
+*/
+
+
+    d__1 = x->r;
+    d__2 = d_imag(x);
+    d__3 = y->r;
+    d__4 = d_imag(y);
+    dladiv_(&d__1, &d__2, &d__3, &d__4, &zr, &zi);
+    z__1.r = zr, z__1.i = zi;
+     ret_val->r = z__1.r,  ret_val->i = z__1.i;
+
+    return ;
+
+/*     End of ZLADIV */
+
+} /* zladiv_ */
+
+/* Subroutine */ int zlaed0_(integer *qsiz, integer *n, doublereal *d__,
+	doublereal *e, doublecomplex *q, integer *ldq, doublecomplex *qstore,
+	integer *ldqs, doublereal *rwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, qstore_dim1, qstore_offset, i__1, i__2;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, k, ll, iq, lgn, msd2, smm1, spm1, spm2;
+    static doublereal temp;
+    static integer curr, iperm;
+    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
+	    doublereal *, integer *);
+    static integer indxq, iwrem, iqptr, tlvls;
+    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), zlaed7_(integer *, integer *,
+	    integer *, integer *, integer *, integer *, doublereal *,
+	    doublecomplex *, integer *, doublereal *, integer *, doublereal *,
+	     integer *, integer *, integer *, integer *, integer *,
+	    doublereal *, doublecomplex *, doublereal *, integer *, integer *)
+	    ;
+    static integer igivcl;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlacrm_(integer *, integer *, doublecomplex *,
+	     integer *, doublereal *, integer *, doublecomplex *, integer *,
+	    doublereal *);
+    static integer igivnm, submat, curprb, subpbs, igivpt;
+    extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *);
+    static integer curlvl, matsiz, iprmpt, smlsiz;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    Using the divide and conquer method, ZLAED0 computes all eigenvalues
+    of a symmetric tridiagonal matrix which is one diagonal block of
+    those from reducing a dense or band Hermitian matrix and
+    corresponding eigenvectors of the dense or band matrix.
+
+    Arguments
+    =========
+
+    QSIZ   (input) INTEGER
+           The dimension of the unitary matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N if ICOMPQ = 1.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, the diagonal elements of the tridiagonal matrix.
+           On exit, the eigenvalues in ascending order.
+
+    E      (input/output) DOUBLE PRECISION array, dimension (N-1)
+           On entry, the off-diagonal elements of the tridiagonal matrix.
+           On exit, E has been destroyed.
+
+    Q      (input/output) COMPLEX*16 array, dimension (LDQ,N)
+           On entry, Q must contain an QSIZ x N matrix whose columns
+           unitarily orthonormal. It is a part of the unitary matrix
+           that reduces the full dense Hermitian matrix to a
+           (reducible) symmetric tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    IWORK  (workspace) INTEGER array,
+           the dimension of IWORK must be at least
+                        6 + 6*N + 5*N*lg N
+                        ( lg( N ) = smallest integer k
+                                    such that 2^k >= N )
+
+    RWORK  (workspace) DOUBLE PRECISION array,
+                                 dimension (1 + 3*N + 2*N*lg N + 3*N**2)
+                          ( lg( N ) = smallest integer k
+                                      such that 2^k >= N )
+
+    QSTORE (workspace) COMPLEX*16 array, dimension (LDQS, N)
+           Used to store parts of
+           the eigenvector matrix when the updating matrix multiplies
+           take place.
+
+    LDQS   (input) INTEGER
+           The leading dimension of the array QSTORE.
+           LDQS >= max(1,N).
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    =====================================================================
+
+    Warning:      N could be as big as QSIZ!
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    qstore_dim1 = *ldqs;
+    qstore_offset = 1 + qstore_dim1;
+    qstore -= qstore_offset;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+/*
+       IF( ICOMPQ .LT. 0 .OR. ICOMPQ .GT. 2 ) THEN
+          INFO = -1
+       ELSE IF( ( ICOMPQ .EQ. 1 ) .AND. ( QSIZ .LT. MAX( 0, N ) ) )
+      $        THEN
+*/
+    if (*qsiz < max(0,*n)) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*ldq < max(1,*n)) {
+	*info = -6;
+    } else if (*ldqs < max(1,*n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLAED0", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "ZLAED0", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       Determine the size and placement of the submatrices, and save in
+       the leading elements of IWORK.
+*/
+
+    iwork[1] = *n;
+    subpbs = 1;
+    tlvls = 0;
+L10:
+    if (iwork[subpbs] > smlsiz) {
+	for (j = subpbs; j >= 1; --j) {
+	    iwork[j * 2] = (iwork[j] + 1) / 2;
+	    iwork[((j) << (1)) - 1] = iwork[j] / 2;
+/* L20: */
+	}
+	++tlvls;
+	subpbs <<= 1;
+	goto L10;
+    }
+    i__1 = subpbs;
+    for (j = 2; j <= i__1; ++j) {
+	iwork[j] += iwork[j - 1];
+/* L30: */
+    }
+
+/*
+       Divide the matrix into SUBPBS submatrices of size at most SMLSIZ+1
+       using rank-1 modifications (cuts).
+*/
+
+    spm1 = subpbs - 1;
+    i__1 = spm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	submat = iwork[i__] + 1;
+	smm1 = submat - 1;
+	d__[smm1] -= (d__1 = e[smm1], abs(d__1));
+	d__[submat] -= (d__1 = e[smm1], abs(d__1));
+/* L40: */
+    }
+
+    indxq = ((*n) << (2)) + 3;
+
+/*
+       Set up workspaces for eigenvalues only/accumulate new vectors
+       routine
+*/
+
+    temp = log((doublereal) (*n)) / log(2.);
+    lgn = (integer) temp;
+    if (pow_ii(&c__2, &lgn) < *n) {
+	++lgn;
+    }
+    if (pow_ii(&c__2, &lgn) < *n) {
+	++lgn;
+    }
+    iprmpt = indxq + *n + 1;
+    iperm = iprmpt + *n * lgn;
+    iqptr = iperm + *n * lgn;
+    igivpt = iqptr + *n + 2;
+    igivcl = igivpt + *n * lgn;
+
+    igivnm = 1;
+    iq = igivnm + ((*n) << (1)) * lgn;
+/* Computing 2nd power */
+    i__1 = *n;
+    iwrem = iq + i__1 * i__1 + 1;
+/*     Initialize pointers */
+    i__1 = subpbs;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+	iwork[iprmpt + i__] = 1;
+	iwork[igivpt + i__] = 1;
+/* L50: */
+    }
+    iwork[iqptr] = 1;
+
+/*
+       Solve each submatrix eigenproblem at the bottom of the divide and
+       conquer tree.
+*/
+
+    curr = 0;
+    i__1 = spm1;
+    for (i__ = 0; i__ <= i__1; ++i__) {
+	if (i__ == 0) {
+	    submat = 1;
+	    matsiz = iwork[1];
+	} else {
+	    submat = iwork[i__] + 1;
+	    matsiz = iwork[i__ + 1] - iwork[i__];
+	}
+	ll = iq - 1 + iwork[iqptr + curr];
+	dsteqr_("I", &matsiz, &d__[submat], &e[submat], &rwork[ll], &matsiz, &
+		rwork[1], info);
+	zlacrm_(qsiz, &matsiz, &q[submat * q_dim1 + 1], ldq, &rwork[ll], &
+		matsiz, &qstore[submat * qstore_dim1 + 1], ldqs, &rwork[iwrem]
+		);
+/* Computing 2nd power */
+	i__2 = matsiz;
+	iwork[iqptr + curr + 1] = iwork[iqptr + curr] + i__2 * i__2;
+	++curr;
+	if (*info > 0) {
+	    *info = submat * (*n + 1) + submat + matsiz - 1;
+	    return 0;
+	}
+	k = 1;
+	i__2 = iwork[i__ + 1];
+	for (j = submat; j <= i__2; ++j) {
+	    iwork[indxq + j] = k;
+	    ++k;
+/* L60: */
+	}
+/* L70: */
+    }
+
+/*
+       Successively merge eigensystems of adjacent submatrices
+       into eigensystem for the corresponding larger matrix.
+
+       while ( SUBPBS > 1 )
+*/
+
+    curlvl = 1;
+L80:
+    if (subpbs > 1) {
+	spm2 = subpbs - 2;
+	i__1 = spm2;
+	for (i__ = 0; i__ <= i__1; i__ += 2) {
+	    if (i__ == 0) {
+		submat = 1;
+		matsiz = iwork[2];
+		msd2 = iwork[1];
+		curprb = 0;
+	    } else {
+		submat = iwork[i__] + 1;
+		matsiz = iwork[i__ + 2] - iwork[i__];
+		msd2 = matsiz / 2;
+		++curprb;
+	    }
+
+/*
+       Merge lower order eigensystems (of size MSD2 and MATSIZ - MSD2)
+       into an eigensystem of size MATSIZ.  ZLAED7 handles the case
+       when the eigenvectors of a full or band Hermitian matrix (which
+       was reduced to tridiagonal form) are desired.
+
+       I am free to use Q as a valuable working space until Loop 150.
+*/
+
+	    zlaed7_(&matsiz, &msd2, qsiz, &tlvls, &curlvl, &curprb, &d__[
+		    submat], &qstore[submat * qstore_dim1 + 1], ldqs, &e[
+		    submat + msd2 - 1], &iwork[indxq + submat], &rwork[iq], &
+		    iwork[iqptr], &iwork[iprmpt], &iwork[iperm], &iwork[
+		    igivpt], &iwork[igivcl], &rwork[igivnm], &q[submat *
+		    q_dim1 + 1], &rwork[iwrem], &iwork[subpbs + 1], info);
+	    if (*info > 0) {
+		*info = submat * (*n + 1) + submat + matsiz - 1;
+		return 0;
+	    }
+	    iwork[i__ / 2 + 1] = iwork[i__ + 2];
+/* L90: */
+	}
+	subpbs /= 2;
+	++curlvl;
+	goto L80;
+    }
+
+/*
+       end while
+
+       Re-merge the eigenvalues/vectors which were deflated at the final
+       merge step.
+*/
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	j = iwork[indxq + i__];
+	rwork[i__] = d__[j];
+	zcopy_(qsiz, &qstore[j * qstore_dim1 + 1], &c__1, &q[i__ * q_dim1 + 1]
+		, &c__1);
+/* L100: */
+    }
+    dcopy_(n, &rwork[1], &c__1, &d__[1], &c__1);
+
+    return 0;
+
+/*     End of ZLAED0 */
+
+} /* zlaed0_ */
+
+/* Subroutine */ int zlaed7_(integer *n, integer *cutpnt, integer *qsiz,
+	integer *tlvls, integer *curlvl, integer *curpbm, doublereal *d__,
+	doublecomplex *q, integer *ldq, doublereal *rho, integer *indxq,
+	doublereal *qstore, integer *qptr, integer *prmptr, integer *perm,
+	integer *givptr, integer *givcol, doublereal *givnum, doublecomplex *
+	work, doublereal *rwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, i__1, i__2;
+
+    /* Builtin functions */
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, k, n1, n2, iq, iw, iz, ptr, ind1, ind2, indx, curr,
+	    indxc, indxp;
+    extern /* Subroutine */ int dlaed9_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, integer *, integer *),
+	    zlaed8_(integer *, integer *, integer *, doublecomplex *, integer
+	    *, doublereal *, doublereal *, integer *, doublereal *,
+	    doublereal *, doublecomplex *, integer *, doublereal *, integer *,
+	     integer *, integer *, integer *, integer *, integer *,
+	    doublereal *, integer *), dlaeda_(integer *, integer *, integer *,
+	     integer *, integer *, integer *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
+	     integer *);
+    static integer idlmda;
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *), zlacrm_(integer *, integer *, doublecomplex *, integer *,
+	     doublereal *, integer *, doublecomplex *, integer *, doublereal *
+	    );
+    static integer coltyp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLAED7 computes the updated eigensystem of a diagonal
+    matrix after modification by a rank-one symmetric matrix. This
+    routine is used only for the eigenproblem which requires all
+    eigenvalues and optionally eigenvectors of a dense or banded
+    Hermitian matrix that has been reduced to tridiagonal form.
+
+      T = Q(in) ( D(in) + RHO * Z*Z' ) Q'(in) = Q(out) * D(out) * Q'(out)
+
+      where Z = Q'u, u is a vector of length N with ones in the
+      CUTPNT and CUTPNT + 1 th elements and zeros elsewhere.
+
+       The eigenvectors of the original matrix are stored in Q, and the
+       eigenvalues are in D.  The algorithm consists of three stages:
+
+          The first stage consists of deflating the size of the problem
+          when there are multiple eigenvalues or if there is a zero in
+          the Z vector.  For each such occurence the dimension of the
+          secular equation problem is reduced by one.  This stage is
+          performed by the routine DLAED2.
+
+          The second stage consists of calculating the updated
+          eigenvalues. This is done by finding the roots of the secular
+          equation via the routine DLAED4 (as called by SLAED3).
+          This routine also calculates the eigenvectors of the current
+          problem.
+
+          The final stage consists of computing the updated eigenvectors
+          directly using the updated eigenvalues.  The eigenvectors for
+          the current problem are multiplied with the eigenvectors from
+          the overall problem.
+
+    Arguments
+    =========
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    CUTPNT (input) INTEGER
+           Contains the location of the last eigenvalue in the leading
+           sub-matrix.  min(1,N) <= CUTPNT <= N.
+
+    QSIZ   (input) INTEGER
+           The dimension of the unitary matrix used to reduce
+           the full matrix to tridiagonal form.  QSIZ >= N.
+
+    TLVLS  (input) INTEGER
+           The total number of merging levels in the overall divide and
+           conquer tree.
+
+    CURLVL (input) INTEGER
+           The current level in the overall merge routine,
+           0 <= curlvl <= tlvls.
+
+    CURPBM (input) INTEGER
+           The current problem in the current level in the overall
+           merge routine (counting from upper left to lower right).
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, the eigenvalues of the rank-1-perturbed matrix.
+           On exit, the eigenvalues of the repaired matrix.
+
+    Q      (input/output) COMPLEX*16 array, dimension (LDQ,N)
+           On entry, the eigenvectors of the rank-1-perturbed matrix.
+           On exit, the eigenvectors of the repaired tridiagonal matrix.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max(1,N).
+
+    RHO    (input) DOUBLE PRECISION
+           Contains the subdiagonal element used to create the rank-1
+           modification.
+
+    INDXQ  (output) INTEGER array, dimension (N)
+           This contains the permutation which will reintegrate the
+           subproblem just solved back into sorted order,
+           ie. D( INDXQ( I = 1, N ) ) will be in ascending order.
+
+    IWORK  (workspace) INTEGER array, dimension (4*N)
+
+    RWORK  (workspace) DOUBLE PRECISION array,
+                                   dimension (3*N+2*QSIZ*N)
+
+    WORK   (workspace) COMPLEX*16 array, dimension (QSIZ*N)
+
+    QSTORE (input/output) DOUBLE PRECISION array, dimension (N**2+1)
+           Stores eigenvectors of submatrices encountered during
+           divide and conquer, packed together. QPTR points to
+           beginning of the submatrices.
+
+    QPTR   (input/output) INTEGER array, dimension (N+2)
+           List of indices pointing to beginning of submatrices stored
+           in QSTORE. The submatrices are numbered starting at the
+           bottom left of the divide and conquer tree, from left to
+           right and bottom to top.
+
+    PRMPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in PERM a
+           level's permutation is stored.  PRMPTR(i+1) - PRMPTR(i)
+           indicates the size of the permutation and also the size of
+           the full, non-deflated problem.
+
+    PERM   (input) INTEGER array, dimension (N lg N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (input) INTEGER array, dimension (N lg N)
+           Contains a list of pointers which indicate where in GIVCOL a
+           level's Givens rotations are stored.  GIVPTR(i+1) - GIVPTR(i)
+           indicates the number of Givens rotations.
+
+    GIVCOL (input) INTEGER array, dimension (2, N lg N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension (2, N lg N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  if INFO = 1, an eigenvalue did not converge
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --indxq;
+    --qstore;
+    --qptr;
+    --prmptr;
+    --perm;
+    --givptr;
+    givcol -= 3;
+    givnum -= 3;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+/*
+       IF( ICOMPQ.LT.0 .OR. ICOMPQ.GT.1 ) THEN
+          INFO = -1
+       ELSE IF( N.LT.0 ) THEN
+*/
+    if (*n < 0) {
+	*info = -1;
+    } else if ((min(1,*n) > *cutpnt) || (*n < *cutpnt)) {
+	*info = -2;
+    } else if (*qsiz < *n) {
+	*info = -3;
+    } else if (*ldq < max(1,*n)) {
+	*info = -9;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLAED7", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*
+       The following values are for bookkeeping purposes only.  They are
+       integer pointers which indicate the portion of the workspace
+       used by a particular array in DLAED2 and SLAED3.
+*/
+
+    iz = 1;
+    idlmda = iz + *n;
+    iw = idlmda + *n;
+    iq = iw + *n;
+
+    indx = 1;
+    indxc = indx + *n;
+    coltyp = indxc + *n;
+    indxp = coltyp + *n;
+
+/*
+       Form the z-vector which consists of the last row of Q_1 and the
+       first row of Q_2.
+*/
+
+    ptr = pow_ii(&c__2, tlvls) + 1;
+    i__1 = *curlvl - 1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = *tlvls - i__;
+	ptr += pow_ii(&c__2, &i__2);
+/* L10: */
+    }
+    curr = ptr + *curpbm;
+    dlaeda_(n, tlvls, curlvl, curpbm, &prmptr[1], &perm[1], &givptr[1], &
+	    givcol[3], &givnum[3], &qstore[1], &qptr[1], &rwork[iz], &rwork[
+	    iz + *n], info);
+
+/*
+       When solving the final problem, we no longer need the stored data,
+       so we will overwrite the data from this level onto the previously
+       used storage space.
+*/
+
+    if (*curlvl == *tlvls) {
+	qptr[curr] = 1;
+	prmptr[curr] = 1;
+	givptr[curr] = 1;
+    }
+
+/*     Sort and Deflate eigenvalues. */
+
+    zlaed8_(&k, n, qsiz, &q[q_offset], ldq, &d__[1], rho, cutpnt, &rwork[iz],
+	    &rwork[idlmda], &work[1], qsiz, &rwork[iw], &iwork[indxp], &iwork[
+	    indx], &indxq[1], &perm[prmptr[curr]], &givptr[curr + 1], &givcol[
+	    ((givptr[curr]) << (1)) + 1], &givnum[((givptr[curr]) << (1)) + 1]
+	    , info);
+    prmptr[curr + 1] = prmptr[curr] + *n;
+    givptr[curr + 1] += givptr[curr];
+
+/*     Solve Secular Equation. */
+
+    if (k != 0) {
+	dlaed9_(&k, &c__1, &k, n, &d__[1], &rwork[iq], &k, rho, &rwork[idlmda]
+		, &rwork[iw], &qstore[qptr[curr]], &k, info);
+	zlacrm_(qsiz, &k, &work[1], qsiz, &qstore[qptr[curr]], &k, &q[
+		q_offset], ldq, &rwork[iq]);
+/* Computing 2nd power */
+	i__1 = k;
+	qptr[curr + 1] = qptr[curr] + i__1 * i__1;
+	if (*info != 0) {
+	    return 0;
+	}
+
+/*     Prepare the INDXQ sorting premutation. */
+
+	n1 = k;
+	n2 = *n - k;
+	ind1 = 1;
+	ind2 = *n;
+	dlamrg_(&n1, &n2, &d__[1], &c__1, &c_n1, &indxq[1]);
+    } else {
+	qptr[curr + 1] = qptr[curr];
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    indxq[i__] = i__;
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLAED7 */
+
+} /* zlaed7_ */
+
+/* Subroutine */ int zlaed8_(integer *k, integer *n, integer *qsiz,
+	doublecomplex *q, integer *ldq, doublereal *d__, doublereal *rho,
+	integer *cutpnt, doublereal *z__, doublereal *dlamda, doublecomplex *
+	q2, integer *ldq2, doublereal *w, integer *indxp, integer *indx,
+	integer *indxq, integer *perm, integer *givptr, integer *givcol,
+	doublereal *givnum, integer *info)
+{
+    /* System generated locals */
+    integer q_dim1, q_offset, q2_dim1, q2_offset, i__1;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static doublereal c__;
+    static integer i__, j;
+    static doublereal s, t;
+    static integer k2, n1, n2, jp, n1p1;
+    static doublereal eps, tau, tol;
+    static integer jlam, imax, jmax;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *), dcopy_(integer *, doublereal *, integer *, doublereal
+	    *, integer *), zdrot_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublereal *, doublereal *), zcopy_(
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
+	    ;
+
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlamrg_(integer *, integer *, doublereal *,
+	    integer *, integer *, integer *), xerbla_(char *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
+       Courant Institute, NAG Ltd., and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLAED8 merges the two sets of eigenvalues together into a single
+    sorted set.  Then it tries to deflate the size of the problem.
+    There are two ways in which deflation can occur:  when two or more
+    eigenvalues are close together or if there is a tiny element in the
+    Z vector.  For each such occurrence the order of the related secular
+    equation problem is reduced by one.
+
+    Arguments
+    =========
+
+    K      (output) INTEGER
+           Contains the number of non-deflated eigenvalues.
+           This is the order of the related secular equation.
+
+    N      (input) INTEGER
+           The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    QSIZ   (input) INTEGER
+           The dimension of the unitary matrix used to reduce
+           the dense or band matrix to tridiagonal form.
+           QSIZ >= N if ICOMPQ = 1.
+
+    Q      (input/output) COMPLEX*16 array, dimension (LDQ,N)
+           On entry, Q contains the eigenvectors of the partially solved
+           system which has been previously updated in matrix
+           multiplies with other partially solved eigensystems.
+           On exit, Q contains the trailing (N-K) updated eigenvectors
+           (those which were deflated) in its last N-K columns.
+
+    LDQ    (input) INTEGER
+           The leading dimension of the array Q.  LDQ >= max( 1, N ).
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry, D contains the eigenvalues of the two submatrices to
+           be combined.  On exit, D contains the trailing (N-K) updated
+           eigenvalues (those which were deflated) sorted into increasing
+           order.
+
+    RHO    (input/output) DOUBLE PRECISION
+           Contains the off diagonal element associated with the rank-1
+           cut which originally split the two submatrices which are now
+           being recombined. RHO is modified during the computation to
+           the value required by DLAED3.
+
+    CUTPNT (input) INTEGER
+           Contains the location of the last eigenvalue in the leading
+           sub-matrix.  MIN(1,N) <= CUTPNT <= N.
+
+    Z      (input) DOUBLE PRECISION array, dimension (N)
+           On input this vector contains the updating vector (the last
+           row of the first sub-eigenvector matrix and the first row of
+           the second sub-eigenvector matrix).  The contents of Z are
+           destroyed during the updating process.
+
+    DLAMDA (output) DOUBLE PRECISION array, dimension (N)
+           Contains a copy of the first K eigenvalues which will be used
+           by DLAED3 to form the secular equation.
+
+    Q2     (output) COMPLEX*16 array, dimension (LDQ2,N)
+           If ICOMPQ = 0, Q2 is not referenced.  Otherwise,
+           Contains a copy of the first K eigenvectors which will be used
+           by DLAED7 in a matrix multiply (DGEMM) to update the new
+           eigenvectors.
+
+    LDQ2   (input) INTEGER
+           The leading dimension of the array Q2.  LDQ2 >= max( 1, N ).
+
+    W      (output) DOUBLE PRECISION array, dimension (N)
+           This will hold the first k values of the final
+           deflation-altered z-vector and will be passed to DLAED3.
+
+    INDXP  (workspace) INTEGER array, dimension (N)
+           This will contain the permutation used to place deflated
+           values of D at the end of the array. On output INDXP(1:K)
+           points to the nondeflated D-values and INDXP(K+1:N)
+           points to the deflated eigenvalues.
+
+    INDX   (workspace) INTEGER array, dimension (N)
+           This will contain the permutation used to sort the contents of
+           D into ascending order.
+
+    INDXQ  (input) INTEGER array, dimension (N)
+           This contains the permutation which separately sorts the two
+           sub-problems in D into ascending order.  Note that elements in
+           the second half of this permutation must first have CUTPNT
+           added to their values in order to be accurate.
+
+    PERM   (output) INTEGER array, dimension (N)
+           Contains the permutations (from deflation and sorting) to be
+           applied to each eigenblock.
+
+    GIVPTR (output) INTEGER
+           Contains the number of Givens rotations which took place in
+           this subproblem.
+
+    GIVCOL (output) INTEGER array, dimension (2, N)
+           Each pair of numbers indicates a pair of columns to take place
+           in a Givens rotation.
+
+    GIVNUM (output) DOUBLE PRECISION array, dimension (2, N)
+           Each number indicates the S value to be used in the
+           corresponding Givens rotation.
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    q_dim1 = *ldq;
+    q_offset = 1 + q_dim1;
+    q -= q_offset;
+    --d__;
+    --z__;
+    --dlamda;
+    q2_dim1 = *ldq2;
+    q2_offset = 1 + q2_dim1;
+    q2 -= q2_offset;
+    --w;
+    --indxp;
+    --indx;
+    --indxq;
+    --perm;
+    givcol -= 3;
+    givnum -= 3;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -2;
+    } else if (*qsiz < *n) {
+	*info = -3;
+    } else if (*ldq < max(1,*n)) {
+	*info = -5;
+    } else if ((*cutpnt < min(1,*n)) || (*cutpnt > *n)) {
+	*info = -8;
+    } else if (*ldq2 < max(1,*n)) {
+	*info = -12;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLAED8", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    n1 = *cutpnt;
+    n2 = *n - n1;
+    n1p1 = n1 + 1;
+
+    if (*rho < 0.) {
+	dscal_(&n2, &c_b1294, &z__[n1p1], &c__1);
+    }
+
+/*     Normalize z so that norm(z) = 1 */
+
+    t = 1. / sqrt(2.);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	indx[j] = j;
+/* L10: */
+    }
+    dscal_(n, &t, &z__[1], &c__1);
+    *rho = (d__1 = *rho * 2., abs(d__1));
+
+/*     Sort the eigenvalues into increasing order */
+
+    i__1 = *n;
+    for (i__ = *cutpnt + 1; i__ <= i__1; ++i__) {
+	indxq[i__] += *cutpnt;
+/* L20: */
+    }
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	dlamda[i__] = d__[indxq[i__]];
+	w[i__] = z__[indxq[i__]];
+/* L30: */
+    }
+    i__ = 1;
+    j = *cutpnt + 1;
+    dlamrg_(&n1, &n2, &dlamda[1], &c__1, &c__1, &indx[1]);
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	d__[i__] = dlamda[indx[i__]];
+	z__[i__] = w[indx[i__]];
+/* L40: */
+    }
+
+/*     Calculate the allowable deflation tolerance */
+
+    imax = idamax_(n, &z__[1], &c__1);
+    jmax = idamax_(n, &d__[1], &c__1);
+    eps = EPSILON;
+    tol = eps * 8. * (d__1 = d__[jmax], abs(d__1));
+
+/*
+       If the rank-1 modifier is small enough, no more needs to be done
+       -- except to reorganize Q so that its columns correspond with the
+       elements in D.
+*/
+
+    if (*rho * (d__1 = z__[imax], abs(d__1)) <= tol) {
+	*k = 0;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    perm[j] = indxq[indx[j]];
+	    zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1]
+		    , &c__1);
+/* L50: */
+	}
+	zlacpy_("A", qsiz, n, &q2[q2_dim1 + 1], ldq2, &q[q_dim1 + 1], ldq);
+	return 0;
+    }
+
+/*
+       If there are multiple eigenvalues then the problem deflates.  Here
+       the number of equal eigenvalues are found.  As each equal
+       eigenvalue is found, an elementary reflector is computed to rotate
+       the corresponding eigensubspace so that the corresponding
+       components of Z are zero in this new basis.
+*/
+
+    *k = 0;
+    *givptr = 0;
+    k2 = *n + 1;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*           Deflate due to small z component. */
+
+	    --k2;
+	    indxp[k2] = j;
+	    if (j == *n) {
+		goto L100;
+	    }
+	} else {
+	    jlam = j;
+	    goto L70;
+	}
+/* L60: */
+    }
+L70:
+    ++j;
+    if (j > *n) {
+	goto L90;
+    }
+    if (*rho * (d__1 = z__[j], abs(d__1)) <= tol) {
+
+/*        Deflate due to small z component. */
+
+	--k2;
+	indxp[k2] = j;
+    } else {
+
+/*        Check if eigenvalues are close enough to allow deflation. */
+
+	s = z__[jlam];
+	c__ = z__[j];
+
+/*
+          Find sqrt(a**2+b**2) without overflow or
+          destructive underflow.
+*/
+
+	tau = dlapy2_(&c__, &s);
+	t = d__[j] - d__[jlam];
+	c__ /= tau;
+	s = -s / tau;
+	if ((d__1 = t * c__ * s, abs(d__1)) <= tol) {
+
+/*           Deflation is possible. */
+
+	    z__[j] = tau;
+	    z__[jlam] = 0.;
+
+/*           Record the appropriate Givens rotation */
+
+	    ++(*givptr);
+	    givcol[((*givptr) << (1)) + 1] = indxq[indx[jlam]];
+	    givcol[((*givptr) << (1)) + 2] = indxq[indx[j]];
+	    givnum[((*givptr) << (1)) + 1] = c__;
+	    givnum[((*givptr) << (1)) + 2] = s;
+	    zdrot_(qsiz, &q[indxq[indx[jlam]] * q_dim1 + 1], &c__1, &q[indxq[
+		    indx[j]] * q_dim1 + 1], &c__1, &c__, &s);
+	    t = d__[jlam] * c__ * c__ + d__[j] * s * s;
+	    d__[j] = d__[jlam] * s * s + d__[j] * c__ * c__;
+	    d__[jlam] = t;
+	    --k2;
+	    i__ = 1;
+L80:
+	    if (k2 + i__ <= *n) {
+		if (d__[jlam] < d__[indxp[k2 + i__]]) {
+		    indxp[k2 + i__ - 1] = indxp[k2 + i__];
+		    indxp[k2 + i__] = jlam;
+		    ++i__;
+		    goto L80;
+		} else {
+		    indxp[k2 + i__ - 1] = jlam;
+		}
+	    } else {
+		indxp[k2 + i__ - 1] = jlam;
+	    }
+	    jlam = j;
+	} else {
+	    ++(*k);
+	    w[*k] = z__[jlam];
+	    dlamda[*k] = d__[jlam];
+	    indxp[*k] = jlam;
+	    jlam = j;
+	}
+    }
+    goto L70;
+L90:
+
+/*     Record the last eigenvalue. */
+
+    ++(*k);
+    w[*k] = z__[jlam];
+    dlamda[*k] = d__[jlam];
+    indxp[*k] = jlam;
+
+L100:
+
+/*
+       Sort the eigenvalues and corresponding eigenvectors into DLAMDA
+       and Q2 respectively.  The eigenvalues/vectors which were not
+       deflated go into the first K slots of DLAMDA and Q2 respectively,
+       while those which were deflated go into the last N - K slots.
+*/
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	jp = indxp[j];
+	dlamda[j] = d__[jp];
+	perm[j] = indxq[indx[jp]];
+	zcopy_(qsiz, &q[perm[j] * q_dim1 + 1], &c__1, &q2[j * q2_dim1 + 1], &
+		c__1);
+/* L110: */
+    }
+
+/*
+       The deflated eigenvalues and their corresponding vectors go back
+       into the last N - K slots of D and Q respectively.
+*/
+
+    if (*k < *n) {
+	i__1 = *n - *k;
+	dcopy_(&i__1, &dlamda[*k + 1], &c__1, &d__[*k + 1], &c__1);
+	i__1 = *n - *k;
+	zlacpy_("A", qsiz, &i__1, &q2[(*k + 1) * q2_dim1 + 1], ldq2, &q[(*k +
+		1) * q_dim1 + 1], ldq);
+    }
+
+    return 0;
+
+/*     End of ZLAED8 */
+
+} /* zlaed8_ */
+
+/* Subroutine */ int zlahqr_(logical *wantt, logical *wantz, integer *n,
+	integer *ilo, integer *ihi, doublecomplex *h__, integer *ldh,
+	doublecomplex *w, integer *iloz, integer *ihiz, doublecomplex *z__,
+	integer *ldz, integer *info)
+{
+    /* System generated locals */
+    integer h_dim1, h_offset, z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1, d__2, d__3, d__4, d__5, d__6;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+    void z_sqrt(doublecomplex *, doublecomplex *), d_cnjg(doublecomplex *,
+	    doublecomplex *);
+    double z_abs(doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, k, l, m;
+    static doublereal s;
+    static doublecomplex t, u, v[2], x, y;
+    static integer i1, i2;
+    static doublecomplex t1;
+    static doublereal t2;
+    static doublecomplex v2;
+    static doublereal h10;
+    static doublecomplex h11;
+    static doublereal h21;
+    static doublecomplex h22;
+    static integer nh, nz;
+    static doublecomplex h11s;
+    static integer itn, its;
+    static doublereal ulp;
+    static doublecomplex sum;
+    static doublereal tst1;
+    static doublecomplex temp;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *);
+    static doublereal rtemp, rwork[1];
+    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+
+    extern /* Subroutine */ int zlarfg_(integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *);
+    extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *,
+	     doublecomplex *);
+    extern doublereal zlanhs_(char *, integer *, doublecomplex *, integer *,
+	    doublereal *);
+    static doublereal smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLAHQR is an auxiliary routine called by ZHSEQR to update the
+    eigenvalues and Schur decomposition already computed by ZHSEQR, by
+    dealing with the Hessenberg submatrix in rows and columns ILO to IHI.
+
+    Arguments
+    =========
+
+    WANTT   (input) LOGICAL
+            = .TRUE. : the full Schur form T is required;
+            = .FALSE.: only eigenvalues are required.
+
+    WANTZ   (input) LOGICAL
+            = .TRUE. : the matrix of Schur vectors Z is required;
+            = .FALSE.: Schur vectors are not required.
+
+    N       (input) INTEGER
+            The order of the matrix H.  N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            It is assumed that H is already upper triangular in rows and
+            columns IHI+1:N, and that H(ILO,ILO-1) = 0 (unless ILO = 1).
+            ZLAHQR works primarily with the Hessenberg submatrix in rows
+            and columns ILO to IHI, but applies transformations to all of
+            H if WANTT is .TRUE..
+            1 <= ILO <= max(1,IHI); IHI <= N.
+
+    H       (input/output) COMPLEX*16 array, dimension (LDH,N)
+            On entry, the upper Hessenberg matrix H.
+            On exit, if WANTT is .TRUE., H is upper triangular in rows
+            and columns ILO:IHI, with any 2-by-2 diagonal blocks in
+            standard form. If WANTT is .FALSE., the contents of H are
+            unspecified on exit.
+
+    LDH     (input) INTEGER
+            The leading dimension of the array H. LDH >= max(1,N).
+
+    W       (output) COMPLEX*16 array, dimension (N)
+            The computed eigenvalues ILO to IHI are stored in the
+            corresponding elements of W. If WANTT is .TRUE., the
+            eigenvalues are stored in the same order as on the diagonal
+            of the Schur form returned in H, with W(i) = H(i,i).
+
+    ILOZ    (input) INTEGER
+    IHIZ    (input) INTEGER
+            Specify the rows of Z to which transformations must be
+            applied if WANTZ is .TRUE..
+            1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
+
+    Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+            If WANTZ is .TRUE., on entry Z must contain the current
+            matrix Z of transformations accumulated by ZHSEQR, and on
+            exit Z has been updated; transformations are applied only to
+            the submatrix Z(ILOZ:IHIZ,ILO:IHI).
+            If WANTZ is .FALSE., Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z. LDZ >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            > 0: if INFO = i, ZLAHQR failed to compute all the
+                 eigenvalues ILO to IHI in a total of 30*(IHI-ILO+1)
+                 iterations; elements i+1:ihi of W contain those
+                 eigenvalues which have been successfully computed.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    h_dim1 = *ldh;
+    h_offset = 1 + h_dim1;
+    h__ -= h_offset;
+    --w;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+
+    /* Function Body */
+    *info = 0;
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*ilo == *ihi) {
+	i__1 = *ilo;
+	i__2 = *ilo + *ilo * h_dim1;
+	w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
+	return 0;
+    }
+
+    nh = *ihi - *ilo + 1;
+    nz = *ihiz - *iloz + 1;
+
+/*
+       Set machine-dependent constants for the stopping criterion.
+       If norm(H) <= sqrt(OVFL), overflow should not occur.
+*/
+
+    ulp = PRECISION;
+    smlnum = SAFEMINIMUM / ulp;
+
+/*
+       I1 and I2 are the indices of the first row and last column of H
+       to which transformations must be applied. If eigenvalues only are
+       being computed, I1 and I2 are set inside the main loop.
+*/
+
+    if (*wantt) {
+	i1 = 1;
+	i2 = *n;
+    }
+
+/*     ITN is the total number of QR iterations allowed. */
+
+    itn = nh * 30;
+
+/*
+       The main loop begins here. I is the loop index and decreases from
+       IHI to ILO in steps of 1. Each iteration of the loop works
+       with the active submatrix in rows and columns L to I.
+       Eigenvalues I+1 to IHI have already converged. Either L = ILO, or
+       H(L,L-1) is negligible so that the matrix splits.
+*/
+
+    i__ = *ihi;
+L10:
+    if (i__ < *ilo) {
+	goto L130;
+    }
+
+/*
+       Perform QR iterations on rows and columns ILO to I until a
+       submatrix of order 1 splits off at the bottom because a
+       subdiagonal element has become negligible.
+*/
+
+    l = *ilo;
+    i__1 = itn;
+    for (its = 0; its <= i__1; ++its) {
+
+/*        Look for a single small subdiagonal element. */
+
+	i__2 = l + 1;
+	for (k = i__; k >= i__2; --k) {
+	    i__3 = k - 1 + (k - 1) * h_dim1;
+	    i__4 = k + k * h_dim1;
+	    tst1 = (d__1 = h__[i__3].r, abs(d__1)) + (d__2 = d_imag(&h__[k -
+		    1 + (k - 1) * h_dim1]), abs(d__2)) + ((d__3 = h__[i__4].r,
+		     abs(d__3)) + (d__4 = d_imag(&h__[k + k * h_dim1]), abs(
+		    d__4)));
+	    if (tst1 == 0.) {
+		i__3 = i__ - l + 1;
+		tst1 = zlanhs_("1", &i__3, &h__[l + l * h_dim1], ldh, rwork);
+	    }
+	    i__3 = k + (k - 1) * h_dim1;
+/* Computing MAX */
+	    d__2 = ulp * tst1;
+	    if ((d__1 = h__[i__3].r, abs(d__1)) <= max(d__2,smlnum)) {
+		goto L30;
+	    }
+/* L20: */
+	}
+L30:
+	l = k;
+	if (l > *ilo) {
+
+/*           H(L,L-1) is negligible */
+
+	    i__2 = l + (l - 1) * h_dim1;
+	    h__[i__2].r = 0., h__[i__2].i = 0.;
+	}
+
+/*        Exit from loop if a submatrix of order 1 has split off. */
+
+	if (l >= i__) {
+	    goto L120;
+	}
+
+/*
+          Now the active submatrix is in rows and columns L to I. If
+          eigenvalues only are being computed, only the active submatrix
+          need be transformed.
+*/
+
+	if (! (*wantt)) {
+	    i1 = l;
+	    i2 = i__;
+	}
+
+	if ((its == 10) || (its == 20)) {
+
+/*           Exceptional shift. */
+
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    s = (d__1 = h__[i__2].r, abs(d__1)) * .75;
+	    i__2 = i__ + i__ * h_dim1;
+	    z__1.r = s + h__[i__2].r, z__1.i = h__[i__2].i;
+	    t.r = z__1.r, t.i = z__1.i;
+	} else {
+
+/*           Wilkinson's shift. */
+
+	    i__2 = i__ + i__ * h_dim1;
+	    t.r = h__[i__2].r, t.i = h__[i__2].i;
+	    i__2 = i__ - 1 + i__ * h_dim1;
+	    i__3 = i__ + (i__ - 1) * h_dim1;
+	    d__1 = h__[i__3].r;
+	    z__1.r = d__1 * h__[i__2].r, z__1.i = d__1 * h__[i__2].i;
+	    u.r = z__1.r, u.i = z__1.i;
+	    if ((u.r != 0.) || (u.i != 0.)) {
+		i__2 = i__ - 1 + (i__ - 1) * h_dim1;
+		z__2.r = h__[i__2].r - t.r, z__2.i = h__[i__2].i - t.i;
+		z__1.r = z__2.r * .5, z__1.i = z__2.i * .5;
+		x.r = z__1.r, x.i = z__1.i;
+		z__3.r = x.r * x.r - x.i * x.i, z__3.i = x.r * x.i + x.i *
+			x.r;
+		z__2.r = z__3.r + u.r, z__2.i = z__3.i + u.i;
+		z_sqrt(&z__1, &z__2);
+		y.r = z__1.r, y.i = z__1.i;
+		if (x.r * y.r + d_imag(&x) * d_imag(&y) < 0.) {
+		    z__1.r = -y.r, z__1.i = -y.i;
+		    y.r = z__1.r, y.i = z__1.i;
+		}
+		z__3.r = x.r + y.r, z__3.i = x.i + y.i;
+		zladiv_(&z__2, &u, &z__3);
+		z__1.r = t.r - z__2.r, z__1.i = t.i - z__2.i;
+		t.r = z__1.r, t.i = z__1.i;
+	    }
+	}
+
+/*        Look for two consecutive small subdiagonal elements. */
+
+	i__2 = l + 1;
+	for (m = i__ - 1; m >= i__2; --m) {
+
+/*
+             Determine the effect of starting the single-shift QR
+             iteration at row M, and see if this would make H(M,M-1)
+             negligible.
+*/
+
+	    i__3 = m + m * h_dim1;
+	    h11.r = h__[i__3].r, h11.i = h__[i__3].i;
+	    i__3 = m + 1 + (m + 1) * h_dim1;
+	    h22.r = h__[i__3].r, h22.i = h__[i__3].i;
+	    z__1.r = h11.r - t.r, z__1.i = h11.i - t.i;
+	    h11s.r = z__1.r, h11s.i = z__1.i;
+	    i__3 = m + 1 + m * h_dim1;
+	    h21 = h__[i__3].r;
+	    s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2))
+		     + abs(h21);
+	    z__1.r = h11s.r / s, z__1.i = h11s.i / s;
+	    h11s.r = z__1.r, h11s.i = z__1.i;
+	    h21 /= s;
+	    v[0].r = h11s.r, v[0].i = h11s.i;
+	    v[1].r = h21, v[1].i = 0.;
+	    i__3 = m + (m - 1) * h_dim1;
+	    h10 = h__[i__3].r;
+	    tst1 = ((d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(
+		    d__2))) * ((d__3 = h11.r, abs(d__3)) + (d__4 = d_imag(&
+		    h11), abs(d__4)) + ((d__5 = h22.r, abs(d__5)) + (d__6 =
+		    d_imag(&h22), abs(d__6))));
+	    if ((d__1 = h10 * h21, abs(d__1)) <= ulp * tst1) {
+		goto L50;
+	    }
+/* L40: */
+	}
+	i__2 = l + l * h_dim1;
+	h11.r = h__[i__2].r, h11.i = h__[i__2].i;
+	i__2 = l + 1 + (l + 1) * h_dim1;
+	h22.r = h__[i__2].r, h22.i = h__[i__2].i;
+	z__1.r = h11.r - t.r, z__1.i = h11.i - t.i;
+	h11s.r = z__1.r, h11s.i = z__1.i;
+	i__2 = l + 1 + l * h_dim1;
+	h21 = h__[i__2].r;
+	s = (d__1 = h11s.r, abs(d__1)) + (d__2 = d_imag(&h11s), abs(d__2)) +
+		abs(h21);
+	z__1.r = h11s.r / s, z__1.i = h11s.i / s;
+	h11s.r = z__1.r, h11s.i = z__1.i;
+	h21 /= s;
+	v[0].r = h11s.r, v[0].i = h11s.i;
+	v[1].r = h21, v[1].i = 0.;
+L50:
+
+/*        Single-shift QR step */
+
+	i__2 = i__ - 1;
+	for (k = m; k <= i__2; ++k) {
+
+/*
+             The first iteration of this loop determines a reflection G
+             from the vector V and applies it from left and right to H,
+             thus creating a nonzero bulge below the subdiagonal.
+
+             Each subsequent iteration determines a reflection G to
+             restore the Hessenberg form in the (K-1)th column, and thus
+             chases the bulge one step toward the bottom of the active
+             submatrix.
+
+             V(2) is always real before the call to ZLARFG, and hence
+             after the call T2 ( = T1*V(2) ) is also real.
+*/
+
+	    if (k > m) {
+		zcopy_(&c__2, &h__[k + (k - 1) * h_dim1], &c__1, v, &c__1);
+	    }
+	    zlarfg_(&c__2, v, &v[1], &c__1, &t1);
+	    if (k > m) {
+		i__3 = k + (k - 1) * h_dim1;
+		h__[i__3].r = v[0].r, h__[i__3].i = v[0].i;
+		i__3 = k + 1 + (k - 1) * h_dim1;
+		h__[i__3].r = 0., h__[i__3].i = 0.;
+	    }
+	    v2.r = v[1].r, v2.i = v[1].i;
+	    z__1.r = t1.r * v2.r - t1.i * v2.i, z__1.i = t1.r * v2.i + t1.i *
+		    v2.r;
+	    t2 = z__1.r;
+
+/*
+             Apply G from the left to transform the rows of the matrix
+             in columns K to I2.
+*/
+
+	    i__3 = i2;
+	    for (j = k; j <= i__3; ++j) {
+		d_cnjg(&z__3, &t1);
+		i__4 = k + j * h_dim1;
+		z__2.r = z__3.r * h__[i__4].r - z__3.i * h__[i__4].i, z__2.i =
+			 z__3.r * h__[i__4].i + z__3.i * h__[i__4].r;
+		i__5 = k + 1 + j * h_dim1;
+		z__4.r = t2 * h__[i__5].r, z__4.i = t2 * h__[i__5].i;
+		z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
+		sum.r = z__1.r, sum.i = z__1.i;
+		i__4 = k + j * h_dim1;
+		i__5 = k + j * h_dim1;
+		z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i;
+		h__[i__4].r = z__1.r, h__[i__4].i = z__1.i;
+		i__4 = k + 1 + j * h_dim1;
+		i__5 = k + 1 + j * h_dim1;
+		z__2.r = sum.r * v2.r - sum.i * v2.i, z__2.i = sum.r * v2.i +
+			sum.i * v2.r;
+		z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i;
+		h__[i__4].r = z__1.r, h__[i__4].i = z__1.i;
+/* L60: */
+	    }
+
+/*
+             Apply G from the right to transform the columns of the
+             matrix in rows I1 to min(K+2,I).
+
+   Computing MIN
+*/
+	    i__4 = k + 2;
+	    i__3 = min(i__4,i__);
+	    for (j = i1; j <= i__3; ++j) {
+		i__4 = j + k * h_dim1;
+		z__2.r = t1.r * h__[i__4].r - t1.i * h__[i__4].i, z__2.i =
+			t1.r * h__[i__4].i + t1.i * h__[i__4].r;
+		i__5 = j + (k + 1) * h_dim1;
+		z__3.r = t2 * h__[i__5].r, z__3.i = t2 * h__[i__5].i;
+		z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+		sum.r = z__1.r, sum.i = z__1.i;
+		i__4 = j + k * h_dim1;
+		i__5 = j + k * h_dim1;
+		z__1.r = h__[i__5].r - sum.r, z__1.i = h__[i__5].i - sum.i;
+		h__[i__4].r = z__1.r, h__[i__4].i = z__1.i;
+		i__4 = j + (k + 1) * h_dim1;
+		i__5 = j + (k + 1) * h_dim1;
+		d_cnjg(&z__3, &v2);
+		z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r *
+			z__3.i + sum.i * z__3.r;
+		z__1.r = h__[i__5].r - z__2.r, z__1.i = h__[i__5].i - z__2.i;
+		h__[i__4].r = z__1.r, h__[i__4].i = z__1.i;
+/* L70: */
+	    }
+
+	    if (*wantz) {
+
+/*              Accumulate transformations in the matrix Z */
+
+		i__3 = *ihiz;
+		for (j = *iloz; j <= i__3; ++j) {
+		    i__4 = j + k * z_dim1;
+		    z__2.r = t1.r * z__[i__4].r - t1.i * z__[i__4].i, z__2.i =
+			     t1.r * z__[i__4].i + t1.i * z__[i__4].r;
+		    i__5 = j + (k + 1) * z_dim1;
+		    z__3.r = t2 * z__[i__5].r, z__3.i = t2 * z__[i__5].i;
+		    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+		    sum.r = z__1.r, sum.i = z__1.i;
+		    i__4 = j + k * z_dim1;
+		    i__5 = j + k * z_dim1;
+		    z__1.r = z__[i__5].r - sum.r, z__1.i = z__[i__5].i -
+			    sum.i;
+		    z__[i__4].r = z__1.r, z__[i__4].i = z__1.i;
+		    i__4 = j + (k + 1) * z_dim1;
+		    i__5 = j + (k + 1) * z_dim1;
+		    d_cnjg(&z__3, &v2);
+		    z__2.r = sum.r * z__3.r - sum.i * z__3.i, z__2.i = sum.r *
+			     z__3.i + sum.i * z__3.r;
+		    z__1.r = z__[i__5].r - z__2.r, z__1.i = z__[i__5].i -
+			    z__2.i;
+		    z__[i__4].r = z__1.r, z__[i__4].i = z__1.i;
+/* L80: */
+		}
+	    }
+
+	    if (k == m && m > l) {
+
+/*
+                If the QR step was started at row M > L because two
+                consecutive small subdiagonals were found, then extra
+                scaling must be performed to ensure that H(M,M-1) remains
+                real.
+*/
+
+		z__1.r = 1. - t1.r, z__1.i = 0. - t1.i;
+		temp.r = z__1.r, temp.i = z__1.i;
+		d__1 = z_abs(&temp);
+		z__1.r = temp.r / d__1, z__1.i = temp.i / d__1;
+		temp.r = z__1.r, temp.i = z__1.i;
+		i__3 = m + 1 + m * h_dim1;
+		i__4 = m + 1 + m * h_dim1;
+		d_cnjg(&z__2, &temp);
+		z__1.r = h__[i__4].r * z__2.r - h__[i__4].i * z__2.i, z__1.i =
+			 h__[i__4].r * z__2.i + h__[i__4].i * z__2.r;
+		h__[i__3].r = z__1.r, h__[i__3].i = z__1.i;
+		if (m + 2 <= i__) {
+		    i__3 = m + 2 + (m + 1) * h_dim1;
+		    i__4 = m + 2 + (m + 1) * h_dim1;
+		    z__1.r = h__[i__4].r * temp.r - h__[i__4].i * temp.i,
+			    z__1.i = h__[i__4].r * temp.i + h__[i__4].i *
+			    temp.r;
+		    h__[i__3].r = z__1.r, h__[i__3].i = z__1.i;
+		}
+		i__3 = i__;
+		for (j = m; j <= i__3; ++j) {
+		    if (j != m + 1) {
+			if (i2 > j) {
+			    i__4 = i2 - j;
+			    zscal_(&i__4, &temp, &h__[j + (j + 1) * h_dim1],
+				    ldh);
+			}
+			i__4 = j - i1;
+			d_cnjg(&z__1, &temp);
+			zscal_(&i__4, &z__1, &h__[i1 + j * h_dim1], &c__1);
+			if (*wantz) {
+			    d_cnjg(&z__1, &temp);
+			    zscal_(&nz, &z__1, &z__[*iloz + j * z_dim1], &
+				    c__1);
+			}
+		    }
+/* L90: */
+		}
+	    }
+/* L100: */
+	}
+
+/*        Ensure that H(I,I-1) is real. */
+
+	i__2 = i__ + (i__ - 1) * h_dim1;
+	temp.r = h__[i__2].r, temp.i = h__[i__2].i;
+	if (d_imag(&temp) != 0.) {
+	    rtemp = z_abs(&temp);
+	    i__2 = i__ + (i__ - 1) * h_dim1;
+	    h__[i__2].r = rtemp, h__[i__2].i = 0.;
+	    z__1.r = temp.r / rtemp, z__1.i = temp.i / rtemp;
+	    temp.r = z__1.r, temp.i = z__1.i;
+	    if (i2 > i__) {
+		i__2 = i2 - i__;
+		d_cnjg(&z__1, &temp);
+		zscal_(&i__2, &z__1, &h__[i__ + (i__ + 1) * h_dim1], ldh);
+	    }
+	    i__2 = i__ - i1;
+	    zscal_(&i__2, &temp, &h__[i1 + i__ * h_dim1], &c__1);
+	    if (*wantz) {
+		zscal_(&nz, &temp, &z__[*iloz + i__ * z_dim1], &c__1);
+	    }
+	}
+
+/* L110: */
+    }
+
+/*     Failure to converge in remaining number of iterations */
+
+    *info = i__;
+    return 0;
+
+L120:
+
+/*     H(I,I-1) is negligible: one eigenvalue has converged. */
+
+    i__1 = i__;
+    i__2 = i__ + i__ * h_dim1;
+    w[i__1].r = h__[i__2].r, w[i__1].i = h__[i__2].i;
+
+/*
+       Decrement number of remaining iterations, and return to start of
+       the main loop with new value of I.
+*/
+
+    itn -= its;
+    i__ = l - 1;
+    goto L10;
+
+L130:
+    return 0;
+
+/*     End of ZLAHQR */
+
+} /* zlahqr_ */
+
+/* Subroutine */ int zlahrd_(integer *n, integer *k, integer *nb,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *t,
+	integer *ldt, doublecomplex *y, integer *ldy)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, t_dim1, t_offset, y_dim1, y_offset, i__1, i__2,
+	    i__3;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex ei;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *),
+	    zcopy_(integer *, doublecomplex *, integer *, doublecomplex *,
+	    integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *, integer *), ztrmv_(char *, char *,
+	    char *, integer *, doublecomplex *, integer *, doublecomplex *,
+	    integer *), zlarfg_(integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *),
+	    zlacgv_(integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLAHRD reduces the first NB columns of a complex general n-by-(n-k+1)
+    matrix A so that elements below the k-th subdiagonal are zero. The
+    reduction is performed by a unitary similarity transformation
+    Q' * A * Q. The routine returns the matrices V and T which determine
+    Q as a block reflector I - V*T*V', and also the matrix Y = A * V * T.
+
+    This is an auxiliary routine called by ZGEHRD.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    K       (input) INTEGER
+            The offset for the reduction. Elements below the k-th
+            subdiagonal in the first NB columns are reduced to zero.
+
+    NB      (input) INTEGER
+            The number of columns to be reduced.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N-K+1)
+            On entry, the n-by-(n-k+1) general matrix A.
+            On exit, the elements on and above the k-th subdiagonal in
+            the first NB columns are overwritten with the corresponding
+            elements of the reduced matrix; the elements below the k-th
+            subdiagonal, with the array TAU, represent the matrix Q as a
+            product of elementary reflectors. The other columns of A are
+            unchanged. See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    TAU     (output) COMPLEX*16 array, dimension (NB)
+            The scalar factors of the elementary reflectors. See Further
+            Details.
+
+    T       (output) COMPLEX*16 array, dimension (LDT,NB)
+            The upper triangular matrix T.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T.  LDT >= NB.
+
+    Y       (output) COMPLEX*16 array, dimension (LDY,NB)
+            The n-by-nb matrix Y.
+
+    LDY     (input) INTEGER
+            The leading dimension of the array Y. LDY >= max(1,N).
+
+    Further Details
+    ===============
+
+    The matrix Q is represented as a product of nb elementary reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i+k-1) = 0, v(i+k) = 1; v(i+k+1:n) is stored on exit in
+    A(i+k+1:n,i), and tau in TAU(i).
+
+    The elements of the vectors v together form the (n-k+1)-by-nb matrix
+    V which is needed, with T and Y, to apply the transformation to the
+    unreduced part of the matrix, using an update of the form:
+    A := (I - V*T*V') * (A - Y*V').
+
+    The contents of A on exit are illustrated by the following example
+    with n = 7, k = 3 and nb = 2:
+
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( a   h   a   a   a )
+       ( h   h   a   a   a )
+       ( v1  h   a   a   a )
+       ( v1  v2  a   a   a )
+       ( v1  v2  a   a   a )
+
+    where a denotes an element of the original matrix A, h denotes a
+    modified element of the upper Hessenberg matrix H, and vi denotes an
+    element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    --tau;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    y_dim1 = *ldy;
+    y_offset = 1 + y_dim1;
+    y -= y_offset;
+
+    /* Function Body */
+    if (*n <= 1) {
+	return 0;
+    }
+
+    i__1 = *nb;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (i__ > 1) {
+
+/*
+             Update A(1:n,i)
+
+             Compute i-th column of A - Y * V'
+*/
+
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);
+	    i__2 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &a[*k
+		    + i__ - 1 + a_dim1], lda, &c_b60, &a[i__ * a_dim1 + 1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &a[*k + i__ - 1 + a_dim1], lda);
+
+/*
+             Apply I - V * T' * V' to this column (call it b) from the
+             left, using the last column of T as workspace
+
+             Let  V = ( V1 )   and   b = ( b1 )   (first I-1 rows)
+                      ( V2 )             ( b2 )
+
+             where V1 is unit lower triangular
+
+             w := V1' * b1
+*/
+
+	    i__2 = i__ - 1;
+	    zcopy_(&i__2, &a[*k + 1 + i__ * a_dim1], &c__1, &t[*nb * t_dim1 +
+		    1], &c__1);
+	    i__2 = i__ - 1;
+	    ztrmv_("Lower", "Conjugate transpose", "Unit", &i__2, &a[*k + 1 +
+		    a_dim1], lda, &t[*nb * t_dim1 + 1], &c__1);
+
+/*           w := w + V2'*b2 */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ +
+		    a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b60,
+		    &t[*nb * t_dim1 + 1], &c__1);
+
+/*           w := T'*w */
+
+	    i__2 = i__ - 1;
+	    ztrmv_("Upper", "Conjugate transpose", "Non-unit", &i__2, &t[
+		    t_offset], ldt, &t[*nb * t_dim1 + 1], &c__1);
+
+/*           b2 := b2 - V2*w */
+
+	    i__2 = *n - *k - i__ + 1;
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[*k + i__ + a_dim1],
+		     lda, &t[*nb * t_dim1 + 1], &c__1, &c_b60, &a[*k + i__ +
+		    i__ * a_dim1], &c__1);
+
+/*           b1 := b1 - V1*w */
+
+	    i__2 = i__ - 1;
+	    ztrmv_("Lower", "No transpose", "Unit", &i__2, &a[*k + 1 + a_dim1]
+		    , lda, &t[*nb * t_dim1 + 1], &c__1);
+	    i__2 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zaxpy_(&i__2, &z__1, &t[*nb * t_dim1 + 1], &c__1, &a[*k + 1 + i__
+		    * a_dim1], &c__1);
+
+	    i__2 = *k + i__ - 1 + (i__ - 1) * a_dim1;
+	    a[i__2].r = ei.r, a[i__2].i = ei.i;
+	}
+
+/*
+          Generate the elementary reflector H(i) to annihilate
+          A(k+i+1:n,i)
+*/
+
+	i__2 = *k + i__ + i__ * a_dim1;
+	ei.r = a[i__2].r, ei.i = a[i__2].i;
+	i__2 = *n - *k - i__ + 1;
+/* Computing MIN */
+	i__3 = *k + i__ + 1;
+	zlarfg_(&i__2, &ei, &a[min(i__3,*n) + i__ * a_dim1], &c__1, &tau[i__])
+		;
+	i__2 = *k + i__ + i__ * a_dim1;
+	a[i__2].r = 1., a[i__2].i = 0.;
+
+/*        Compute  Y(1:n,i) */
+
+	i__2 = *n - *k - i__ + 1;
+	zgemv_("No transpose", n, &i__2, &c_b60, &a[(i__ + 1) * a_dim1 + 1],
+		lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &y[i__ *
+		y_dim1 + 1], &c__1);
+	i__2 = *n - *k - i__ + 1;
+	i__3 = i__ - 1;
+	zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[*k + i__ +
+		a_dim1], lda, &a[*k + i__ + i__ * a_dim1], &c__1, &c_b59, &t[
+		i__ * t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	z__1.r = -1., z__1.i = -0.;
+	zgemv_("No transpose", n, &i__2, &z__1, &y[y_offset], ldy, &t[i__ *
+		t_dim1 + 1], &c__1, &c_b60, &y[i__ * y_dim1 + 1], &c__1);
+	zscal_(n, &tau[i__], &y[i__ * y_dim1 + 1], &c__1);
+
+/*        Compute T(1:i,i) */
+
+	i__2 = i__ - 1;
+	i__3 = i__;
+	z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+	zscal_(&i__2, &z__1, &t[i__ * t_dim1 + 1], &c__1);
+	i__2 = i__ - 1;
+	ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[t_offset], ldt,
+		&t[i__ * t_dim1 + 1], &c__1)
+		;
+	i__2 = i__ + i__ * t_dim1;
+	i__3 = i__;
+	t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+
+/* L10: */
+    }
+    i__1 = *k + *nb + *nb * a_dim1;
+    a[i__1].r = ei.r, a[i__1].i = ei.i;
+
+    return 0;
+
+/*     End of ZLAHRD */
+
+} /* zlahrd_ */
+
+/* Subroutine */ int zlals0_(integer *icompq, integer *nl, integer *nr,
+	integer *sqre, integer *nrhs, doublecomplex *b, integer *ldb,
+	doublecomplex *bx, integer *ldbx, integer *perm, integer *givptr,
+	integer *givcol, integer *ldgcol, doublereal *givnum, integer *ldgnum,
+	 doublereal *poles, doublereal *difl, doublereal *difr, doublereal *
+	z__, integer *k, doublereal *c__, doublereal *s, doublereal *rwork,
+	integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, difr_dim1, difr_offset, givnum_dim1,
+	    givnum_offset, poles_dim1, poles_offset, b_dim1, b_offset,
+	    bx_dim1, bx_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, m, n;
+    static doublereal dj;
+    static integer nlp1, jcol;
+    static doublereal temp;
+    static integer jrow;
+    extern doublereal dnrm2_(integer *, doublereal *, integer *);
+    static doublereal diflj, difrj, dsigj;
+    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, doublereal *, integer *), zdrot_(integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublereal *, doublereal *);
+    extern doublereal dlamc3_(doublereal *, doublereal *);
+    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), xerbla_(char *, integer *);
+    static doublereal dsigjp;
+    extern /* Subroutine */ int zdscal_(integer *, doublereal *,
+	    doublecomplex *, integer *), zlascl_(char *, integer *, integer *,
+	     doublereal *, doublereal *, integer *, integer *, doublecomplex *
+	    , integer *, integer *), zlacpy_(char *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       December 1, 1999
+
+
+    Purpose
+    =======
+
+    ZLALS0 applies back the multiplying factors of either the left or the
+    right singular vector matrix of a diagonal matrix appended by a row
+    to the right hand side matrix B in solving the least squares problem
+    using the divide-and-conquer SVD approach.
+
+    For the left singular vector matrix, three types of orthogonal
+    matrices are involved:
+
+    (1L) Givens rotations: the number of such rotations is GIVPTR; the
+         pairs of columns/rows they were applied to are stored in GIVCOL;
+         and the C- and S-values of these rotations are stored in GIVNUM.
+
+    (2L) Permutation. The (NL+1)-st row of B is to be moved to the first
+         row, and for J=2:N, PERM(J)-th row of B is to be moved to the
+         J-th row.
+
+    (3L) The left singular vector matrix of the remaining matrix.
+
+    For the right singular vector matrix, four types of orthogonal
+    matrices are involved:
+
+    (1R) The right singular vector matrix of the remaining matrix.
+
+    (2R) If SQRE = 1, one extra Givens rotation to generate the right
+         null space.
+
+    (3R) The inverse transformation of (2L).
+
+    (4R) The inverse transformation of (1L).
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether singular vectors are to be computed in
+           factored form:
+           = 0: Left singular vector matrix.
+           = 1: Right singular vector matrix.
+
+    NL     (input) INTEGER
+           The row dimension of the upper block. NL >= 1.
+
+    NR     (input) INTEGER
+           The row dimension of the lower block. NR >= 1.
+
+    SQRE   (input) INTEGER
+           = 0: the lower block is an NR-by-NR square matrix.
+           = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
+
+           The bidiagonal matrix has row dimension N = NL + NR + 1,
+           and column dimension M = N + SQRE.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input/output) COMPLEX*16 array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B. LDB must be at least
+           max(1,MAX( M, N ) ).
+
+    BX     (workspace) COMPLEX*16 array, dimension ( LDBX, NRHS )
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    PERM   (input) INTEGER array, dimension ( N )
+           The permutations (from deflation and sorting) applied
+           to the two blocks.
+
+    GIVPTR (input) INTEGER
+           The number of Givens rotations which took place in this
+           subproblem.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 )
+           Each pair of numbers indicates a pair of rows/columns
+           involved in a Givens rotation.
+
+    LDGCOL (input) INTEGER
+           The leading dimension of GIVCOL, must be at least N.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           Each number indicates the C or S value used in the
+           corresponding Givens rotation.
+
+    LDGNUM (input) INTEGER
+           The leading dimension of arrays DIFR, POLES and
+           GIVNUM, must be at least K.
+
+    POLES  (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
+           On entry, POLES(1:K, 1) contains the new singular
+           values obtained from solving the secular equation, and
+           POLES(1:K, 2) is an array containing the poles in the secular
+           equation.
+
+    DIFL   (input) DOUBLE PRECISION array, dimension ( K ).
+           On entry, DIFL(I) is the distance between I-th updated
+           (undeflated) singular value and the I-th (undeflated) old
+           singular value.
+
+    DIFR   (input) DOUBLE PRECISION array, dimension ( LDGNUM, 2 ).
+           On entry, DIFR(I, 1) contains the distances between I-th
+           updated (undeflated) singular value and the I+1-th
+           (undeflated) old singular value. And DIFR(I, 2) is the
+           normalizing factor for the I-th right singular vector.
+
+    Z      (input) DOUBLE PRECISION array, dimension ( K )
+           Contain the components of the deflation-adjusted updating row
+           vector.
+
+    K      (input) INTEGER
+           Contains the dimension of the non-deflated matrix,
+           This is the order of the related secular equation. 1 <= K <=N.
+
+    C      (input) DOUBLE PRECISION
+           C contains garbage if SQRE =0 and the C-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    S      (input) DOUBLE PRECISION
+           S contains garbage if SQRE =0 and the S-value of a Givens
+           rotation related to the right null space if SQRE = 1.
+
+    RWORK  (workspace) DOUBLE PRECISION array, dimension
+           ( K*(1+NRHS) + 2*NRHS )
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    --perm;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    difr_dim1 = *ldgnum;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    poles_dim1 = *ldgnum;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    givnum_dim1 = *ldgnum;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    --difl;
+    --z__;
+    --rwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*nl < 1) {
+	*info = -2;
+    } else if (*nr < 1) {
+	*info = -3;
+    } else if ((*sqre < 0) || (*sqre > 1)) {
+	*info = -4;
+    }
+
+    n = *nl + *nr + 1;
+
+    if (*nrhs < 1) {
+	*info = -5;
+    } else if (*ldb < n) {
+	*info = -7;
+    } else if (*ldbx < n) {
+	*info = -9;
+    } else if (*givptr < 0) {
+	*info = -11;
+    } else if (*ldgcol < n) {
+	*info = -13;
+    } else if (*ldgnum < n) {
+	*info = -15;
+    } else if (*k < 1) {
+	*info = -20;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLALS0", &i__1);
+	return 0;
+    }
+
+    m = n + *sqre;
+    nlp1 = *nl + 1;
+
+    if (*icompq == 0) {
+
+/*
+          Apply back orthogonal transformations from the left.
+
+          Step (1L): apply back the Givens rotations performed.
+*/
+
+	i__1 = *givptr;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    zdrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &givnum[i__ + givnum_dim1])
+		    ;
+/* L10: */
+	}
+
+/*        Step (2L): permute rows of B. */
+
+	zcopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx);
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    zcopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1],
+		    ldbx);
+/* L20: */
+	}
+
+/*
+          Step (3L): apply the inverse of the left singular vector
+          matrix to BX.
+*/
+
+	if (*k == 1) {
+	    zcopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb);
+	    if (z__[1] < 0.) {
+		zdscal_(nrhs, &c_b1294, &b[b_offset], ldb);
+	    }
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		diflj = difl[j];
+		dj = poles[j + poles_dim1];
+		dsigj = -poles[j + ((poles_dim1) << (1))];
+		if (j < *k) {
+		    difrj = -difr[j + difr_dim1];
+		    dsigjp = -poles[j + 1 + ((poles_dim1) << (1))];
+		}
+		if ((z__[j] == 0.) || (poles[j + ((poles_dim1) << (1))] == 0.)
+			) {
+		    rwork[j] = 0.;
+		} else {
+		    rwork[j] = -poles[j + ((poles_dim1) << (1))] * z__[j] /
+			    diflj / (poles[j + ((poles_dim1) << (1))] + dj);
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1))
+			    ] == 0.)) {
+			rwork[i__] = 0.;
+		    } else {
+			rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (dlamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigj) - diflj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L30: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if ((z__[i__] == 0.) || (poles[i__ + ((poles_dim1) << (1))
+			    ] == 0.)) {
+			rwork[i__] = 0.;
+		    } else {
+			rwork[i__] = poles[i__ + ((poles_dim1) << (1))] * z__[
+				i__] / (dlamc3_(&poles[i__ + ((poles_dim1) <<
+				(1))], &dsigjp) + difrj) / (poles[i__ + ((
+				poles_dim1) << (1))] + dj);
+		    }
+/* L40: */
+		}
+		rwork[1] = -1.;
+		temp = dnrm2_(k, &rwork[1], &c__1);
+
+/*
+                Since B and BX are complex, the following call to DGEMV
+                is performed in two steps (real and imaginary parts).
+
+                CALL DGEMV( 'T', K, NRHS, ONE, BX, LDBX, WORK, 1, ZERO,
+      $                     B( J, 1 ), LDB )
+*/
+
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			i__4 = jrow + jcol * bx_dim1;
+			rwork[i__] = bx[i__4].r;
+/* L50: */
+		    }
+/* L60: */
+		}
+		dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &
+			c__1);
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			rwork[i__] = d_imag(&bx[jrow + jcol * bx_dim1]);
+/* L70: */
+		    }
+/* L80: */
+		}
+		dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *
+			nrhs], &c__1);
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = j + jcol * b_dim1;
+		    i__4 = jcol + *k;
+		    i__5 = jcol + *k + *nrhs;
+		    z__1.r = rwork[i__4], z__1.i = rwork[i__5];
+		    b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L90: */
+		}
+		zlascl_("G", &c__0, &c__0, &temp, &c_b1015, &c__1, nrhs, &b[j
+			+ b_dim1], ldb, info);
+/* L100: */
+	    }
+	}
+
+/*        Move the deflated rows of BX to B also. */
+
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    zlacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1
+		    + b_dim1], ldb);
+	}
+    } else {
+
+/*
+          Apply back the right orthogonal transformations.
+
+          Step (1R): apply back the new right singular vector matrix
+          to B.
+*/
+
+	if (*k == 1) {
+	    zcopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx);
+	} else {
+	    i__1 = *k;
+	    for (j = 1; j <= i__1; ++j) {
+		dsigj = poles[j + ((poles_dim1) << (1))];
+		if (z__[j] == 0.) {
+		    rwork[j] = 0.;
+		} else {
+		    rwork[j] = -z__[j] / difl[j] / (dsigj + poles[j +
+			    poles_dim1]) / difr[j + ((difr_dim1) << (1))];
+		}
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.) {
+			rwork[i__] = 0.;
+		    } else {
+			d__1 = -poles[i__ + 1 + ((poles_dim1) << (1))];
+			rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difr[
+				i__ + difr_dim1]) / (dsigj + poles[i__ +
+				poles_dim1]) / difr[i__ + ((difr_dim1) << (1))
+				];
+		    }
+/* L110: */
+		}
+		i__2 = *k;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    if (z__[j] == 0.) {
+			rwork[i__] = 0.;
+		    } else {
+			d__1 = -poles[i__ + ((poles_dim1) << (1))];
+			rwork[i__] = z__[j] / (dlamc3_(&dsigj, &d__1) - difl[
+				i__]) / (dsigj + poles[i__ + poles_dim1]) /
+				difr[i__ + ((difr_dim1) << (1))];
+		    }
+/* L120: */
+		}
+
+/*
+                Since B and BX are complex, the following call to DGEMV
+                is performed in two steps (real and imaginary parts).
+
+                CALL DGEMV( 'T', K, NRHS, ONE, B, LDB, WORK, 1, ZERO,
+      $                     BX( J, 1 ), LDBX )
+*/
+
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			i__4 = jrow + jcol * b_dim1;
+			rwork[i__] = b[i__4].r;
+/* L130: */
+		    }
+/* L140: */
+		}
+		dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1], &
+			c__1);
+		i__ = *k + ((*nrhs) << (1));
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = *k;
+		    for (jrow = 1; jrow <= i__3; ++jrow) {
+			++i__;
+			rwork[i__] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L150: */
+		    }
+/* L160: */
+		}
+		dgemv_("T", k, nrhs, &c_b1015, &rwork[*k + 1 + ((*nrhs) << (1)
+			)], k, &rwork[1], &c__1, &c_b324, &rwork[*k + 1 + *
+			nrhs], &c__1);
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = j + jcol * bx_dim1;
+		    i__4 = jcol + *k;
+		    i__5 = jcol + *k + *nrhs;
+		    z__1.r = rwork[i__4], z__1.i = rwork[i__5];
+		    bx[i__3].r = z__1.r, bx[i__3].i = z__1.i;
+/* L170: */
+		}
+/* L180: */
+	    }
+	}
+
+/*
+          Step (2R): if SQRE = 1, apply back the rotation that is
+          related to the right null space of the subproblem.
+*/
+
+	if (*sqre == 1) {
+	    zcopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx);
+	    zdrot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__,
+		    s);
+	}
+	if (*k < max(m,n)) {
+	    i__1 = n - *k;
+	    zlacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 +
+		    bx_dim1], ldbx);
+	}
+
+/*        Step (3R): permute rows of B. */
+
+	zcopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb);
+	if (*sqre == 1) {
+	    zcopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb);
+	}
+	i__1 = n;
+	for (i__ = 2; i__ <= i__1; ++i__) {
+	    zcopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1],
+		    ldb);
+/* L190: */
+	}
+
+/*        Step (4R): apply back the Givens rotations performed. */
+
+	for (i__ = *givptr; i__ >= 1; --i__) {
+	    d__1 = -givnum[i__ + givnum_dim1];
+	    zdrot_(nrhs, &b[givcol[i__ + ((givcol_dim1) << (1))] + b_dim1],
+		    ldb, &b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[
+		    i__ + ((givnum_dim1) << (1))], &d__1);
+/* L200: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLALS0 */
+
+} /* zlals0_ */
+
+/* Subroutine */ int zlalsa_(integer *icompq, integer *smlsiz, integer *n,
+	integer *nrhs, doublecomplex *b, integer *ldb, doublecomplex *bx,
+	integer *ldbx, doublereal *u, integer *ldu, doublereal *vt, integer *
+	k, doublereal *difl, doublereal *difr, doublereal *z__, doublereal *
+	poles, integer *givptr, integer *givcol, integer *ldgcol, integer *
+	perm, doublereal *givnum, doublereal *c__, doublereal *s, doublereal *
+	rwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer givcol_dim1, givcol_offset, perm_dim1, perm_offset, difl_dim1,
+	    difl_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset,
+	    poles_dim1, poles_offset, u_dim1, u_offset, vt_dim1, vt_offset,
+	    z_dim1, z_offset, b_dim1, b_offset, bx_dim1, bx_offset, i__1,
+	    i__2, i__3, i__4, i__5, i__6;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+    integer pow_ii(integer *, integer *);
+
+    /* Local variables */
+    static integer i__, j, i1, ic, lf, nd, ll, nl, nr, im1, nlf, nrf, lvl,
+	    ndb1, nlp1, lvl2, nrp1, jcol, nlvl, sqre, jrow, jimag;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer jreal, inode, ndiml, ndimr;
+    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), zlals0_(integer *, integer *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, integer *, integer *, integer *,
+	    integer *, doublereal *, integer *, doublereal *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
+	     doublereal *, integer *), dlasdt_(integer *, integer *, integer *
+	    , integer *, integer *, integer *, integer *), xerbla_(char *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLALSA is an itermediate step in solving the least squares problem
+    by computing the SVD of the coefficient matrix in compact form (The
+    singular vectors are computed as products of simple orthorgonal
+    matrices.).
+
+    If ICOMPQ = 0, ZLALSA applies the inverse of the left singular vector
+    matrix of an upper bidiagonal matrix to the right hand side; and if
+    ICOMPQ = 1, ZLALSA applies the right singular vector matrix to the
+    right hand side. The singular vector matrices were generated in
+    compact form by ZLALSA.
+
+    Arguments
+    =========
+
+    ICOMPQ (input) INTEGER
+           Specifies whether the left or the right singular vector
+           matrix is involved.
+           = 0: Left singular vector matrix
+           = 1: Right singular vector matrix
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The row and column dimensions of the upper bidiagonal matrix.
+
+    NRHS   (input) INTEGER
+           The number of columns of B and BX. NRHS must be at least 1.
+
+    B      (input) COMPLEX*16 array, dimension ( LDB, NRHS )
+           On input, B contains the right hand sides of the least
+           squares problem in rows 1 through M. On output, B contains
+           the solution X in rows 1 through N.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,MAX( M, N ) ).
+
+    BX     (output) COMPLEX*16 array, dimension ( LDBX, NRHS )
+           On exit, the result of applying the left or right singular
+           vector matrix to B.
+
+    LDBX   (input) INTEGER
+           The leading dimension of BX.
+
+    U      (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ ).
+           On entry, U contains the left singular vector matrices of all
+           subproblems at the bottom level.
+
+    LDU    (input) INTEGER, LDU = > N.
+           The leading dimension of arrays U, VT, DIFL, DIFR,
+           POLES, GIVNUM, and Z.
+
+    VT     (input) DOUBLE PRECISION array, dimension ( LDU, SMLSIZ+1 ).
+           On entry, VT' contains the right singular vector matrices of
+           all subproblems at the bottom level.
+
+    K      (input) INTEGER array, dimension ( N ).
+
+    DIFL   (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+           where NLVL = INT(log_2 (N/(SMLSIZ+1))) + 1.
+
+    DIFR   (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+           On entry, DIFL(*, I) and DIFR(*, 2 * I -1) record
+           distances between singular values on the I-th level and
+           singular values on the (I -1)-th level, and DIFR(*, 2 * I)
+           record the normalizing factors of the right singular vectors
+           matrices of subproblems on I-th level.
+
+    Z      (input) DOUBLE PRECISION array, dimension ( LDU, NLVL ).
+           On entry, Z(1, I) contains the components of the deflation-
+           adjusted updating row vector for subproblems on the I-th
+           level.
+
+    POLES  (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+           On entry, POLES(*, 2 * I -1: 2 * I) contains the new and old
+           singular values involved in the secular equations on the I-th
+           level.
+
+    GIVPTR (input) INTEGER array, dimension ( N ).
+           On entry, GIVPTR( I ) records the number of Givens
+           rotations performed on the I-th problem on the computation
+           tree.
+
+    GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 * NLVL ).
+           On entry, for each I, GIVCOL(*, 2 * I - 1: 2 * I) records the
+           locations of Givens rotations performed on the I-th level on
+           the computation tree.
+
+    LDGCOL (input) INTEGER, LDGCOL = > N.
+           The leading dimension of arrays GIVCOL and PERM.
+
+    PERM   (input) INTEGER array, dimension ( LDGCOL, NLVL ).
+           On entry, PERM(*, I) records permutations done on the I-th
+           level of the computation tree.
+
+    GIVNUM (input) DOUBLE PRECISION array, dimension ( LDU, 2 * NLVL ).
+           On entry, GIVNUM(*, 2 *I -1 : 2 * I) records the C- and S-
+           values of Givens rotations performed on the I-th level on the
+           computation tree.
+
+    C      (input) DOUBLE PRECISION array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           C( I ) contains the C-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    S      (input) DOUBLE PRECISION array, dimension ( N ).
+           On entry, if the I-th subproblem is not square,
+           S( I ) contains the S-value of a Givens rotation related to
+           the right null space of the I-th subproblem.
+
+    RWORK  (workspace) DOUBLE PRECISION array, dimension at least
+           max ( N, (SMLSZ+1)*NRHS*3 ).
+
+    IWORK  (workspace) INTEGER array.
+           The dimension must be at least 3 * N
+
+    INFO   (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    bx_dim1 = *ldbx;
+    bx_offset = 1 + bx_dim1;
+    bx -= bx_offset;
+    givnum_dim1 = *ldu;
+    givnum_offset = 1 + givnum_dim1;
+    givnum -= givnum_offset;
+    poles_dim1 = *ldu;
+    poles_offset = 1 + poles_dim1;
+    poles -= poles_offset;
+    z_dim1 = *ldu;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    difr_dim1 = *ldu;
+    difr_offset = 1 + difr_dim1;
+    difr -= difr_offset;
+    difl_dim1 = *ldu;
+    difl_offset = 1 + difl_dim1;
+    difl -= difl_offset;
+    vt_dim1 = *ldu;
+    vt_offset = 1 + vt_dim1;
+    vt -= vt_offset;
+    u_dim1 = *ldu;
+    u_offset = 1 + u_dim1;
+    u -= u_offset;
+    --k;
+    --givptr;
+    perm_dim1 = *ldgcol;
+    perm_offset = 1 + perm_dim1;
+    perm -= perm_offset;
+    givcol_dim1 = *ldgcol;
+    givcol_offset = 1 + givcol_dim1;
+    givcol -= givcol_offset;
+    --c__;
+    --s;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if ((*icompq < 0) || (*icompq > 1)) {
+	*info = -1;
+    } else if (*smlsiz < 3) {
+	*info = -2;
+    } else if (*n < *smlsiz) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if (*ldb < *n) {
+	*info = -6;
+    } else if (*ldbx < *n) {
+	*info = -8;
+    } else if (*ldu < *n) {
+	*info = -10;
+    } else if (*ldgcol < *n) {
+	*info = -19;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLALSA", &i__1);
+	return 0;
+    }
+
+/*     Book-keeping and  setting up the computation tree. */
+
+    inode = 1;
+    ndiml = inode + *n;
+    ndimr = ndiml + *n;
+
+    dlasdt_(n, &nlvl, &nd, &iwork[inode], &iwork[ndiml], &iwork[ndimr],
+	    smlsiz);
+
+/*
+       The following code applies back the left singular vector factors.
+       For applying back the right singular vector factors, go to 170.
+*/
+
+    if (*icompq == 1) {
+	goto L170;
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by DLASDQ. The corresponding left and right singular vector
+       matrices are in explicit form. First apply back the left
+       singular vector matrices.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+
+/*
+          IC : center row of each node
+          NL : number of rows of left  subproblem
+          NR : number of rows of right subproblem
+          NLF: starting row of the left   subproblem
+          NRF: starting row of the right  subproblem
+*/
+
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlf = ic - nl;
+	nrf = ic + 1;
+
+/*
+          Since B and BX are complex, the following call to DGEMM
+          is performed in two steps (real and imaginary parts).
+
+          CALL DGEMM( 'T', 'N', NL, NRHS, NL, ONE, U( NLF, 1 ), LDU,
+       $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*/
+
+	j = (nl * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nl - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L10: */
+	    }
+/* L20: */
+	}
+	dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, &
+		rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b324, &rwork[1], &
+		nl);
+	j = (nl * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nl - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L30: */
+	    }
+/* L40: */
+	}
+	dgemm_("T", "N", &nl, nrhs, &nl, &c_b1015, &u[nlf + u_dim1], ldu, &
+		rwork[((nl * *nrhs) << (1)) + 1], &nl, &c_b324, &rwork[nl * *
+		nrhs + 1], &nl);
+	jreal = 0;
+	jimag = nl * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nl - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		z__1.r = rwork[i__5], z__1.i = rwork[i__6];
+		bx[i__4].r = z__1.r, bx[i__4].i = z__1.i;
+/* L50: */
+	    }
+/* L60: */
+	}
+
+/*
+          Since B and BX are complex, the following call to DGEMM
+          is performed in two steps (real and imaginary parts).
+
+          CALL DGEMM( 'T', 'N', NR, NRHS, NR, ONE, U( NRF, 1 ), LDU,
+      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*/
+
+	j = (nr * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nr - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L70: */
+	    }
+/* L80: */
+	}
+	dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, &
+		rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b324, &rwork[1], &
+		nr);
+	j = (nr * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nr - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L90: */
+	    }
+/* L100: */
+	}
+	dgemm_("T", "N", &nr, nrhs, &nr, &c_b1015, &u[nrf + u_dim1], ldu, &
+		rwork[((nr * *nrhs) << (1)) + 1], &nr, &c_b324, &rwork[nr * *
+		nrhs + 1], &nr);
+	jreal = 0;
+	jimag = nr * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nr - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		z__1.r = rwork[i__5], z__1.i = rwork[i__6];
+		bx[i__4].r = z__1.r, bx[i__4].i = z__1.i;
+/* L110: */
+	    }
+/* L120: */
+	}
+
+/* L130: */
+    }
+
+/*
+       Next copy the rows of B that correspond to unchanged rows
+       in the bidiagonal matrix to BX.
+*/
+
+    i__1 = nd;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	ic = iwork[inode + i__ - 1];
+	zcopy_(nrhs, &b[ic + b_dim1], ldb, &bx[ic + bx_dim1], ldbx);
+/* L140: */
+    }
+
+/*
+       Finally go through the left singular vector matrices of all
+       the other subproblems bottom-up on the tree.
+*/
+
+    j = pow_ii(&c__2, &nlvl);
+    sqre = 0;
+
+    for (lvl = nlvl; lvl >= 1; --lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          find the first node LF and last node LL on
+          the current level LVL
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__1 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__1);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__1 = ll;
+	for (i__ = lf; i__ <= i__1; ++i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    --j;
+	    zlals0_(icompq, &nl, &nr, &sqre, nrhs, &bx[nlf + bx_dim1], ldbx, &
+		    b[nlf + b_dim1], ldb, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &rwork[1], info);
+/* L150: */
+	}
+/* L160: */
+    }
+    goto L330;
+
+/*     ICOMPQ = 1: applying back the right singular vector factors. */
+
+L170:
+
+/*
+       First now go through the right singular vector matrices of all
+       the tree nodes top-down.
+*/
+
+    j = 0;
+    i__1 = nlvl;
+    for (lvl = 1; lvl <= i__1; ++lvl) {
+	lvl2 = ((lvl) << (1)) - 1;
+
+/*
+          Find the first node LF and last node LL on
+          the current level LVL.
+*/
+
+	if (lvl == 1) {
+	    lf = 1;
+	    ll = 1;
+	} else {
+	    i__2 = lvl - 1;
+	    lf = pow_ii(&c__2, &i__2);
+	    ll = ((lf) << (1)) - 1;
+	}
+	i__2 = lf;
+	for (i__ = ll; i__ >= i__2; --i__) {
+	    im1 = i__ - 1;
+	    ic = iwork[inode + im1];
+	    nl = iwork[ndiml + im1];
+	    nr = iwork[ndimr + im1];
+	    nlf = ic - nl;
+	    nrf = ic + 1;
+	    if (i__ == ll) {
+		sqre = 0;
+	    } else {
+		sqre = 1;
+	    }
+	    ++j;
+	    zlals0_(icompq, &nl, &nr, &sqre, nrhs, &b[nlf + b_dim1], ldb, &bx[
+		    nlf + bx_dim1], ldbx, &perm[nlf + lvl * perm_dim1], &
+		    givptr[j], &givcol[nlf + lvl2 * givcol_dim1], ldgcol, &
+		    givnum[nlf + lvl2 * givnum_dim1], ldu, &poles[nlf + lvl2 *
+		     poles_dim1], &difl[nlf + lvl * difl_dim1], &difr[nlf +
+		    lvl2 * difr_dim1], &z__[nlf + lvl * z_dim1], &k[j], &c__[
+		    j], &s[j], &rwork[1], info);
+/* L180: */
+	}
+/* L190: */
+    }
+
+/*
+       The nodes on the bottom level of the tree were solved
+       by DLASDQ. The corresponding right singular vector
+       matrices are in explicit form. Apply them back.
+*/
+
+    ndb1 = (nd + 1) / 2;
+    i__1 = nd;
+    for (i__ = ndb1; i__ <= i__1; ++i__) {
+	i1 = i__ - 1;
+	ic = iwork[inode + i1];
+	nl = iwork[ndiml + i1];
+	nr = iwork[ndimr + i1];
+	nlp1 = nl + 1;
+	if (i__ == nd) {
+	    nrp1 = nr;
+	} else {
+	    nrp1 = nr + 1;
+	}
+	nlf = ic - nl;
+	nrf = ic + 1;
+
+/*
+          Since B and BX are complex, the following call to DGEMM is
+          performed in two steps (real and imaginary parts).
+
+          CALL DGEMM( 'T', 'N', NLP1, NRHS, NLP1, ONE, VT( NLF, 1 ), LDU,
+      $               B( NLF, 1 ), LDB, ZERO, BX( NLF, 1 ), LDBX )
+*/
+
+	j = (nlp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nlp1 - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L200: */
+	    }
+/* L210: */
+	}
+	dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1],
+		ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b324, &
+		rwork[1], &nlp1);
+	j = (nlp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nlp1 - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L220: */
+	    }
+/* L230: */
+	}
+	dgemm_("T", "N", &nlp1, nrhs, &nlp1, &c_b1015, &vt[nlf + vt_dim1],
+		ldu, &rwork[((nlp1 * *nrhs) << (1)) + 1], &nlp1, &c_b324, &
+		rwork[nlp1 * *nrhs + 1], &nlp1);
+	jreal = 0;
+	jimag = nlp1 * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nlf + nlp1 - 1;
+	    for (jrow = nlf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		z__1.r = rwork[i__5], z__1.i = rwork[i__6];
+		bx[i__4].r = z__1.r, bx[i__4].i = z__1.i;
+/* L240: */
+	    }
+/* L250: */
+	}
+
+/*
+          Since B and BX are complex, the following call to DGEMM is
+          performed in two steps (real and imaginary parts).
+
+          CALL DGEMM( 'T', 'N', NRP1, NRHS, NRP1, ONE, VT( NRF, 1 ), LDU,
+      $               B( NRF, 1 ), LDB, ZERO, BX( NRF, 1 ), LDBX )
+*/
+
+	j = (nrp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nrp1 - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		i__4 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__4].r;
+/* L260: */
+	    }
+/* L270: */
+	}
+	dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1],
+		ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b324, &
+		rwork[1], &nrp1);
+	j = (nrp1 * *nrhs) << (1);
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nrp1 - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++j;
+		rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L280: */
+	    }
+/* L290: */
+	}
+	dgemm_("T", "N", &nrp1, nrhs, &nrp1, &c_b1015, &vt[nrf + vt_dim1],
+		ldu, &rwork[((nrp1 * *nrhs) << (1)) + 1], &nrp1, &c_b324, &
+		rwork[nrp1 * *nrhs + 1], &nrp1);
+	jreal = 0;
+	jimag = nrp1 * *nrhs;
+	i__2 = *nrhs;
+	for (jcol = 1; jcol <= i__2; ++jcol) {
+	    i__3 = nrf + nrp1 - 1;
+	    for (jrow = nrf; jrow <= i__3; ++jrow) {
+		++jreal;
+		++jimag;
+		i__4 = jrow + jcol * bx_dim1;
+		i__5 = jreal;
+		i__6 = jimag;
+		z__1.r = rwork[i__5], z__1.i = rwork[i__6];
+		bx[i__4].r = z__1.r, bx[i__4].i = z__1.i;
+/* L300: */
+	    }
+/* L310: */
+	}
+
+/* L320: */
+    }
+
+L330:
+
+    return 0;
+
+/*     End of ZLALSA */
+
+} /* zlalsa_ */
+
+/* Subroutine */ int zlalsd_(char *uplo, integer *smlsiz, integer *n, integer
+	*nrhs, doublereal *d__, doublereal *e, doublecomplex *b, integer *ldb,
+	 doublereal *rcond, integer *rank, doublecomplex *work, doublereal *
+	rwork, integer *iwork, integer *info)
+{
+    /* System generated locals */
+    integer b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *), log(doublereal), d_sign(doublereal *,
+	    doublereal *);
+
+    /* Local variables */
+    static integer c__, i__, j, k;
+    static doublereal r__;
+    static integer s, u, z__;
+    static doublereal cs;
+    static integer bx;
+    static doublereal sn;
+    static integer st, vt, nm1, st1;
+    static doublereal eps;
+    static integer iwk;
+    static doublereal tol;
+    static integer difl, difr, jcol, irwb, perm, nsub, nlvl, sqre, bxst, jrow,
+	     irwu, jimag;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+    static integer jreal, irwib, poles, sizei, irwrb, nsize;
+    extern /* Subroutine */ int zdrot_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublereal *, doublereal *), zcopy_(
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *)
+	    ;
+    static integer irwvt, icmpq1, icmpq2;
+
+    extern /* Subroutine */ int dlasda_(integer *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *,
+	    doublereal *, integer *, doublereal *, doublereal *, doublereal *,
+	     doublereal *, integer *, integer *, integer *, integer *,
+	    doublereal *, doublereal *, doublereal *, doublereal *, integer *,
+	     integer *), dlascl_(char *, integer *, integer *, doublereal *,
+	    doublereal *, integer *, integer *, doublereal *, integer *,
+	    integer *);
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int dlasdq_(char *, integer *, integer *, integer
+	    *, integer *, integer *, doublereal *, doublereal *, doublereal *,
+	     integer *, doublereal *, integer *, doublereal *, integer *,
+	    doublereal *, integer *), dlaset_(char *, integer *,
+	    integer *, doublereal *, doublereal *, doublereal *, integer *), dlartg_(doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *), xerbla_(char *, integer *);
+    static integer givcol;
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int zlalsa_(integer *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *, integer *,
+	     doublereal *, integer *, doublereal *, integer *, doublereal *,
+	    doublereal *, doublereal *, doublereal *, integer *, integer *,
+	    integer *, integer *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, integer *, integer *), zlascl_(char *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, integer *,
+	    doublecomplex *, integer *, integer *), dlasrt_(char *,
+	    integer *, doublereal *, integer *), zlacpy_(char *,
+	    integer *, integer *, doublecomplex *, integer *, doublecomplex *,
+	     integer *), zlaset_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, doublecomplex *, integer *);
+    static doublereal orgnrm;
+    static integer givnum, givptr, nrwork, irwwrk, smlszp;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1999
+
+
+    Purpose
+    =======
+
+    ZLALSD uses the singular value decomposition of A to solve the least
+    squares problem of finding X to minimize the Euclidean norm of each
+    column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
+    are N-by-NRHS. The solution X overwrites B.
+
+    The singular values of A smaller than RCOND times the largest
+    singular value are treated as zero in solving the least squares
+    problem; in this case a minimum norm solution is returned.
+    The actual singular values are returned in D in ascending order.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.
+
+    Arguments
+    =========
+
+    UPLO   (input) CHARACTER*1
+           = 'U': D and E define an upper bidiagonal matrix.
+           = 'L': D and E define a  lower bidiagonal matrix.
+
+    SMLSIZ (input) INTEGER
+           The maximum size of the subproblems at the bottom of the
+           computation tree.
+
+    N      (input) INTEGER
+           The dimension of the  bidiagonal matrix.  N >= 0.
+
+    NRHS   (input) INTEGER
+           The number of columns of B. NRHS must be at least 1.
+
+    D      (input/output) DOUBLE PRECISION array, dimension (N)
+           On entry D contains the main diagonal of the bidiagonal
+           matrix. On exit, if INFO = 0, D contains its singular values.
+
+    E      (input) DOUBLE PRECISION array, dimension (N-1)
+           Contains the super-diagonal entries of the bidiagonal matrix.
+           On exit, E has been destroyed.
+
+    B      (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+           On input, B contains the right hand sides of the least
+           squares problem. On output, B contains the solution X.
+
+    LDB    (input) INTEGER
+           The leading dimension of B in the calling subprogram.
+           LDB must be at least max(1,N).
+
+    RCOND  (input) DOUBLE PRECISION
+           The singular values of A less than or equal to RCOND times
+           the largest singular value are treated as zero in solving
+           the least squares problem. If RCOND is negative,
+           machine precision is used instead.
+           For example, if diag(S)*X=B were the least squares problem,
+           where diag(S) is a diagonal matrix of singular values, the
+           solution would be X(i) = B(i) / S(i) if S(i) is greater than
+           RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
+           RCOND*max(S).
+
+    RANK   (output) INTEGER
+           The number of singular values of A greater than RCOND times
+           the largest singular value.
+
+    WORK   (workspace) COMPLEX*16 array, dimension at least
+           (N * NRHS).
+
+    RWORK  (workspace) DOUBLE PRECISION array, dimension at least
+           (9*N + 2*N*SMLSIZ + 8*N*NLVL + 3*SMLSIZ*NRHS + (SMLSIZ+1)**2),
+           where
+           NLVL = MAX( 0, INT( LOG_2( MIN( M,N )/(SMLSIZ+1) ) ) + 1 )
+
+    IWORK  (workspace) INTEGER array, dimension at least
+           (3*N*NLVL + 11*N).
+
+    INFO   (output) INTEGER
+           = 0:  successful exit.
+           < 0:  if INFO = -i, the i-th argument had an illegal value.
+           > 0:  The algorithm failed to compute an singular value while
+                 working on the submatrix lying in rows and columns
+                 INFO/(N+1) through MOD(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Ming Gu and Ren-Cang Li, Computer Science Division, University of
+         California at Berkeley, USA
+       Osni Marques, LBNL/NERSC, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+
+    if (*n < 0) {
+	*info = -3;
+    } else if (*nrhs < 1) {
+	*info = -4;
+    } else if ((*ldb < 1) || (*ldb < *n)) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLALSD", &i__1);
+	return 0;
+    }
+
+    eps = EPSILON;
+
+/*     Set up the tolerance. */
+
+    if ((*rcond <= 0.) || (*rcond >= 1.)) {
+	*rcond = eps;
+    }
+
+    *rank = 0;
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    } else if (*n == 1) {
+	if (d__[1] == 0.) {
+	    zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[b_offset], ldb);
+	} else {
+	    *rank = 1;
+	    zlascl_("G", &c__0, &c__0, &d__[1], &c_b1015, &c__1, nrhs, &b[
+		    b_offset], ldb, info);
+	    d__[1] = abs(d__[1]);
+	}
+	return 0;
+    }
+
+/*     Rotate the matrix if it is lower bidiagonal. */
+
+    if (*(unsigned char *)uplo == 'L') {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    dlartg_(&d__[i__], &e[i__], &cs, &sn, &r__);
+	    d__[i__] = r__;
+	    e[i__] = sn * d__[i__ + 1];
+	    d__[i__ + 1] = cs * d__[i__ + 1];
+	    if (*nrhs == 1) {
+		zdrot_(&c__1, &b[i__ + b_dim1], &c__1, &b[i__ + 1 + b_dim1], &
+			c__1, &cs, &sn);
+	    } else {
+		rwork[((i__) << (1)) - 1] = cs;
+		rwork[i__ * 2] = sn;
+	    }
+/* L10: */
+	}
+	if (*nrhs > 1) {
+	    i__1 = *nrhs;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		i__2 = *n - 1;
+		for (j = 1; j <= i__2; ++j) {
+		    cs = rwork[((j) << (1)) - 1];
+		    sn = rwork[j * 2];
+		    zdrot_(&c__1, &b[j + i__ * b_dim1], &c__1, &b[j + 1 + i__
+			    * b_dim1], &c__1, &cs, &sn);
+/* L20: */
+		}
+/* L30: */
+	    }
+	}
+    }
+
+/*     Scale. */
+
+    nm1 = *n - 1;
+    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.) {
+	zlaset_("A", n, nrhs, &c_b59, &c_b59, &b[b_offset], ldb);
+	return 0;
+    }
+
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, &c__1, &d__[1], n, info);
+    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &nm1, &c__1, &e[1], &nm1,
+	    info);
+
+/*
+       If N is smaller than the minimum divide size SMLSIZ, then solve
+       the problem with another solver.
+*/
+
+    if (*n <= *smlsiz) {
+	irwu = 1;
+	irwvt = irwu + *n * *n;
+	irwwrk = irwvt + *n * *n;
+	irwrb = irwwrk;
+	irwib = irwrb + *n * *nrhs;
+	irwb = irwib + *n * *nrhs;
+	dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwu], n);
+	dlaset_("A", n, n, &c_b324, &c_b1015, &rwork[irwvt], n);
+	dlasdq_("U", &c__0, n, n, n, &c__0, &d__[1], &e[1], &rwork[irwvt], n,
+		&rwork[irwu], n, &rwork[irwwrk], &c__1, &rwork[irwwrk], info);
+	if (*info != 0) {
+	    return 0;
+	}
+
+/*
+          In the real version, B is passed to DLASDQ and multiplied
+          internally by Q'. Here B is complex and that product is
+          computed below in two steps (real and imaginary parts).
+*/
+
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		i__3 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__3].r;
+/* L40: */
+	    }
+/* L50: */
+	}
+	dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb],
+		n, &c_b324, &rwork[irwrb], n);
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L60: */
+	    }
+/* L70: */
+	}
+	dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwu], n, &rwork[irwb],
+		n, &c_b324, &rwork[irwib], n);
+	jreal = irwrb - 1;
+	jimag = irwib - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++jreal;
+		++jimag;
+		i__3 = jrow + jcol * b_dim1;
+		i__4 = jreal;
+		i__5 = jimag;
+		z__1.r = rwork[i__4], z__1.i = rwork[i__5];
+		b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L80: */
+	    }
+/* L90: */
+	}
+
+	tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1));
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (d__[i__] <= tol) {
+		zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &b[i__ + b_dim1],
+			ldb);
+	    } else {
+		zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, &
+			b[i__ + b_dim1], ldb, info);
+		++(*rank);
+	    }
+/* L100: */
+	}
+
+/*
+          Since B is complex, the following call to DGEMM is performed
+          in two steps (real and imaginary parts). That is for V * B
+          (in the real version of the code V' is stored in WORK).
+
+          CALL DGEMM( 'T', 'N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
+      $               WORK( NWORK ), N )
+*/
+
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		i__3 = jrow + jcol * b_dim1;
+		rwork[j] = b[i__3].r;
+/* L110: */
+	    }
+/* L120: */
+	}
+	dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb],
+		 n, &c_b324, &rwork[irwrb], n);
+	j = irwb - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++j;
+		rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L130: */
+	    }
+/* L140: */
+	}
+	dgemm_("T", "N", n, nrhs, n, &c_b1015, &rwork[irwvt], n, &rwork[irwb],
+		 n, &c_b324, &rwork[irwib], n);
+	jreal = irwrb - 1;
+	jimag = irwib - 1;
+	i__1 = *nrhs;
+	for (jcol = 1; jcol <= i__1; ++jcol) {
+	    i__2 = *n;
+	    for (jrow = 1; jrow <= i__2; ++jrow) {
+		++jreal;
+		++jimag;
+		i__3 = jrow + jcol * b_dim1;
+		i__4 = jreal;
+		i__5 = jimag;
+		z__1.r = rwork[i__4], z__1.i = rwork[i__5];
+		b[i__3].r = z__1.r, b[i__3].i = z__1.i;
+/* L150: */
+	    }
+/* L160: */
+	}
+
+/*        Unscale. */
+
+	dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n,
+		info);
+	dlasrt_("D", n, &d__[1], info);
+	zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset],
+		ldb, info);
+
+	return 0;
+    }
+
+/*     Book-keeping and setting up some constants. */
+
+    nlvl = (integer) (log((doublereal) (*n) / (doublereal) (*smlsiz + 1)) /
+	    log(2.)) + 1;
+
+    smlszp = *smlsiz + 1;
+
+    u = 1;
+    vt = *smlsiz * *n + 1;
+    difl = vt + smlszp * *n;
+    difr = difl + nlvl * *n;
+    z__ = difr + ((nlvl * *n) << (1));
+    c__ = z__ + nlvl * *n;
+    s = c__ + *n;
+    poles = s + *n;
+    givnum = poles + ((nlvl) << (1)) * *n;
+    nrwork = givnum + ((nlvl) << (1)) * *n;
+    bx = 1;
+
+    irwrb = nrwork;
+    irwib = irwrb + *smlsiz * *nrhs;
+    irwb = irwib + *smlsiz * *nrhs;
+
+    sizei = *n + 1;
+    k = sizei + *n;
+    givptr = k + *n;
+    perm = givptr + *n;
+    givcol = perm + nlvl * *n;
+    iwk = givcol + ((nlvl * *n) << (1));
+
+    st = 1;
+    sqre = 0;
+    icmpq1 = 1;
+    icmpq2 = 0;
+    nsub = 0;
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if ((d__1 = d__[i__], abs(d__1)) < eps) {
+	    d__[i__] = d_sign(&eps, &d__[i__]);
+	}
+/* L170: */
+    }
+
+    i__1 = nm1;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	if (((d__1 = e[i__], abs(d__1)) < eps) || (i__ == nm1)) {
+	    ++nsub;
+	    iwork[nsub] = st;
+
+/*
+             Subproblem found. First determine its size and then
+             apply divide and conquer on it.
+*/
+
+	    if (i__ < nm1) {
+
+/*              A subproblem with E(I) small for I < NM1. */
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else if ((d__1 = e[i__], abs(d__1)) >= eps) {
+
+/*              A subproblem with E(NM1) not too small but I = NM1. */
+
+		nsize = *n - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+	    } else {
+
+/*
+                A subproblem with E(NM1) small. This implies an
+                1-by-1 subproblem at D(N), which is not solved
+                explicitly.
+*/
+
+		nsize = i__ - st + 1;
+		iwork[sizei + nsub - 1] = nsize;
+		++nsub;
+		iwork[nsub] = *n;
+		iwork[sizei + nsub - 1] = 1;
+		zcopy_(nrhs, &b[*n + b_dim1], ldb, &work[bx + nm1], n);
+	    }
+	    st1 = st - 1;
+	    if (nsize == 1) {
+
+/*
+                This is a 1-by-1 subproblem and is not solved
+                explicitly.
+*/
+
+		zcopy_(nrhs, &b[st + b_dim1], ldb, &work[bx + st1], n);
+	    } else if (nsize <= *smlsiz) {
+
+/*              This is a small subproblem and is solved by DLASDQ. */
+
+		dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[vt +
+			st1], n);
+		dlaset_("A", &nsize, &nsize, &c_b324, &c_b1015, &rwork[u +
+			st1], n);
+		dlasdq_("U", &c__0, &nsize, &nsize, &nsize, &c__0, &d__[st], &
+			e[st], &rwork[vt + st1], n, &rwork[u + st1], n, &
+			rwork[nrwork], &c__1, &rwork[nrwork], info)
+			;
+		if (*info != 0) {
+		    return 0;
+		}
+
+/*
+                In the real version, B is passed to DLASDQ and multiplied
+                internally by Q'. Here B is complex and that product is
+                computed below in two steps (real and imaginary parts).
+*/
+
+		j = irwb - 1;
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = st + nsize - 1;
+		    for (jrow = st; jrow <= i__3; ++jrow) {
+			++j;
+			i__4 = jrow + jcol * b_dim1;
+			rwork[j] = b[i__4].r;
+/* L180: */
+		    }
+/* L190: */
+		}
+		dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u +
+			st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb],
+			 &nsize);
+		j = irwb - 1;
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = st + nsize - 1;
+		    for (jrow = st; jrow <= i__3; ++jrow) {
+			++j;
+			rwork[j] = d_imag(&b[jrow + jcol * b_dim1]);
+/* L200: */
+		    }
+/* L210: */
+		}
+		dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[u +
+			st1], n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib],
+			 &nsize);
+		jreal = irwrb - 1;
+		jimag = irwib - 1;
+		i__2 = *nrhs;
+		for (jcol = 1; jcol <= i__2; ++jcol) {
+		    i__3 = st + nsize - 1;
+		    for (jrow = st; jrow <= i__3; ++jrow) {
+			++jreal;
+			++jimag;
+			i__4 = jrow + jcol * b_dim1;
+			i__5 = jreal;
+			i__6 = jimag;
+			z__1.r = rwork[i__5], z__1.i = rwork[i__6];
+			b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L220: */
+		    }
+/* L230: */
+		}
+
+		zlacpy_("A", &nsize, nrhs, &b[st + b_dim1], ldb, &work[bx +
+			st1], n);
+	    } else {
+
+/*              A large problem. Solve it using divide and conquer. */
+
+		dlasda_(&icmpq1, smlsiz, &nsize, &sqre, &d__[st], &e[st], &
+			rwork[u + st1], n, &rwork[vt + st1], &iwork[k + st1],
+			&rwork[difl + st1], &rwork[difr + st1], &rwork[z__ +
+			st1], &rwork[poles + st1], &iwork[givptr + st1], &
+			iwork[givcol + st1], n, &iwork[perm + st1], &rwork[
+			givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &
+			rwork[nrwork], &iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+		bxst = bx + st1;
+		zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &b[st + b_dim1], ldb, &
+			work[bxst], n, &rwork[u + st1], n, &rwork[vt + st1], &
+			iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1]
+			, &rwork[z__ + st1], &rwork[poles + st1], &iwork[
+			givptr + st1], &iwork[givcol + st1], n, &iwork[perm +
+			st1], &rwork[givnum + st1], &rwork[c__ + st1], &rwork[
+			s + st1], &rwork[nrwork], &iwork[iwk], info);
+		if (*info != 0) {
+		    return 0;
+		}
+	    }
+	    st = i__ + 1;
+	}
+/* L240: */
+    }
+
+/*     Apply the singular values and treat the tiny ones as zero. */
+
+    tol = *rcond * (d__1 = d__[idamax_(n, &d__[1], &c__1)], abs(d__1));
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*
+          Some of the elements in D can be negative because 1-by-1
+          subproblems were not solved explicitly.
+*/
+
+	if ((d__1 = d__[i__], abs(d__1)) <= tol) {
+	    zlaset_("A", &c__1, nrhs, &c_b59, &c_b59, &work[bx + i__ - 1], n);
+	} else {
+	    ++(*rank);
+	    zlascl_("G", &c__0, &c__0, &d__[i__], &c_b1015, &c__1, nrhs, &
+		    work[bx + i__ - 1], n, info);
+	}
+	d__[i__] = (d__1 = d__[i__], abs(d__1));
+/* L250: */
+    }
+
+/*     Now apply back the right singular vectors. */
+
+    icmpq2 = 1;
+    i__1 = nsub;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	st = iwork[i__];
+	st1 = st - 1;
+	nsize = iwork[sizei + i__ - 1];
+	bxst = bx + st1;
+	if (nsize == 1) {
+	    zcopy_(nrhs, &work[bxst], n, &b[st + b_dim1], ldb);
+	} else if (nsize <= *smlsiz) {
+
+/*
+             Since B and BX are complex, the following call to DGEMM
+             is performed in two steps (real and imaginary parts).
+
+             CALL DGEMM( 'T', 'N', NSIZE, NRHS, NSIZE, ONE,
+      $                  RWORK( VT+ST1 ), N, RWORK( BXST ), N, ZERO,
+      $                  B( ST, 1 ), LDB )
+*/
+
+	    j = bxst - *n - 1;
+	    jreal = irwb - 1;
+	    i__2 = *nrhs;
+	    for (jcol = 1; jcol <= i__2; ++jcol) {
+		j += *n;
+		i__3 = nsize;
+		for (jrow = 1; jrow <= i__3; ++jrow) {
+		    ++jreal;
+		    i__4 = j + jrow;
+		    rwork[jreal] = work[i__4].r;
+/* L260: */
+		}
+/* L270: */
+	    }
+	    dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1],
+		     n, &rwork[irwb], &nsize, &c_b324, &rwork[irwrb], &nsize);
+	    j = bxst - *n - 1;
+	    jimag = irwb - 1;
+	    i__2 = *nrhs;
+	    for (jcol = 1; jcol <= i__2; ++jcol) {
+		j += *n;
+		i__3 = nsize;
+		for (jrow = 1; jrow <= i__3; ++jrow) {
+		    ++jimag;
+		    rwork[jimag] = d_imag(&work[j + jrow]);
+/* L280: */
+		}
+/* L290: */
+	    }
+	    dgemm_("T", "N", &nsize, nrhs, &nsize, &c_b1015, &rwork[vt + st1],
+		     n, &rwork[irwb], &nsize, &c_b324, &rwork[irwib], &nsize);
+	    jreal = irwrb - 1;
+	    jimag = irwib - 1;
+	    i__2 = *nrhs;
+	    for (jcol = 1; jcol <= i__2; ++jcol) {
+		i__3 = st + nsize - 1;
+		for (jrow = st; jrow <= i__3; ++jrow) {
+		    ++jreal;
+		    ++jimag;
+		    i__4 = jrow + jcol * b_dim1;
+		    i__5 = jreal;
+		    i__6 = jimag;
+		    z__1.r = rwork[i__5], z__1.i = rwork[i__6];
+		    b[i__4].r = z__1.r, b[i__4].i = z__1.i;
+/* L300: */
+		}
+/* L310: */
+	    }
+	} else {
+	    zlalsa_(&icmpq2, smlsiz, &nsize, nrhs, &work[bxst], n, &b[st +
+		    b_dim1], ldb, &rwork[u + st1], n, &rwork[vt + st1], &
+		    iwork[k + st1], &rwork[difl + st1], &rwork[difr + st1], &
+		    rwork[z__ + st1], &rwork[poles + st1], &iwork[givptr +
+		    st1], &iwork[givcol + st1], n, &iwork[perm + st1], &rwork[
+		    givnum + st1], &rwork[c__ + st1], &rwork[s + st1], &rwork[
+		    nrwork], &iwork[iwk], info);
+	    if (*info != 0) {
+		return 0;
+	    }
+	}
+/* L320: */
+    }
+
+/*     Unscale and sort the singular values. */
+
+    dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, n, &c__1, &d__[1], n, info);
+    dlasrt_("D", n, &d__[1], info);
+    zlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, n, nrhs, &b[b_offset], ldb,
+	    info);
+
+    return 0;
+
+/*     End of ZLALSD */
+
+} /* zlalsd_ */
+
+doublereal zlange_(char *norm, integer *m, integer *n, doublecomplex *a,
+	integer *lda, doublereal *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal ret_val, d__1, d__2;
+
+    /* Builtin functions */
+    double z_abs(doublecomplex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal sum, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal value;
+    extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *,
+	     doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLANGE  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    complex matrix A.
+
+    Description
+    ===========
+
+    ZLANGE returns the value
+
+       ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in ZLANGE as described
+            above.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.  When M = 0,
+            ZLANGE is set to zero.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.  When N = 0,
+            ZLANGE is set to zero.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The m by n matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(M,1).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+            where LWORK >= M when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (min(*m,*n) == 0) {
+	value = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]);
+		value = max(d__1,d__2);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.;
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += z_abs(&a[i__ + j * a_dim1]);
+/* L30: */
+	    }
+	    value = max(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += z_abs(&a[i__ + j * a_dim1]);
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.;
+	i__1 = *m;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    d__1 = value, d__2 = work[i__];
+	    value = max(d__1,d__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    zlassq_(m, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of ZLANGE */
+
+} /* zlange_ */
+
+doublereal zlanhe_(char *norm, char *uplo, integer *n, doublecomplex *a,
+	integer *lda, doublereal *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublereal ret_val, d__1, d__2, d__3;
+
+    /* Builtin functions */
+    double z_abs(doublecomplex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal sum, absa, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal value;
+    extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *,
+	     doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLANHE  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    complex hermitian matrix A.
+
+    Description
+    ===========
+
+    ZLANHE returns the value
+
+       ZLANHE = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in ZLANHE as described
+            above.
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            hermitian matrix A is to be referenced.
+            = 'U':  Upper triangular part of A is referenced
+            = 'L':  Lower triangular part of A is referenced
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, ZLANHE is
+            set to zero.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The hermitian matrix A.  If UPLO = 'U', the leading n by n
+            upper triangular part of A contains the upper triangular part
+            of the matrix A, and the strictly lower triangular part of A
+            is not referenced.  If UPLO = 'L', the leading n by n lower
+            triangular part of A contains the lower triangular part of
+            the matrix A, and the strictly upper triangular part of A is
+            not referenced. Note that the imaginary parts of the diagonal
+            elements need not be set and are assumed to be zero.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I' or '1' or 'O'; otherwise,
+            WORK is not referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]);
+		    value = max(d__1,d__2);
+/* L10: */
+		}
+/* Computing MAX */
+		i__2 = j + j * a_dim1;
+		d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1));
+		value = max(d__2,d__3);
+/* L20: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+		i__2 = j + j * a_dim1;
+		d__2 = value, d__3 = (d__1 = a[i__2].r, abs(d__1));
+		value = max(d__2,d__3);
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		    d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]);
+		    value = max(d__1,d__2);
+/* L30: */
+		}
+/* L40: */
+	    }
+	}
+    } else if (((lsame_(norm, "I")) || (lsame_(norm,
+	    "O"))) || (*(unsigned char *)norm == '1')) {
+
+/*        Find normI(A) ( = norm1(A), since A is hermitian). */
+
+	value = 0.;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		sum = 0.;
+		i__2 = j - 1;
+		for (i__ = 1; i__ <= i__2; ++i__) {
+		    absa = z_abs(&a[i__ + j * a_dim1]);
+		    sum += absa;
+		    work[i__] += absa;
+/* L50: */
+		}
+		i__2 = j + j * a_dim1;
+		work[j] = sum + (d__1 = a[i__2].r, abs(d__1));
+/* L60: */
+	    }
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+		d__1 = value, d__2 = work[i__];
+		value = max(d__1,d__2);
+/* L70: */
+	    }
+	} else {
+	    i__1 = *n;
+	    for (i__ = 1; i__ <= i__1; ++i__) {
+		work[i__] = 0.;
+/* L80: */
+	    }
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j + j * a_dim1;
+		sum = work[j] + (d__1 = a[i__2].r, abs(d__1));
+		i__2 = *n;
+		for (i__ = j + 1; i__ <= i__2; ++i__) {
+		    absa = z_abs(&a[i__ + j * a_dim1]);
+		    sum += absa;
+		    work[i__] += absa;
+/* L90: */
+		}
+		value = max(value,sum);
+/* L100: */
+	    }
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	if (lsame_(uplo, "U")) {
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		i__2 = j - 1;
+		zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L110: */
+	    }
+	} else {
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n - j;
+		zlassq_(&i__2, &a[j + 1 + j * a_dim1], &c__1, &scale, &sum);
+/* L120: */
+	    }
+	}
+	sum *= 2;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    if (a[i__2].r != 0.) {
+		i__2 = i__ + i__ * a_dim1;
+		absa = (d__1 = a[i__2].r, abs(d__1));
+		if (scale < absa) {
+/* Computing 2nd power */
+		    d__1 = scale / absa;
+		    sum = sum * (d__1 * d__1) + 1.;
+		    scale = absa;
+		} else {
+/* Computing 2nd power */
+		    d__1 = absa / scale;
+		    sum += d__1 * d__1;
+		}
+	    }
+/* L130: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of ZLANHE */
+
+} /* zlanhe_ */
+
+doublereal zlanhs_(char *norm, integer *n, doublecomplex *a, integer *lda,
+	doublereal *work)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublereal ret_val, d__1, d__2;
+
+    /* Builtin functions */
+    double z_abs(doublecomplex *), sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal sum, scale;
+    extern logical lsame_(char *, char *);
+    static doublereal value;
+    extern /* Subroutine */ int zlassq_(integer *, doublecomplex *, integer *,
+	     doublereal *, doublereal *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLANHS  returns the value of the one norm,  or the Frobenius norm, or
+    the  infinity norm,  or the  element of  largest absolute value  of a
+    Hessenberg matrix A.
+
+    Description
+    ===========
+
+    ZLANHS returns the value
+
+       ZLANHS = ( max(abs(A(i,j))), NORM = 'M' or 'm'
+                (
+                ( norm1(A),         NORM = '1', 'O' or 'o'
+                (
+                ( normI(A),         NORM = 'I' or 'i'
+                (
+                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'
+
+    where  norm1  denotes the  one norm of a matrix (maximum column sum),
+    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and
+    normF  denotes the  Frobenius norm of a matrix (square root of sum of
+    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.
+
+    Arguments
+    =========
+
+    NORM    (input) CHARACTER*1
+            Specifies the value to be returned in ZLANHS as described
+            above.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.  When N = 0, ZLANHS is
+            set to zero.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The n by n upper Hessenberg matrix A; the part of A below the
+            first sub-diagonal is not referenced.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(N,1).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK),
+            where LWORK >= N when NORM = 'I'; otherwise, WORK is not
+            referenced.
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --work;
+
+    /* Function Body */
+    if (*n == 0) {
+	value = 0.;
+    } else if (lsame_(norm, "M")) {
+
+/*        Find max(abs(A(i,j))). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+/* Computing MAX */
+		d__1 = value, d__2 = z_abs(&a[i__ + j * a_dim1]);
+		value = max(d__1,d__2);
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else if ((lsame_(norm, "O")) || (*(unsigned char *
+	    )norm == '1')) {
+
+/*        Find norm1(A). */
+
+	value = 0.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    sum = 0.;
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		sum += z_abs(&a[i__ + j * a_dim1]);
+/* L30: */
+	    }
+	    value = max(value,sum);
+/* L40: */
+	}
+    } else if (lsame_(norm, "I")) {
+
+/*        Find normI(A). */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    work[i__] = 0.;
+/* L50: */
+	}
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		work[i__] += z_abs(&a[i__ + j * a_dim1]);
+/* L60: */
+	    }
+/* L70: */
+	}
+	value = 0.;
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+/* Computing MAX */
+	    d__1 = value, d__2 = work[i__];
+	    value = max(d__1,d__2);
+/* L80: */
+	}
+    } else if ((lsame_(norm, "F")) || (lsame_(norm,
+	    "E"))) {
+
+/*        Find normF(A). */
+
+	scale = 0.;
+	sum = 1.;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = *n, i__4 = j + 1;
+	    i__2 = min(i__3,i__4);
+	    zlassq_(&i__2, &a[j * a_dim1 + 1], &c__1, &scale, &sum);
+/* L90: */
+	}
+	value = scale * sqrt(sum);
+    }
+
+    ret_val = value;
+    return ret_val;
+
+/*     End of ZLANHS */
+
+} /* zlanhs_ */
+
+/* Subroutine */ int zlarcm_(integer *m, integer *n, doublereal *a, integer *
+	lda, doublecomplex *b, integer *ldb, doublecomplex *c__, integer *ldc,
+	 doublereal *rwork)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
+	    i__3, i__4, i__5;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
+	    integer *, doublereal *, doublereal *, integer *, doublereal *,
+	    integer *, doublereal *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLARCM performs a very simple matrix-matrix multiplication:
+             C := A * B,
+    where A is M by M and real; B is M by N and complex;
+    C is M by N and complex.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix A and of the matrix C.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns and rows of the matrix B and
+            the number of columns of the matrix C.
+            N >= 0.
+
+    A       (input) DOUBLE PRECISION array, dimension (LDA, M)
+            A contains the M by M matrix A.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >=max(1,M).
+
+    B       (input) DOUBLE PRECISION array, dimension (LDB, N)
+            B contains the M by N matrix B.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B. LDB >=max(1,M).
+
+    C       (input) COMPLEX*16 array, dimension (LDC, N)
+            C contains the M by N matrix C.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >=max(1,M).
+
+    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*M*N)
+
+    =====================================================================
+
+
+       Quick return if possible.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --rwork;
+
+    /* Function Body */
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * b_dim1;
+	    rwork[(j - 1) * *m + i__] = b[i__3].r;
+/* L10: */
+	}
+/* L20: */
+    }
+
+    l = *m * *n + 1;
+    dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, &
+	    c_b324, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = l + (j - 1) * *m + i__ - 1;
+	    c__[i__3].r = rwork[i__4], c__[i__3].i = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    rwork[(j - 1) * *m + i__] = d_imag(&b[i__ + j * b_dim1]);
+/* L50: */
+	}
+/* L60: */
+    }
+    dgemm_("N", "N", m, n, m, &c_b1015, &a[a_offset], lda, &rwork[1], m, &
+	    c_b324, &rwork[l], m);
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * c_dim1;
+	    i__4 = i__ + j * c_dim1;
+	    d__1 = c__[i__4].r;
+	    i__5 = l + (j - 1) * *m + i__ - 1;
+	    z__1.r = d__1, z__1.i = rwork[i__5];
+	    c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L70: */
+	}
+/* L80: */
+    }
+
+    return 0;
+
+/*     End of ZLARCM */
+
+} /* zlarcm_ */
+
+/* Subroutine */ int zlarf_(char *side, integer *m, integer *n, doublecomplex
+	*v, integer *incv, doublecomplex *tau, doublecomplex *c__, integer *
+	ldc, doublecomplex *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset;
+    doublecomplex z__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLARF applies a complex elementary reflector H to a complex M-by-N
+    matrix C, from either the left or the right. H is represented in the
+    form
+
+          H = I - tau * v * v'
+
+    where tau is a complex scalar and v is a complex vector.
+
+    If tau = 0, then H is taken to be the unit matrix.
+
+    To apply H' (the conjugate transpose of H), supply conjg(tau) instead
+    tau.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) COMPLEX*16 array, dimension
+                       (1 + (M-1)*abs(INCV)) if SIDE = 'L'
+                    or (1 + (N-1)*abs(INCV)) if SIDE = 'R'
+            The vector v in the representation of H. V is not used if
+            TAU = 0.
+
+    INCV    (input) INTEGER
+            The increment between elements of v. INCV <> 0.
+
+    TAU     (input) COMPLEX*16
+            The value tau in the representation of H.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX*16 array, dimension
+                           (N) if SIDE = 'L'
+                        or (M) if SIDE = 'R'
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C */
+
+	if ((tau->r != 0.) || (tau->i != 0.)) {
+
+/*           w := C' * v */
+
+	    zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, &
+		    v[1], incv, &c_b59, &work[1], &c__1);
+
+/*           C := C - v * w' */
+
+	    z__1.r = -tau->r, z__1.i = -tau->i;
+	    zgerc_(m, n, &z__1, &v[1], incv, &work[1], &c__1, &c__[c_offset],
+		    ldc);
+	}
+    } else {
+
+/*        Form  C * H */
+
+	if ((tau->r != 0.) || (tau->i != 0.)) {
+
+/*           w := C * v */
+
+	    zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1],
+		    incv, &c_b59, &work[1], &c__1);
+
+/*           C := C - w * v' */
+
+	    z__1.r = -tau->r, z__1.i = -tau->i;
+	    zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], incv, &c__[c_offset],
+		    ldc);
+	}
+    }
+    return 0;
+
+/*     End of ZLARF */
+
+} /* zlarf_ */
+
+/* Subroutine */ int zlarfb_(char *side, char *trans, char *direct, char *
+	storev, integer *m, integer *n, integer *k, doublecomplex *v, integer
+	*ldv, doublecomplex *t, integer *ldt, doublecomplex *c__, integer *
+	ldc, doublecomplex *work, integer *ldwork)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, t_dim1, t_offset, v_dim1, v_offset, work_dim1,
+	    work_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), zcopy_(integer *, doublecomplex *,
+	    integer *, doublecomplex *, integer *), ztrmm_(char *, char *,
+	    char *, char *, integer *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *,
+	    integer *);
+    static char transt[1];
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLARFB applies a complex block reflector H or its transpose H' to a
+    complex M-by-N matrix C, from either the left or the right.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply H or H' from the Left
+            = 'R': apply H or H' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply H (No transpose)
+            = 'C': apply H' (Conjugate transpose)
+
+    DIRECT  (input) CHARACTER*1
+            Indicates how H is formed from a product of elementary
+            reflectors
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Indicates how the vectors which define the elementary
+            reflectors are stored:
+            = 'C': Columnwise
+            = 'R': Rowwise
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    K       (input) INTEGER
+            The order of the matrix T (= the number of elementary
+            reflectors whose product defines the block reflector).
+
+    V       (input) COMPLEX*16 array, dimension
+                                  (LDV,K) if STOREV = 'C'
+                                  (LDV,M) if STOREV = 'R' and SIDE = 'L'
+                                  (LDV,N) if STOREV = 'R' and SIDE = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C' and SIDE = 'L', LDV >= max(1,M);
+            if STOREV = 'C' and SIDE = 'R', LDV >= max(1,N);
+            if STOREV = 'R', LDV >= K.
+
+    T       (input) COMPLEX*16 array, dimension (LDT,K)
+            The triangular K-by-K matrix T in the representation of the
+            block reflector.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by H*C or H'*C or C*H or C*H'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX*16 array, dimension (LDWORK,K)
+
+    LDWORK  (input) INTEGER
+            The leading dimension of the array WORK.
+            If SIDE = 'L', LDWORK >= max(1,N);
+            if SIDE = 'R', LDWORK >= max(1,M).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    work_dim1 = *ldwork;
+    work_offset = 1 + work_dim1;
+    work -= work_offset;
+
+    /* Function Body */
+    if ((*m <= 0) || (*n <= 0)) {
+	return 0;
+    }
+
+    if (lsame_(trans, "N")) {
+	*(unsigned char *)transt = 'C';
+    } else {
+	*(unsigned char *)transt = 'N';
+    }
+
+    if (lsame_(storev, "C")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1 )    (first K rows)
+                       ( V2 )
+             where  V1  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+		    zlacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L10: */
+		}
+
+/*              W := W * V1 */
+
+		ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2 */
+
+		    i__1 = *m - *k;
+		    zgemm_("Conjugate transpose", "No transpose", n, k, &i__1,
+			     &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[*k + 1 +
+			    v_dim1], ldv, &c_b60, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2 * W' */
+
+		    i__1 = *m - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "Conjugate transpose", &i__1, n, k,
+			     &z__1, &v[*k + 1 + v_dim1], ldv, &work[
+			    work_offset], ldwork, &c_b60, &c__[*k + 1 +
+			    c_dim1], ldc);
+		}
+
+/*              W := W * V1' */
+
+		ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k,
+			&c_b60, &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = j + i__ * c_dim1;
+			i__4 = j + i__ * c_dim1;
+			d_cnjg(&z__2, &work[i__ + j * work_dim1]);
+			z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i -
+				z__2.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L20: */
+		    }
+/* L30: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L40: */
+		}
+
+/*              W := W * V1 */
+
+		ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2 */
+
+		    i__1 = *n - *k;
+		    zgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[*k +
+			    1 + v_dim1], ldv, &c_b60, &work[work_offset],
+			    ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2' */
+
+		    i__1 = *n - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "Conjugate transpose", m, &i__1, k,
+			     &z__1, &work[work_offset], ldwork, &v[*k + 1 +
+			    v_dim1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1],
+			     ldc);
+		}
+
+/*              W := W * V1' */
+
+		ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k,
+			&c_b60, &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L50: */
+		    }
+/* L60: */
+		}
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1 )
+                       ( V2 )    (last K rows)
+             where  V2  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V  =  (C1'*V1 + C2'*V2)  (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+		    zlacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L70: */
+		}
+
+/*              W := W * V2 */
+
+		ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60,
+			 &v[*m - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1 */
+
+		    i__1 = *m - *k;
+		    zgemm_("Conjugate transpose", "No transpose", n, k, &i__1,
+			     &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b60, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1 * W' */
+
+		    i__1 = *m - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "Conjugate transpose", &i__1, n, k,
+			     &z__1, &v[v_offset], ldv, &work[work_offset],
+			    ldwork, &c_b60, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k,
+			&c_b60, &v[*m - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = *m - *k + j + i__ * c_dim1;
+			i__4 = *m - *k + j + i__ * c_dim1;
+			d_cnjg(&z__2, &work[i__ + j * work_dim1]);
+			z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i -
+				z__2.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L80: */
+		    }
+/* L90: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V  =  (C1*V1 + C2*V2)  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L100: */
+		}
+
+/*              W := W * V2 */
+
+		ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60,
+			 &v[*n - *k + 1 + v_dim1], ldv, &work[work_offset],
+			ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1 */
+
+		    i__1 = *n - *k;
+		    zgemm_("No transpose", "No transpose", m, k, &i__1, &
+			    c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b60, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V' */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1' */
+
+		    i__1 = *n - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "Conjugate transpose", m, &i__1, k,
+			     &z__1, &work[work_offset], ldwork, &v[v_offset],
+			    ldv, &c_b60, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2' */
+
+		ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k,
+			&c_b60, &v[*n - *k + 1 + v_dim1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + (*n - *k + j) * c_dim1;
+			i__4 = i__ + (*n - *k + j) * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L110: */
+		    }
+/* L120: */
+		}
+	    }
+	}
+
+    } else if (lsame_(storev, "R")) {
+
+	if (lsame_(direct, "F")) {
+
+/*
+             Let  V =  ( V1  V2 )    (V1: first K columns)
+             where  V1  is unit upper triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C1'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(n, &c__[j + c_dim1], ldc, &work[j * work_dim1 + 1],
+			     &c__1);
+		    zlacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L130: */
+		}
+
+/*              W := W * V1' */
+
+		ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", n, k,
+			&c_b60, &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C2'*V2' */
+
+		    i__1 = *m - *k;
+		    zgemm_("Conjugate transpose", "Conjugate transpose", n, k,
+			     &i__1, &c_b60, &c__[*k + 1 + c_dim1], ldc, &v[(*
+			    k + 1) * v_dim1 + 1], ldv, &c_b60, &work[
+			    work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ztrmm_("Right", "Upper", transt, "Non-unit", n, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C2 := C2 - V2' * W' */
+
+		    i__1 = *m - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("Conjugate transpose", "Conjugate transpose", &
+			    i__1, n, k, &z__1, &v[(*k + 1) * v_dim1 + 1], ldv,
+			     &work[work_offset], ldwork, &c_b60, &c__[*k + 1
+			    + c_dim1], ldc);
+		}
+
+/*              W := W * V1 */
+
+		ztrmm_("Right", "Upper", "No transpose", "Unit", n, k, &c_b60,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = j + i__ * c_dim1;
+			i__4 = j + i__ * c_dim1;
+			d_cnjg(&z__2, &work[i__ + j * work_dim1]);
+			z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i -
+				z__2.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L140: */
+		    }
+/* L150: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C1
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(m, &c__[j * c_dim1 + 1], &c__1, &work[j *
+			    work_dim1 + 1], &c__1);
+/* L160: */
+		}
+
+/*              W := W * V1' */
+
+		ztrmm_("Right", "Upper", "Conjugate transpose", "Unit", m, k,
+			&c_b60, &v[v_offset], ldv, &work[work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C2 * V2' */
+
+		    i__1 = *n - *k;
+		    zgemm_("No transpose", "Conjugate transpose", m, k, &i__1,
+			     &c_b60, &c__[(*k + 1) * c_dim1 + 1], ldc, &v[(*k
+			    + 1) * v_dim1 + 1], ldv, &c_b60, &work[
+			    work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ztrmm_("Right", "Upper", trans, "Non-unit", m, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C2 := C2 - W * V2 */
+
+		    i__1 = *n - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1,
+			     &work[work_offset], ldwork, &v[(*k + 1) * v_dim1
+			    + 1], ldv, &c_b60, &c__[(*k + 1) * c_dim1 + 1],
+			    ldc);
+		}
+
+/*              W := W * V1 */
+
+		ztrmm_("Right", "Upper", "No transpose", "Unit", m, k, &c_b60,
+			 &v[v_offset], ldv, &work[work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + j * c_dim1;
+			i__4 = i__ + j * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L170: */
+		    }
+/* L180: */
+		}
+
+	    }
+
+	} else {
+
+/*
+             Let  V =  ( V1  V2 )    (V2: last K columns)
+             where  V2  is unit lower triangular.
+*/
+
+	    if (lsame_(side, "L")) {
+
+/*
+                Form  H * C  or  H' * C  where  C = ( C1 )
+                                                    ( C2 )
+
+                W := C' * V'  =  (C1'*V1' + C2'*V2') (stored in WORK)
+
+                W := C2'
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(n, &c__[*m - *k + j + c_dim1], ldc, &work[j *
+			    work_dim1 + 1], &c__1);
+		    zlacgv_(n, &work[j * work_dim1 + 1], &c__1);
+/* L190: */
+		}
+
+/*              W := W * V2' */
+
+		ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", n, k,
+			&c_b60, &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*m > *k) {
+
+/*                 W := W + C1'*V1' */
+
+		    i__1 = *m - *k;
+		    zgemm_("Conjugate transpose", "Conjugate transpose", n, k,
+			     &i__1, &c_b60, &c__[c_offset], ldc, &v[v_offset],
+			     ldv, &c_b60, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T'  or  W * T */
+
+		ztrmm_("Right", "Lower", transt, "Non-unit", n, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - V' * W' */
+
+		if (*m > *k) {
+
+/*                 C1 := C1 - V1' * W' */
+
+		    i__1 = *m - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("Conjugate transpose", "Conjugate transpose", &
+			    i__1, n, k, &z__1, &v[v_offset], ldv, &work[
+			    work_offset], ldwork, &c_b60, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		ztrmm_("Right", "Lower", "No transpose", "Unit", n, k, &c_b60,
+			 &v[(*m - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C2 := C2 - W' */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *n;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = *m - *k + j + i__ * c_dim1;
+			i__4 = *m - *k + j + i__ * c_dim1;
+			d_cnjg(&z__2, &work[i__ + j * work_dim1]);
+			z__1.r = c__[i__4].r - z__2.r, z__1.i = c__[i__4].i -
+				z__2.i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L200: */
+		    }
+/* L210: */
+		}
+
+	    } else if (lsame_(side, "R")) {
+
+/*
+                Form  C * H  or  C * H'  where  C = ( C1  C2 )
+
+                W := C * V'  =  (C1*V1' + C2*V2')  (stored in WORK)
+
+                W := C2
+*/
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    zcopy_(m, &c__[(*n - *k + j) * c_dim1 + 1], &c__1, &work[
+			    j * work_dim1 + 1], &c__1);
+/* L220: */
+		}
+
+/*              W := W * V2' */
+
+		ztrmm_("Right", "Lower", "Conjugate transpose", "Unit", m, k,
+			&c_b60, &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+		if (*n > *k) {
+
+/*                 W := W + C1 * V1' */
+
+		    i__1 = *n - *k;
+		    zgemm_("No transpose", "Conjugate transpose", m, k, &i__1,
+			     &c_b60, &c__[c_offset], ldc, &v[v_offset], ldv, &
+			    c_b60, &work[work_offset], ldwork);
+		}
+
+/*              W := W * T  or  W * T' */
+
+		ztrmm_("Right", "Lower", trans, "Non-unit", m, k, &c_b60, &t[
+			t_offset], ldt, &work[work_offset], ldwork);
+
+/*              C := C - W * V */
+
+		if (*n > *k) {
+
+/*                 C1 := C1 - W * V1 */
+
+		    i__1 = *n - *k;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "No transpose", m, &i__1, k, &z__1,
+			     &work[work_offset], ldwork, &v[v_offset], ldv, &
+			    c_b60, &c__[c_offset], ldc);
+		}
+
+/*              W := W * V2 */
+
+		ztrmm_("Right", "Lower", "No transpose", "Unit", m, k, &c_b60,
+			 &v[(*n - *k + 1) * v_dim1 + 1], ldv, &work[
+			work_offset], ldwork);
+
+/*              C1 := C1 - W */
+
+		i__1 = *k;
+		for (j = 1; j <= i__1; ++j) {
+		    i__2 = *m;
+		    for (i__ = 1; i__ <= i__2; ++i__) {
+			i__3 = i__ + (*n - *k + j) * c_dim1;
+			i__4 = i__ + (*n - *k + j) * c_dim1;
+			i__5 = i__ + j * work_dim1;
+			z__1.r = c__[i__4].r - work[i__5].r, z__1.i = c__[
+				i__4].i - work[i__5].i;
+			c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
+/* L230: */
+		    }
+/* L240: */
+		}
+
+	    }
+
+	}
+    }
+
+    return 0;
+
+/*     End of ZLARFB */
+
+} /* zlarfb_ */
+
+/* Subroutine */ int zlarfg_(integer *n, doublecomplex *alpha, doublecomplex *
+	x, integer *incx, doublecomplex *tau)
+{
+    /* System generated locals */
+    integer i__1;
+    doublereal d__1, d__2;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static integer j, knt;
+    static doublereal beta, alphi, alphr;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *);
+    static doublereal xnorm;
+    extern doublereal dlapy3_(doublereal *, doublereal *, doublereal *),
+	    dznrm2_(integer *, doublecomplex *, integer *), dlamch_(char *);
+    static doublereal safmin;
+    extern /* Subroutine */ int zdscal_(integer *, doublereal *,
+	    doublecomplex *, integer *);
+    static doublereal rsafmn;
+    extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *,
+	     doublecomplex *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLARFG generates a complex elementary reflector H of order n, such
+    that
+
+          H' * ( alpha ) = ( beta ),   H' * H = I.
+               (   x   )   (   0  )
+
+    where alpha and beta are scalars, with beta real, and x is an
+    (n-1)-element complex vector. H is represented in the form
+
+          H = I - tau * ( 1 ) * ( 1 v' ) ,
+                        ( v )
+
+    where tau is a complex scalar and v is a complex (n-1)-element
+    vector. Note that H is not hermitian.
+
+    If the elements of x are all zero and alpha is real, then tau = 0
+    and H is taken to be the unit matrix.
+
+    Otherwise  1 <= real(tau) <= 2  and  abs(tau-1) <= 1 .
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the elementary reflector.
+
+    ALPHA   (input/output) COMPLEX*16
+            On entry, the value alpha.
+            On exit, it is overwritten with the value beta.
+
+    X       (input/output) COMPLEX*16 array, dimension
+                           (1+(N-2)*abs(INCX))
+            On entry, the vector x.
+            On exit, it is overwritten with the vector v.
+
+    INCX    (input) INTEGER
+            The increment between elements of X. INCX > 0.
+
+    TAU     (output) COMPLEX*16
+            The value tau.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n <= 0) {
+	tau->r = 0., tau->i = 0.;
+	return 0;
+    }
+
+    i__1 = *n - 1;
+    xnorm = dznrm2_(&i__1, &x[1], incx);
+    alphr = alpha->r;
+    alphi = d_imag(alpha);
+
+    if (xnorm == 0. && alphi == 0.) {
+
+/*        H  =  I */
+
+	tau->r = 0., tau->i = 0.;
+    } else {
+
+/*        general case */
+
+	d__1 = dlapy3_(&alphr, &alphi, &xnorm);
+	beta = -d_sign(&d__1, &alphr);
+	safmin = SAFEMINIMUM / EPSILON;
+	rsafmn = 1. / safmin;
+
+	if (abs(beta) < safmin) {
+
+/*           XNORM, BETA may be inaccurate; scale X and recompute them */
+
+	    knt = 0;
+L10:
+	    ++knt;
+	    i__1 = *n - 1;
+	    zdscal_(&i__1, &rsafmn, &x[1], incx);
+	    beta *= rsafmn;
+	    alphi *= rsafmn;
+	    alphr *= rsafmn;
+	    if (abs(beta) < safmin) {
+		goto L10;
+	    }
+
+/*           New BETA is at most 1, at least SAFMIN */
+
+	    i__1 = *n - 1;
+	    xnorm = dznrm2_(&i__1, &x[1], incx);
+	    z__1.r = alphr, z__1.i = alphi;
+	    alpha->r = z__1.r, alpha->i = z__1.i;
+	    d__1 = dlapy3_(&alphr, &alphi, &xnorm);
+	    beta = -d_sign(&d__1, &alphr);
+	    d__1 = (beta - alphr) / beta;
+	    d__2 = -alphi / beta;
+	    z__1.r = d__1, z__1.i = d__2;
+	    tau->r = z__1.r, tau->i = z__1.i;
+	    z__2.r = alpha->r - beta, z__2.i = alpha->i;
+	    zladiv_(&z__1, &c_b60, &z__2);
+	    alpha->r = z__1.r, alpha->i = z__1.i;
+	    i__1 = *n - 1;
+	    zscal_(&i__1, alpha, &x[1], incx);
+
+/*           If ALPHA is subnormal, it may lose relative accuracy */
+
+	    alpha->r = beta, alpha->i = 0.;
+	    i__1 = knt;
+	    for (j = 1; j <= i__1; ++j) {
+		z__1.r = safmin * alpha->r, z__1.i = safmin * alpha->i;
+		alpha->r = z__1.r, alpha->i = z__1.i;
+/* L20: */
+	    }
+	} else {
+	    d__1 = (beta - alphr) / beta;
+	    d__2 = -alphi / beta;
+	    z__1.r = d__1, z__1.i = d__2;
+	    tau->r = z__1.r, tau->i = z__1.i;
+	    z__2.r = alpha->r - beta, z__2.i = alpha->i;
+	    zladiv_(&z__1, &c_b60, &z__2);
+	    alpha->r = z__1.r, alpha->i = z__1.i;
+	    i__1 = *n - 1;
+	    zscal_(&i__1, alpha, &x[1], incx);
+	    alpha->r = beta, alpha->i = 0.;
+	}
+    }
+
+    return 0;
+
+/*     End of ZLARFG */
+
+} /* zlarfg_ */
+
+/* Subroutine */ int zlarft_(char *direct, char *storev, integer *n, integer *
+	k, doublecomplex *v, integer *ldv, doublecomplex *tau, doublecomplex *
+	t, integer *ldt)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, v_dim1, v_offset, i__1, i__2, i__3, i__4;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, j;
+    static doublecomplex vii;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *),
+	    ztrmv_(char *, char *, char *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, integer *),
+	    zlacgv_(integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLARFT forms the triangular factor T of a complex block reflector H
+    of order n, which is defined as a product of k elementary reflectors.
+
+    If DIRECT = 'F', H = H(1) H(2) . . . H(k) and T is upper triangular;
+
+    If DIRECT = 'B', H = H(k) . . . H(2) H(1) and T is lower triangular.
+
+    If STOREV = 'C', the vector which defines the elementary reflector
+    H(i) is stored in the i-th column of the array V, and
+
+       H  =  I - V * T * V'
+
+    If STOREV = 'R', the vector which defines the elementary reflector
+    H(i) is stored in the i-th row of the array V, and
+
+       H  =  I - V' * T * V
+
+    Arguments
+    =========
+
+    DIRECT  (input) CHARACTER*1
+            Specifies the order in which the elementary reflectors are
+            multiplied to form the block reflector:
+            = 'F': H = H(1) H(2) . . . H(k) (Forward)
+            = 'B': H = H(k) . . . H(2) H(1) (Backward)
+
+    STOREV  (input) CHARACTER*1
+            Specifies how the vectors which define the elementary
+            reflectors are stored (see also Further Details):
+            = 'C': columnwise
+            = 'R': rowwise
+
+    N       (input) INTEGER
+            The order of the block reflector H. N >= 0.
+
+    K       (input) INTEGER
+            The order of the triangular factor T (= the number of
+            elementary reflectors). K >= 1.
+
+    V       (input/output) COMPLEX*16 array, dimension
+                                 (LDV,K) if STOREV = 'C'
+                                 (LDV,N) if STOREV = 'R'
+            The matrix V. See further details.
+
+    LDV     (input) INTEGER
+            The leading dimension of the array V.
+            If STOREV = 'C', LDV >= max(1,N); if STOREV = 'R', LDV >= K.
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i).
+
+    T       (output) COMPLEX*16 array, dimension (LDT,K)
+            The k by k triangular factor T of the block reflector.
+            If DIRECT = 'F', T is upper triangular; if DIRECT = 'B', T is
+            lower triangular. The rest of the array is not used.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= K.
+
+    Further Details
+    ===============
+
+    The shape of the matrix V and the storage of the vectors which define
+    the H(i) is best illustrated by the following example with n = 5 and
+    k = 3. The elements equal to 1 are not stored; the corresponding
+    array elements are modified but restored on exit. The rest of the
+    array is not used.
+
+    DIRECT = 'F' and STOREV = 'C':         DIRECT = 'F' and STOREV = 'R':
+
+                 V = (  1       )                 V = (  1 v1 v1 v1 v1 )
+                     ( v1  1    )                     (     1 v2 v2 v2 )
+                     ( v1 v2  1 )                     (        1 v3 v3 )
+                     ( v1 v2 v3 )
+                     ( v1 v2 v3 )
+
+    DIRECT = 'B' and STOREV = 'C':         DIRECT = 'B' and STOREV = 'R':
+
+                 V = ( v1 v2 v3 )                 V = ( v1 v1  1       )
+                     ( v1 v2 v3 )                     ( v2 v2 v2  1    )
+                     (  1 v2 v3 )                     ( v3 v3 v3 v3  1 )
+                     (     1 v3 )
+                     (        1 )
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    v_dim1 = *ldv;
+    v_offset = 1 + v_dim1;
+    v -= v_offset;
+    --tau;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+
+    /* Function Body */
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (lsame_(direct, "F")) {
+	i__1 = *k;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__;
+	    if (tau[i__2].r == 0. && tau[i__2].i == 0.) {
+
+/*              H(i)  =  I */
+
+		i__2 = i__;
+		for (j = 1; j <= i__2; ++j) {
+		    i__3 = j + i__ * t_dim1;
+		    t[i__3].r = 0., t[i__3].i = 0.;
+/* L10: */
+		}
+	    } else {
+
+/*              general case */
+
+		i__2 = i__ + i__ * v_dim1;
+		vii.r = v[i__2].r, vii.i = v[i__2].i;
+		i__2 = i__ + i__ * v_dim1;
+		v[i__2].r = 1., v[i__2].i = 0.;
+		if (lsame_(storev, "C")) {
+
+/*                 T(1:i-1,i) := - tau(i) * V(i:n,1:i-1)' * V(i:n,i) */
+
+		    i__2 = *n - i__ + 1;
+		    i__3 = i__ - 1;
+		    i__4 = i__;
+		    z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
+		    zgemv_("Conjugate transpose", &i__2, &i__3, &z__1, &v[i__
+			    + v_dim1], ldv, &v[i__ + i__ * v_dim1], &c__1, &
+			    c_b59, &t[i__ * t_dim1 + 1], &c__1);
+		} else {
+
+/*                 T(1:i-1,i) := - tau(i) * V(1:i-1,i:n) * V(i,i:n)' */
+
+		    if (i__ < *n) {
+			i__2 = *n - i__;
+			zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv);
+		    }
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__ + 1;
+		    i__4 = i__;
+		    z__1.r = -tau[i__4].r, z__1.i = -tau[i__4].i;
+		    zgemv_("No transpose", &i__2, &i__3, &z__1, &v[i__ *
+			    v_dim1 + 1], ldv, &v[i__ + i__ * v_dim1], ldv, &
+			    c_b59, &t[i__ * t_dim1 + 1], &c__1);
+		    if (i__ < *n) {
+			i__2 = *n - i__;
+			zlacgv_(&i__2, &v[i__ + (i__ + 1) * v_dim1], ldv);
+		    }
+		}
+		i__2 = i__ + i__ * v_dim1;
+		v[i__2].r = vii.r, v[i__2].i = vii.i;
+
+/*              T(1:i-1,i) := T(1:i-1,1:i-1) * T(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		ztrmv_("Upper", "No transpose", "Non-unit", &i__2, &t[
+			t_offset], ldt, &t[i__ * t_dim1 + 1], &c__1);
+		i__2 = i__ + i__ * t_dim1;
+		i__3 = i__;
+		t[i__2].r = tau[i__3].r, t[i__2].i = tau[i__3].i;
+	    }
+/* L20: */
+	}
+    } else {
+	for (i__ = *k; i__ >= 1; --i__) {
+	    i__1 = i__;
+	    if (tau[i__1].r == 0. && tau[i__1].i == 0.) {
+
+/*              H(i)  =  I */
+
+		i__1 = *k;
+		for (j = i__; j <= i__1; ++j) {
+		    i__2 = j + i__ * t_dim1;
+		    t[i__2].r = 0., t[i__2].i = 0.;
+/* L30: */
+		}
+	    } else {
+
+/*              general case */
+
+		if (i__ < *k) {
+		    if (lsame_(storev, "C")) {
+			i__1 = *n - *k + i__ + i__ * v_dim1;
+			vii.r = v[i__1].r, vii.i = v[i__1].i;
+			i__1 = *n - *k + i__ + i__ * v_dim1;
+			v[i__1].r = 1., v[i__1].i = 0.;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(1:n-k+i,i+1:k)' * V(1:n-k+i,i)
+*/
+
+			i__1 = *n - *k + i__;
+			i__2 = *k - i__;
+			i__3 = i__;
+			z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+			zgemv_("Conjugate transpose", &i__1, &i__2, &z__1, &v[
+				(i__ + 1) * v_dim1 + 1], ldv, &v[i__ * v_dim1
+				+ 1], &c__1, &c_b59, &t[i__ + 1 + i__ *
+				t_dim1], &c__1);
+			i__1 = *n - *k + i__ + i__ * v_dim1;
+			v[i__1].r = vii.r, v[i__1].i = vii.i;
+		    } else {
+			i__1 = i__ + (*n - *k + i__) * v_dim1;
+			vii.r = v[i__1].r, vii.i = v[i__1].i;
+			i__1 = i__ + (*n - *k + i__) * v_dim1;
+			v[i__1].r = 1., v[i__1].i = 0.;
+
+/*
+                      T(i+1:k,i) :=
+                              - tau(i) * V(i+1:k,1:n-k+i) * V(i,1:n-k+i)'
+*/
+
+			i__1 = *n - *k + i__ - 1;
+			zlacgv_(&i__1, &v[i__ + v_dim1], ldv);
+			i__1 = *k - i__;
+			i__2 = *n - *k + i__;
+			i__3 = i__;
+			z__1.r = -tau[i__3].r, z__1.i = -tau[i__3].i;
+			zgemv_("No transpose", &i__1, &i__2, &z__1, &v[i__ +
+				1 + v_dim1], ldv, &v[i__ + v_dim1], ldv, &
+				c_b59, &t[i__ + 1 + i__ * t_dim1], &c__1);
+			i__1 = *n - *k + i__ - 1;
+			zlacgv_(&i__1, &v[i__ + v_dim1], ldv);
+			i__1 = i__ + (*n - *k + i__) * v_dim1;
+			v[i__1].r = vii.r, v[i__1].i = vii.i;
+		    }
+
+/*                 T(i+1:k,i) := T(i+1:k,i+1:k) * T(i+1:k,i) */
+
+		    i__1 = *k - i__;
+		    ztrmv_("Lower", "No transpose", "Non-unit", &i__1, &t[i__
+			    + 1 + (i__ + 1) * t_dim1], ldt, &t[i__ + 1 + i__ *
+			     t_dim1], &c__1)
+			    ;
+		}
+		i__1 = i__ + i__ * t_dim1;
+		i__2 = i__;
+		t[i__1].r = tau[i__2].r, t[i__1].i = tau[i__2].i;
+	    }
+/* L40: */
+	}
+    }
+    return 0;
+
+/*     End of ZLARFT */
+
+} /* zlarft_ */
+
+/* Subroutine */ int zlarfx_(char *side, integer *m, integer *n,
+	doublecomplex *v, doublecomplex *tau, doublecomplex *c__, integer *
+	ldc, doublecomplex *work)
+{
+    /* System generated locals */
+    integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8,
+	    i__9, i__10, i__11;
+    doublecomplex z__1, z__2, z__3, z__4, z__5, z__6, z__7, z__8, z__9, z__10,
+	     z__11, z__12, z__13, z__14, z__15, z__16, z__17, z__18, z__19;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer j;
+    static doublecomplex t1, t2, t3, t4, t5, t6, t7, t8, t9, v1, v2, v3, v4,
+	    v5, v6, v7, v8, v9, t10, v10, sum;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLARFX applies a complex elementary reflector H to a complex m by n
+    matrix C, from either the left or the right. H is represented in the
+    form
+
+          H = I - tau * v * v'
+
+    where tau is a complex scalar and v is a complex vector.
+
+    If tau = 0, then H is taken to be the unit matrix
+
+    This version uses inline code if H has order < 11.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': form  H * C
+            = 'R': form  C * H
+
+    M       (input) INTEGER
+            The number of rows of the matrix C.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C.
+
+    V       (input) COMPLEX*16 array, dimension (M) if SIDE = 'L'
+                                          or (N) if SIDE = 'R'
+            The vector v in the representation of H.
+
+    TAU     (input) COMPLEX*16
+            The value tau in the representation of H.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the m by n matrix C.
+            On exit, C is overwritten by the matrix H * C if SIDE = 'L',
+            or C * H if SIDE = 'R'.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDA >= max(1,M).
+
+    WORK    (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L'
+                                              or (M) if SIDE = 'R'
+            WORK is not referenced if H has order < 11.
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --v;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    if (tau->r == 0. && tau->i == 0.) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  H * C, where H has order m. */
+
+	switch (*m) {
+	    case 1:  goto L10;
+	    case 2:  goto L30;
+	    case 3:  goto L50;
+	    case 4:  goto L70;
+	    case 5:  goto L90;
+	    case 6:  goto L110;
+	    case 7:  goto L130;
+	    case 8:  goto L150;
+	    case 9:  goto L170;
+	    case 10:  goto L190;
+	}
+
+/*
+          Code for general M
+
+          w := C'*v
+*/
+
+	zgemv_("Conjugate transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1]
+		, &c__1, &c_b59, &work[1], &c__1);
+
+/*        C := C - tau * v * w' */
+
+	z__1.r = -tau->r, z__1.i = -tau->i;
+	zgerc_(m, n, &z__1, &v[1], &c__1, &work[1], &c__1, &c__[c_offset],
+		ldc);
+	goto L410;
+L10:
+
+/*        Special code for 1 x 1 Householder */
+
+	z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i
+		+ tau->i * v[1].r;
+	d_cnjg(&z__4, &v[1]);
+	z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i
+		+ z__3.i * z__4.r;
+	z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i;
+	t1.r = z__1.r, t1.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r *
+		    c__[i__3].i + t1.i * c__[i__3].r;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L20: */
+	}
+	goto L410;
+L30:
+
+/*        Special code for 2 x 2 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L40: */
+	}
+	goto L410;
+L50:
+
+/*        Special code for 3 x 3 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L60: */
+	}
+	goto L410;
+L70:
+
+/*        Special code for 4 x 4 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L80: */
+	}
+	goto L410;
+L90:
+
+/*        Special code for 5 x 5 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	d_cnjg(&z__1, &v[5]);
+	v5.r = z__1.r, v5.i = z__1.i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i;
+	    i__6 = j * c_dim1 + 5;
+	    z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r *
+		    c__[i__6].i + v5.i * c__[i__6].r;
+	    z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L100: */
+	}
+	goto L410;
+L110:
+
+/*        Special code for 6 x 6 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	d_cnjg(&z__1, &v[5]);
+	v5.r = z__1.r, v5.i = z__1.i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	d_cnjg(&z__1, &v[6]);
+	v6.r = z__1.r, v6.i = z__1.i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i;
+	    i__6 = j * c_dim1 + 5;
+	    z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i;
+	    i__7 = j * c_dim1 + 6;
+	    z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L120: */
+	}
+	goto L410;
+L130:
+
+/*        Special code for 7 x 7 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	d_cnjg(&z__1, &v[5]);
+	v5.r = z__1.r, v5.i = z__1.i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	d_cnjg(&z__1, &v[6]);
+	v6.r = z__1.r, v6.i = z__1.i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	d_cnjg(&z__1, &v[7]);
+	v7.r = z__1.r, v7.i = z__1.i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i;
+	    i__6 = j * c_dim1 + 5;
+	    z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i;
+	    i__7 = j * c_dim1 + 6;
+	    z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i;
+	    i__8 = j * c_dim1 + 7;
+	    z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L140: */
+	}
+	goto L410;
+L150:
+
+/*        Special code for 8 x 8 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	d_cnjg(&z__1, &v[5]);
+	v5.r = z__1.r, v5.i = z__1.i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	d_cnjg(&z__1, &v[6]);
+	v6.r = z__1.r, v6.i = z__1.i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	d_cnjg(&z__1, &v[7]);
+	v7.r = z__1.r, v7.i = z__1.i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	d_cnjg(&z__1, &v[8]);
+	v8.r = z__1.r, v8.i = z__1.i;
+	d_cnjg(&z__2, &v8);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t8.r = z__1.r, t8.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i;
+	    i__6 = j * c_dim1 + 5;
+	    z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i;
+	    i__7 = j * c_dim1 + 6;
+	    z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i;
+	    i__8 = j * c_dim1 + 7;
+	    z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i;
+	    i__9 = j * c_dim1 + 8;
+	    z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 8;
+	    i__3 = j * c_dim1 + 8;
+	    z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L160: */
+	}
+	goto L410;
+L170:
+
+/*        Special code for 9 x 9 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	d_cnjg(&z__1, &v[5]);
+	v5.r = z__1.r, v5.i = z__1.i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	d_cnjg(&z__1, &v[6]);
+	v6.r = z__1.r, v6.i = z__1.i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	d_cnjg(&z__1, &v[7]);
+	v7.r = z__1.r, v7.i = z__1.i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	d_cnjg(&z__1, &v[8]);
+	v8.r = z__1.r, v8.i = z__1.i;
+	d_cnjg(&z__2, &v8);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t8.r = z__1.r, t8.i = z__1.i;
+	d_cnjg(&z__1, &v[9]);
+	v9.r = z__1.r, v9.i = z__1.i;
+	d_cnjg(&z__2, &v9);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t9.r = z__1.r, t9.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i;
+	    i__6 = j * c_dim1 + 5;
+	    z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i;
+	    i__7 = j * c_dim1 + 6;
+	    z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i;
+	    i__8 = j * c_dim1 + 7;
+	    z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i;
+	    i__9 = j * c_dim1 + 8;
+	    z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i;
+	    i__10 = j * c_dim1 + 9;
+	    z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 8;
+	    i__3 = j * c_dim1 + 8;
+	    z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 9;
+	    i__3 = j * c_dim1 + 9;
+	    z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L180: */
+	}
+	goto L410;
+L190:
+
+/*        Special code for 10 x 10 Householder */
+
+	d_cnjg(&z__1, &v[1]);
+	v1.r = z__1.r, v1.i = z__1.i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	d_cnjg(&z__1, &v[2]);
+	v2.r = z__1.r, v2.i = z__1.i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	d_cnjg(&z__1, &v[3]);
+	v3.r = z__1.r, v3.i = z__1.i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	d_cnjg(&z__1, &v[4]);
+	v4.r = z__1.r, v4.i = z__1.i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	d_cnjg(&z__1, &v[5]);
+	v5.r = z__1.r, v5.i = z__1.i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	d_cnjg(&z__1, &v[6]);
+	v6.r = z__1.r, v6.i = z__1.i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	d_cnjg(&z__1, &v[7]);
+	v7.r = z__1.r, v7.i = z__1.i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	d_cnjg(&z__1, &v[8]);
+	v8.r = z__1.r, v8.i = z__1.i;
+	d_cnjg(&z__2, &v8);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t8.r = z__1.r, t8.i = z__1.i;
+	d_cnjg(&z__1, &v[9]);
+	v9.r = z__1.r, v9.i = z__1.i;
+	d_cnjg(&z__2, &v9);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t9.r = z__1.r, t9.i = z__1.i;
+	d_cnjg(&z__1, &v[10]);
+	v10.r = z__1.r, v10.i = z__1.i;
+	d_cnjg(&z__2, &v10);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t10.r = z__1.r, t10.i = z__1.i;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j * c_dim1 + 1;
+	    z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r
+		    * c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j * c_dim1 + 2;
+	    z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i;
+	    i__4 = j * c_dim1 + 3;
+	    z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i;
+	    i__5 = j * c_dim1 + 4;
+	    z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i;
+	    i__6 = j * c_dim1 + 5;
+	    z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i;
+	    i__7 = j * c_dim1 + 6;
+	    z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i;
+	    i__8 = j * c_dim1 + 7;
+	    z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i;
+	    i__9 = j * c_dim1 + 8;
+	    z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i;
+	    i__10 = j * c_dim1 + 9;
+	    z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i;
+	    i__11 = j * c_dim1 + 10;
+	    z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i =
+		    v10.r * c__[i__11].i + v10.i * c__[i__11].r;
+	    z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j * c_dim1 + 1;
+	    i__3 = j * c_dim1 + 1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 2;
+	    i__3 = j * c_dim1 + 2;
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 3;
+	    i__3 = j * c_dim1 + 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 4;
+	    i__3 = j * c_dim1 + 4;
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 5;
+	    i__3 = j * c_dim1 + 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 6;
+	    i__3 = j * c_dim1 + 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 7;
+	    i__3 = j * c_dim1 + 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 8;
+	    i__3 = j * c_dim1 + 8;
+	    z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 9;
+	    i__3 = j * c_dim1 + 9;
+	    z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j * c_dim1 + 10;
+	    i__3 = j * c_dim1 + 10;
+	    z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i +
+		    sum.i * t10.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L200: */
+	}
+	goto L410;
+    } else {
+
+/*        Form  C * H, where H has order n. */
+
+	switch (*n) {
+	    case 1:  goto L210;
+	    case 2:  goto L230;
+	    case 3:  goto L250;
+	    case 4:  goto L270;
+	    case 5:  goto L290;
+	    case 6:  goto L310;
+	    case 7:  goto L330;
+	    case 8:  goto L350;
+	    case 9:  goto L370;
+	    case 10:  goto L390;
+	}
+
+/*
+          Code for general N
+
+          w := C * v
+*/
+
+	zgemv_("No transpose", m, n, &c_b60, &c__[c_offset], ldc, &v[1], &
+		c__1, &c_b59, &work[1], &c__1);
+
+/*        C := C - tau * w * v' */
+
+	z__1.r = -tau->r, z__1.i = -tau->i;
+	zgerc_(m, n, &z__1, &work[1], &c__1, &v[1], &c__1, &c__[c_offset],
+		ldc);
+	goto L410;
+L210:
+
+/*        Special code for 1 x 1 Householder */
+
+	z__3.r = tau->r * v[1].r - tau->i * v[1].i, z__3.i = tau->r * v[1].i
+		+ tau->i * v[1].r;
+	d_cnjg(&z__4, &v[1]);
+	z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = z__3.r * z__4.i
+		+ z__3.i * z__4.r;
+	z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i;
+	t1.r = z__1.r, t1.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__1.r = t1.r * c__[i__3].r - t1.i * c__[i__3].i, z__1.i = t1.r *
+		    c__[i__3].i + t1.i * c__[i__3].r;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L220: */
+	}
+	goto L410;
+L230:
+
+/*        Special code for 2 x 2 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__2.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__2.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__3.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__3.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L240: */
+	}
+	goto L410;
+L250:
+
+/*        Special code for 3 x 3 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__3.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__3.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__4.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__4.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__5.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__5.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L260: */
+	}
+	goto L410;
+L270:
+
+/*        Special code for 4 x 4 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__4.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__4.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__5.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__5.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__3.r = z__4.r + z__5.r, z__3.i = z__4.i + z__5.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__6.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__6.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__2.r = z__3.r + z__6.r, z__2.i = z__3.i + z__6.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__7.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__7.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    z__1.r = z__2.r + z__7.r, z__1.i = z__2.i + z__7.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L280: */
+	}
+	goto L410;
+L290:
+
+/*        Special code for 5 x 5 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__5.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__5.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__6.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__6.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__4.r = z__5.r + z__6.r, z__4.i = z__5.i + z__6.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__7.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__7.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__3.r = z__4.r + z__7.r, z__3.i = z__4.i + z__7.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__8.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__8.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    z__2.r = z__3.r + z__8.r, z__2.i = z__3.i + z__8.i;
+	    i__6 = j + c_dim1 * 5;
+	    z__9.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__9.i = v5.r *
+		    c__[i__6].i + v5.i * c__[i__6].r;
+	    z__1.r = z__2.r + z__9.r, z__1.i = z__2.i + z__9.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L300: */
+	}
+	goto L410;
+L310:
+
+/*        Special code for 6 x 6 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__6.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__6.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__7.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__7.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__5.r = z__6.r + z__7.r, z__5.i = z__6.i + z__7.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__8.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__8.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__4.r = z__5.r + z__8.r, z__4.i = z__5.i + z__8.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__9.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__9.i = v4.r *
+		    c__[i__5].i + v4.i * c__[i__5].r;
+	    z__3.r = z__4.r + z__9.r, z__3.i = z__4.i + z__9.i;
+	    i__6 = j + c_dim1 * 5;
+	    z__10.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__10.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__2.r = z__3.r + z__10.r, z__2.i = z__3.i + z__10.i;
+	    i__7 = j + c_dim1 * 6;
+	    z__11.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__11.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__1.r = z__2.r + z__11.r, z__1.i = z__2.i + z__11.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L320: */
+	}
+	goto L410;
+L330:
+
+/*        Special code for 7 x 7 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__7.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__7.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__8.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__8.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__6.r = z__7.r + z__8.r, z__6.i = z__7.i + z__8.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__9.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__9.i = v3.r *
+		    c__[i__4].i + v3.i * c__[i__4].r;
+	    z__5.r = z__6.r + z__9.r, z__5.i = z__6.i + z__9.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__10.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__10.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__4.r = z__5.r + z__10.r, z__4.i = z__5.i + z__10.i;
+	    i__6 = j + c_dim1 * 5;
+	    z__11.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__11.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__3.r = z__4.r + z__11.r, z__3.i = z__4.i + z__11.i;
+	    i__7 = j + c_dim1 * 6;
+	    z__12.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__12.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__2.r = z__3.r + z__12.r, z__2.i = z__3.i + z__12.i;
+	    i__8 = j + c_dim1 * 7;
+	    z__13.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__13.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__1.r = z__2.r + z__13.r, z__1.i = z__2.i + z__13.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L340: */
+	}
+	goto L410;
+L350:
+
+/*        Special code for 8 x 8 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	v8.r = v[8].r, v8.i = v[8].i;
+	d_cnjg(&z__2, &v8);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t8.r = z__1.r, t8.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__8.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__8.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__9.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__9.i = v2.r *
+		    c__[i__3].i + v2.i * c__[i__3].r;
+	    z__7.r = z__8.r + z__9.r, z__7.i = z__8.i + z__9.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__10.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__10.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    z__6.r = z__7.r + z__10.r, z__6.i = z__7.i + z__10.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__11.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__11.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__5.r = z__6.r + z__11.r, z__5.i = z__6.i + z__11.i;
+	    i__6 = j + c_dim1 * 5;
+	    z__12.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__12.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__4.r = z__5.r + z__12.r, z__4.i = z__5.i + z__12.i;
+	    i__7 = j + c_dim1 * 6;
+	    z__13.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__13.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__3.r = z__4.r + z__13.r, z__3.i = z__4.i + z__13.i;
+	    i__8 = j + c_dim1 * 7;
+	    z__14.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__14.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__2.r = z__3.r + z__14.r, z__2.i = z__3.i + z__14.i;
+	    i__9 = j + ((c_dim1) << (3));
+	    z__15.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__15.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    z__1.r = z__2.r + z__15.r, z__1.i = z__2.i + z__15.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (3));
+	    i__3 = j + ((c_dim1) << (3));
+	    z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L360: */
+	}
+	goto L410;
+L370:
+
+/*        Special code for 9 x 9 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	v8.r = v[8].r, v8.i = v[8].i;
+	d_cnjg(&z__2, &v8);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t8.r = z__1.r, t8.i = z__1.i;
+	v9.r = v[9].r, v9.i = v[9].i;
+	d_cnjg(&z__2, &v9);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t9.r = z__1.r, t9.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__9.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__9.i = v1.r *
+		    c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__10.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__10.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    z__8.r = z__9.r + z__10.r, z__8.i = z__9.i + z__10.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__11.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__11.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    z__7.r = z__8.r + z__11.r, z__7.i = z__8.i + z__11.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__12.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__12.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__6.r = z__7.r + z__12.r, z__6.i = z__7.i + z__12.i;
+	    i__6 = j + c_dim1 * 5;
+	    z__13.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__13.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__5.r = z__6.r + z__13.r, z__5.i = z__6.i + z__13.i;
+	    i__7 = j + c_dim1 * 6;
+	    z__14.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__14.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__4.r = z__5.r + z__14.r, z__4.i = z__5.i + z__14.i;
+	    i__8 = j + c_dim1 * 7;
+	    z__15.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__15.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__3.r = z__4.r + z__15.r, z__3.i = z__4.i + z__15.i;
+	    i__9 = j + ((c_dim1) << (3));
+	    z__16.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__16.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    z__2.r = z__3.r + z__16.r, z__2.i = z__3.i + z__16.i;
+	    i__10 = j + c_dim1 * 9;
+	    z__17.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__17.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    z__1.r = z__2.r + z__17.r, z__1.i = z__2.i + z__17.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (3));
+	    i__3 = j + ((c_dim1) << (3));
+	    z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 9;
+	    i__3 = j + c_dim1 * 9;
+	    z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L380: */
+	}
+	goto L410;
+L390:
+
+/*        Special code for 10 x 10 Householder */
+
+	v1.r = v[1].r, v1.i = v[1].i;
+	d_cnjg(&z__2, &v1);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t1.r = z__1.r, t1.i = z__1.i;
+	v2.r = v[2].r, v2.i = v[2].i;
+	d_cnjg(&z__2, &v2);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t2.r = z__1.r, t2.i = z__1.i;
+	v3.r = v[3].r, v3.i = v[3].i;
+	d_cnjg(&z__2, &v3);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t3.r = z__1.r, t3.i = z__1.i;
+	v4.r = v[4].r, v4.i = v[4].i;
+	d_cnjg(&z__2, &v4);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t4.r = z__1.r, t4.i = z__1.i;
+	v5.r = v[5].r, v5.i = v[5].i;
+	d_cnjg(&z__2, &v5);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t5.r = z__1.r, t5.i = z__1.i;
+	v6.r = v[6].r, v6.i = v[6].i;
+	d_cnjg(&z__2, &v6);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t6.r = z__1.r, t6.i = z__1.i;
+	v7.r = v[7].r, v7.i = v[7].i;
+	d_cnjg(&z__2, &v7);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t7.r = z__1.r, t7.i = z__1.i;
+	v8.r = v[8].r, v8.i = v[8].i;
+	d_cnjg(&z__2, &v8);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t8.r = z__1.r, t8.i = z__1.i;
+	v9.r = v[9].r, v9.i = v[9].i;
+	d_cnjg(&z__2, &v9);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t9.r = z__1.r, t9.i = z__1.i;
+	v10.r = v[10].r, v10.i = v[10].i;
+	d_cnjg(&z__2, &v10);
+	z__1.r = tau->r * z__2.r - tau->i * z__2.i, z__1.i = tau->r * z__2.i
+		+ tau->i * z__2.r;
+	t10.r = z__1.r, t10.i = z__1.i;
+	i__1 = *m;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = j + c_dim1;
+	    z__10.r = v1.r * c__[i__2].r - v1.i * c__[i__2].i, z__10.i = v1.r
+		    * c__[i__2].i + v1.i * c__[i__2].r;
+	    i__3 = j + ((c_dim1) << (1));
+	    z__11.r = v2.r * c__[i__3].r - v2.i * c__[i__3].i, z__11.i = v2.r
+		    * c__[i__3].i + v2.i * c__[i__3].r;
+	    z__9.r = z__10.r + z__11.r, z__9.i = z__10.i + z__11.i;
+	    i__4 = j + c_dim1 * 3;
+	    z__12.r = v3.r * c__[i__4].r - v3.i * c__[i__4].i, z__12.i = v3.r
+		    * c__[i__4].i + v3.i * c__[i__4].r;
+	    z__8.r = z__9.r + z__12.r, z__8.i = z__9.i + z__12.i;
+	    i__5 = j + ((c_dim1) << (2));
+	    z__13.r = v4.r * c__[i__5].r - v4.i * c__[i__5].i, z__13.i = v4.r
+		    * c__[i__5].i + v4.i * c__[i__5].r;
+	    z__7.r = z__8.r + z__13.r, z__7.i = z__8.i + z__13.i;
+	    i__6 = j + c_dim1 * 5;
+	    z__14.r = v5.r * c__[i__6].r - v5.i * c__[i__6].i, z__14.i = v5.r
+		    * c__[i__6].i + v5.i * c__[i__6].r;
+	    z__6.r = z__7.r + z__14.r, z__6.i = z__7.i + z__14.i;
+	    i__7 = j + c_dim1 * 6;
+	    z__15.r = v6.r * c__[i__7].r - v6.i * c__[i__7].i, z__15.i = v6.r
+		    * c__[i__7].i + v6.i * c__[i__7].r;
+	    z__5.r = z__6.r + z__15.r, z__5.i = z__6.i + z__15.i;
+	    i__8 = j + c_dim1 * 7;
+	    z__16.r = v7.r * c__[i__8].r - v7.i * c__[i__8].i, z__16.i = v7.r
+		    * c__[i__8].i + v7.i * c__[i__8].r;
+	    z__4.r = z__5.r + z__16.r, z__4.i = z__5.i + z__16.i;
+	    i__9 = j + ((c_dim1) << (3));
+	    z__17.r = v8.r * c__[i__9].r - v8.i * c__[i__9].i, z__17.i = v8.r
+		    * c__[i__9].i + v8.i * c__[i__9].r;
+	    z__3.r = z__4.r + z__17.r, z__3.i = z__4.i + z__17.i;
+	    i__10 = j + c_dim1 * 9;
+	    z__18.r = v9.r * c__[i__10].r - v9.i * c__[i__10].i, z__18.i =
+		    v9.r * c__[i__10].i + v9.i * c__[i__10].r;
+	    z__2.r = z__3.r + z__18.r, z__2.i = z__3.i + z__18.i;
+	    i__11 = j + c_dim1 * 10;
+	    z__19.r = v10.r * c__[i__11].r - v10.i * c__[i__11].i, z__19.i =
+		    v10.r * c__[i__11].i + v10.i * c__[i__11].r;
+	    z__1.r = z__2.r + z__19.r, z__1.i = z__2.i + z__19.i;
+	    sum.r = z__1.r, sum.i = z__1.i;
+	    i__2 = j + c_dim1;
+	    i__3 = j + c_dim1;
+	    z__2.r = sum.r * t1.r - sum.i * t1.i, z__2.i = sum.r * t1.i +
+		    sum.i * t1.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (1));
+	    i__3 = j + ((c_dim1) << (1));
+	    z__2.r = sum.r * t2.r - sum.i * t2.i, z__2.i = sum.r * t2.i +
+		    sum.i * t2.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 3;
+	    i__3 = j + c_dim1 * 3;
+	    z__2.r = sum.r * t3.r - sum.i * t3.i, z__2.i = sum.r * t3.i +
+		    sum.i * t3.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (2));
+	    i__3 = j + ((c_dim1) << (2));
+	    z__2.r = sum.r * t4.r - sum.i * t4.i, z__2.i = sum.r * t4.i +
+		    sum.i * t4.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 5;
+	    i__3 = j + c_dim1 * 5;
+	    z__2.r = sum.r * t5.r - sum.i * t5.i, z__2.i = sum.r * t5.i +
+		    sum.i * t5.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 6;
+	    i__3 = j + c_dim1 * 6;
+	    z__2.r = sum.r * t6.r - sum.i * t6.i, z__2.i = sum.r * t6.i +
+		    sum.i * t6.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 7;
+	    i__3 = j + c_dim1 * 7;
+	    z__2.r = sum.r * t7.r - sum.i * t7.i, z__2.i = sum.r * t7.i +
+		    sum.i * t7.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + ((c_dim1) << (3));
+	    i__3 = j + ((c_dim1) << (3));
+	    z__2.r = sum.r * t8.r - sum.i * t8.i, z__2.i = sum.r * t8.i +
+		    sum.i * t8.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 9;
+	    i__3 = j + c_dim1 * 9;
+	    z__2.r = sum.r * t9.r - sum.i * t9.i, z__2.i = sum.r * t9.i +
+		    sum.i * t9.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+	    i__2 = j + c_dim1 * 10;
+	    i__3 = j + c_dim1 * 10;
+	    z__2.r = sum.r * t10.r - sum.i * t10.i, z__2.i = sum.r * t10.i +
+		    sum.i * t10.r;
+	    z__1.r = c__[i__3].r - z__2.r, z__1.i = c__[i__3].i - z__2.i;
+	    c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
+/* L400: */
+	}
+	goto L410;
+    }
+L410:
+    return 0;
+
+/*     End of ZLARFX */
+
+} /* zlarfx_ */
+
+/* Subroutine */ int zlascl_(char *type__, integer *kl, integer *ku,
+	doublereal *cfrom, doublereal *cto, integer *m, integer *n,
+	doublecomplex *a, integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, j, k1, k2, k3, k4;
+    static doublereal mul, cto1;
+    static logical done;
+    static doublereal ctoc;
+    extern logical lsame_(char *, char *);
+    static integer itype;
+    static doublereal cfrom1;
+
+    static doublereal cfromc;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    static doublereal bignum, smlnum;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       February 29, 1992
+
+
+    Purpose
+    =======
+
+    ZLASCL multiplies the M by N complex matrix A by the real scalar
+    CTO/CFROM.  This is done without over/underflow as long as the final
+    result CTO*A(I,J)/CFROM does not over/underflow. TYPE specifies that
+    A may be full, upper triangular, lower triangular, upper Hessenberg,
+    or banded.
+
+    Arguments
+    =========
+
+    TYPE    (input) CHARACTER*1
+            TYPE indices the storage type of the input matrix.
+            = 'G':  A is a full matrix.
+            = 'L':  A is a lower triangular matrix.
+            = 'U':  A is an upper triangular matrix.
+            = 'H':  A is an upper Hessenberg matrix.
+            = 'B':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the lower
+                    half stored.
+            = 'Q':  A is a symmetric band matrix with lower bandwidth KL
+                    and upper bandwidth KU and with the only the upper
+                    half stored.
+            = 'Z':  A is a band matrix with lower bandwidth KL and upper
+                    bandwidth KU.
+
+    KL      (input) INTEGER
+            The lower bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    KU      (input) INTEGER
+            The upper bandwidth of A.  Referenced only if TYPE = 'B',
+            'Q' or 'Z'.
+
+    CFROM   (input) DOUBLE PRECISION
+    CTO     (input) DOUBLE PRECISION
+            The matrix A is multiplied by CTO/CFROM. A(I,J) is computed
+            without over/underflow if the final result CTO*A(I,J)/CFROM
+            can be represented without over/underflow.  CFROM must be
+            nonzero.
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,M)
+            The matrix to be multiplied by CTO/CFROM.  See TYPE for the
+            storage type.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    INFO    (output) INTEGER
+            0  - successful exit
+            <0 - if INFO = -i, the i-th argument had an illegal value.
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(type__, "G")) {
+	itype = 0;
+    } else if (lsame_(type__, "L")) {
+	itype = 1;
+    } else if (lsame_(type__, "U")) {
+	itype = 2;
+    } else if (lsame_(type__, "H")) {
+	itype = 3;
+    } else if (lsame_(type__, "B")) {
+	itype = 4;
+    } else if (lsame_(type__, "Q")) {
+	itype = 5;
+    } else if (lsame_(type__, "Z")) {
+	itype = 6;
+    } else {
+	itype = -1;
+    }
+
+    if (itype == -1) {
+	*info = -1;
+    } else if (*cfrom == 0.) {
+	*info = -4;
+    } else if (*m < 0) {
+	*info = -6;
+    } else if (((*n < 0) || (itype == 4 && *n != *m)) || (itype == 5 && *n !=
+	    *m)) {
+	*info = -7;
+    } else if (itype <= 3 && *lda < max(1,*m)) {
+	*info = -9;
+    } else if (itype >= 4) {
+/* Computing MAX */
+	i__1 = *m - 1;
+	if ((*kl < 0) || (*kl > max(i__1,0))) {
+	    *info = -2;
+	} else /* if(complicated condition) */ {
+/* Computing MAX */
+	    i__1 = *n - 1;
+	    if (((*ku < 0) || (*ku > max(i__1,0))) || (((itype == 4) || (
+		    itype == 5)) && *kl != *ku)) {
+		*info = -3;
+	    } else if (((itype == 4 && *lda < *kl + 1) || (itype == 5 && *lda
+		    < *ku + 1)) || (itype == 6 && *lda < ((*kl) << (1)) + *ku
+		    + 1)) {
+		*info = -9;
+	    }
+	}
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLASCL", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*m == 0)) {
+	return 0;
+    }
+
+/*     Get machine parameters */
+
+    smlnum = SAFEMINIMUM;
+    bignum = 1. / smlnum;
+
+    cfromc = *cfrom;
+    ctoc = *cto;
+
+L10:
+    cfrom1 = cfromc * smlnum;
+    cto1 = ctoc / bignum;
+    if (abs(cfrom1) > abs(ctoc) && ctoc != 0.) {
+	mul = smlnum;
+	done = FALSE_;
+	cfromc = cfrom1;
+    } else if (abs(cto1) > abs(cfromc)) {
+	mul = bignum;
+	done = FALSE_;
+	ctoc = cto1;
+    } else {
+	mul = ctoc / cfromc;
+	done = TRUE_;
+    }
+
+    if (itype == 0) {
+
+/*        Full matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L20: */
+	    }
+/* L30: */
+	}
+
+    } else if (itype == 1) {
+
+/*        Lower triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L40: */
+	    }
+/* L50: */
+	}
+
+    } else if (itype == 2) {
+
+/*        Upper triangular matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = min(j,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L60: */
+	    }
+/* L70: */
+	}
+
+    } else if (itype == 3) {
+
+/*        Upper Hessenberg matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j + 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L80: */
+	    }
+/* L90: */
+	}
+
+    } else if (itype == 4) {
+
+/*        Lower half of a symmetric band matrix */
+
+	k3 = *kl + 1;
+	k4 = *n + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = k3, i__4 = k4 - j;
+	    i__2 = min(i__3,i__4);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L100: */
+	    }
+/* L110: */
+	}
+
+    } else if (itype == 5) {
+
+/*        Upper half of a symmetric band matrix */
+
+	k1 = *ku + 2;
+	k3 = *ku + 1;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__2 = k1 - j;
+	    i__3 = k3;
+	    for (i__ = max(i__2,1); i__ <= i__3; ++i__) {
+		i__2 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L120: */
+	    }
+/* L130: */
+	}
+
+    } else if (itype == 6) {
+
+/*        Band matrix */
+
+	k1 = *kl + *ku + 2;
+	k2 = *kl + 1;
+	k3 = ((*kl) << (1)) + *ku + 1;
+	k4 = *kl + *ku + 1 + *m;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	    i__3 = k1 - j;
+/* Computing MIN */
+	    i__4 = k3, i__5 = k4 - j;
+	    i__2 = min(i__4,i__5);
+	    for (i__ = max(i__3,k2); i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		i__4 = i__ + j * a_dim1;
+		z__1.r = mul * a[i__4].r, z__1.i = mul * a[i__4].i;
+		a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L140: */
+	    }
+/* L150: */
+	}
+
+    }
+
+    if (! done) {
+	goto L10;
+    }
+
+    return 0;
+
+/*     End of ZLASCL */
+
+} /* zlascl_ */
+
+/* Subroutine */ int zlaset_(char *uplo, integer *m, integer *n,
+	doublecomplex *alpha, doublecomplex *beta, doublecomplex *a, integer *
+	lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j;
+    extern logical lsame_(char *, char *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLASET initializes a 2-D array A to BETA on the diagonal and
+    ALPHA on the offdiagonals.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies the part of the matrix A to be set.
+            = 'U':      Upper triangular part is set. The lower triangle
+                        is unchanged.
+            = 'L':      Lower triangular part is set. The upper triangle
+                        is unchanged.
+            Otherwise:  All of the matrix A is set.
+
+    M       (input) INTEGER
+            On entry, M specifies the number of rows of A.
+
+    N       (input) INTEGER
+            On entry, N specifies the number of columns of A.
+
+    ALPHA   (input) COMPLEX*16
+            All the offdiagonal array elements are set to ALPHA.
+
+    BETA    (input) COMPLEX*16
+            All the diagonal array elements are set to BETA.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the m by n matrix A.
+            On exit, A(i,j) = ALPHA, 1 <= i <= m, 1 <= j <= n, i.ne.j;
+                     A(i,i) = BETA , 1 <= i <= min(m,n)
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    if (lsame_(uplo, "U")) {
+
+/*
+          Set the diagonal to BETA and the strictly upper triangular
+          part of the array to ALPHA.
+*/
+
+	i__1 = *n;
+	for (j = 2; j <= i__1; ++j) {
+/* Computing MIN */
+	    i__3 = j - 1;
+	    i__2 = min(i__3,*m);
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = alpha->r, a[i__3].i = alpha->i;
+/* L10: */
+	    }
+/* L20: */
+	}
+	i__1 = min(*n,*m);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = beta->r, a[i__2].i = beta->i;
+/* L30: */
+	}
+
+    } else if (lsame_(uplo, "L")) {
+
+/*
+          Set the diagonal to BETA and the strictly lower triangular
+          part of the array to ALPHA.
+*/
+
+	i__1 = min(*m,*n);
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = j + 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = alpha->r, a[i__3].i = alpha->i;
+/* L40: */
+	    }
+/* L50: */
+	}
+	i__1 = min(*n,*m);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = beta->r, a[i__2].i = beta->i;
+/* L60: */
+	}
+
+    } else {
+
+/*
+          Set the array to BETA on the diagonal and ALPHA on the
+          offdiagonal.
+*/
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = alpha->r, a[i__3].i = alpha->i;
+/* L70: */
+	    }
+/* L80: */
+	}
+	i__1 = min(*m,*n);
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    a[i__2].r = beta->r, a[i__2].i = beta->i;
+/* L90: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLASET */
+
+} /* zlaset_ */
+
+/* Subroutine */ int zlasr_(char *side, char *pivot, char *direct, integer *m,
+	 integer *n, doublereal *c__, doublereal *s, doublecomplex *a,
+	integer *lda)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublecomplex z__1, z__2, z__3;
+
+    /* Local variables */
+    static integer i__, j, info;
+    static doublecomplex temp;
+    extern logical lsame_(char *, char *);
+    static doublereal ctemp, stemp;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       October 31, 1992
+
+
+    Purpose
+    =======
+
+    ZLASR   performs the transformation
+
+       A := P*A,   when SIDE = 'L' or 'l'  (  Left-hand side )
+
+       A := A*P',  when SIDE = 'R' or 'r'  ( Right-hand side )
+
+    where A is an m by n complex matrix and P is an orthogonal matrix,
+    consisting of a sequence of plane rotations determined by the
+    parameters PIVOT and DIRECT as follows ( z = m when SIDE = 'L' or 'l'
+    and z = n when SIDE = 'R' or 'r' ):
+
+    When  DIRECT = 'F' or 'f'  ( Forward sequence ) then
+
+       P = P( z - 1 )*...*P( 2 )*P( 1 ),
+
+    and when DIRECT = 'B' or 'b'  ( Backward sequence ) then
+
+       P = P( 1 )*P( 2 )*...*P( z - 1 ),
+
+    where  P( k ) is a plane rotation matrix for the following planes:
+
+       when  PIVOT = 'V' or 'v'  ( Variable pivot ),
+          the plane ( k, k + 1 )
+
+       when  PIVOT = 'T' or 't'  ( Top pivot ),
+          the plane ( 1, k + 1 )
+
+       when  PIVOT = 'B' or 'b'  ( Bottom pivot ),
+          the plane ( k, z )
+
+    c( k ) and s( k )  must contain the  cosine and sine that define the
+    matrix  P( k ).  The two by two plane rotation part of the matrix
+    P( k ), R( k ), is assumed to be of the form
+
+       R( k ) = (  c( k )  s( k ) ).
+                ( -s( k )  c( k ) )
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            Specifies whether the plane rotation matrix P is applied to
+            A on the left or the right.
+            = 'L':  Left, compute A := P*A
+            = 'R':  Right, compute A:= A*P'
+
+    DIRECT  (input) CHARACTER*1
+            Specifies whether P is a forward or backward sequence of
+            plane rotations.
+            = 'F':  Forward, P = P( z - 1 )*...*P( 2 )*P( 1 )
+            = 'B':  Backward, P = P( 1 )*P( 2 )*...*P( z - 1 )
+
+    PIVOT   (input) CHARACTER*1
+            Specifies the plane for which P(k) is a plane rotation
+            matrix.
+            = 'V':  Variable pivot, the plane (k,k+1)
+            = 'T':  Top pivot, the plane (1,k+1)
+            = 'B':  Bottom pivot, the plane (k,z)
+
+    M       (input) INTEGER
+            The number of rows of the matrix A.  If m <= 1, an immediate
+            return is effected.
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.  If n <= 1, an
+            immediate return is effected.
+
+    C, S    (input) DOUBLE PRECISION arrays, dimension
+                    (M-1) if SIDE = 'L'
+                    (N-1) if SIDE = 'R'
+            c(k) and s(k) contain the cosine and sine that define the
+            matrix P(k).  The two by two plane rotation part of the
+            matrix P(k), R(k), is assumed to be of the form
+            R( k ) = (  c( k )  s( k ) ).
+                     ( -s( k )  c( k ) )
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            The m by n matrix A.  On exit, A is overwritten by P*A if
+            SIDE = 'R' or by A*P' if SIDE = 'L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,M).
+
+    =====================================================================
+
+
+       Test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --c__;
+    --s;
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    info = 0;
+    if (! ((lsame_(side, "L")) || (lsame_(side, "R")))) {
+	info = 1;
+    } else if (! (((lsame_(pivot, "V")) || (lsame_(
+	    pivot, "T"))) || (lsame_(pivot, "B")))) {
+	info = 2;
+    } else if (! ((lsame_(direct, "F")) || (lsame_(
+	    direct, "B")))) {
+	info = 3;
+    } else if (*m < 0) {
+	info = 4;
+    } else if (*n < 0) {
+	info = 5;
+    } else if (*lda < max(1,*m)) {
+	info = 9;
+    }
+    if (info != 0) {
+	xerbla_("ZLASR ", &info);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+    if (lsame_(side, "L")) {
+
+/*        Form  P * A */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = j + 1 + i__ * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = j + 1 + i__ * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__4 = j + i__ * a_dim1;
+			    z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			    i__3 = j + i__ * a_dim1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__4 = j + i__ * a_dim1;
+			    z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L10: */
+			}
+		    }
+/* L20: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = j + 1 + i__ * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = j + 1 + i__ * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__3 = j + i__ * a_dim1;
+			    z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+			    i__2 = j + i__ * a_dim1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__3 = j + i__ * a_dim1;
+			    z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L30: */
+			}
+		    }
+/* L40: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = j + i__ * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = j + i__ * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__4 = i__ * a_dim1 + 1;
+			    z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			    i__3 = i__ * a_dim1 + 1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__4 = i__ * a_dim1 + 1;
+			    z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L50: */
+			}
+		    }
+/* L60: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = j + i__ * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = j + i__ * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__3 = i__ * a_dim1 + 1;
+			    z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+			    i__2 = i__ * a_dim1 + 1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__3 = i__ * a_dim1 + 1;
+			    z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L70: */
+			}
+		    }
+/* L80: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *m - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *n;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = j + i__ * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = j + i__ * a_dim1;
+			    i__4 = *m + i__ * a_dim1;
+			    z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[
+				    i__4].i;
+			    z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			    i__3 = *m + i__ * a_dim1;
+			    i__4 = *m + i__ * a_dim1;
+			    z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[
+				    i__4].i;
+			    z__3.r = stemp * temp.r, z__3.i = stemp * temp.i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L90: */
+			}
+		    }
+/* L100: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *m - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *n;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = j + i__ * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = j + i__ * a_dim1;
+			    i__3 = *m + i__ * a_dim1;
+			    z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[
+				    i__3].i;
+			    z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+			    i__2 = *m + i__ * a_dim1;
+			    i__3 = *m + i__ * a_dim1;
+			    z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[
+				    i__3].i;
+			    z__3.r = stemp * temp.r, z__3.i = stemp * temp.i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L110: */
+			}
+		    }
+/* L120: */
+		}
+	    }
+	}
+    } else if (lsame_(side, "R")) {
+
+/*        Form A * P' */
+
+	if (lsame_(pivot, "V")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + (j + 1) * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = i__ + (j + 1) * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__4 = i__ + j * a_dim1;
+			    z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			    i__3 = i__ + j * a_dim1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__4 = i__ + j * a_dim1;
+			    z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L130: */
+			}
+		    }
+/* L140: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + (j + 1) * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = i__ + (j + 1) * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__3 = i__ + j * a_dim1;
+			    z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+			    i__2 = i__ + j * a_dim1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__3 = i__ + j * a_dim1;
+			    z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L150: */
+			}
+		    }
+/* L160: */
+		}
+	    }
+	} else if (lsame_(pivot, "T")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n;
+		for (j = 2; j <= i__1; ++j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = i__ + j * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__4 = i__ + a_dim1;
+			    z__3.r = stemp * a[i__4].r, z__3.i = stemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			    i__3 = i__ + a_dim1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__4 = i__ + a_dim1;
+			    z__3.r = ctemp * a[i__4].r, z__3.i = ctemp * a[
+				    i__4].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L170: */
+			}
+		    }
+/* L180: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n; j >= 2; --j) {
+		    ctemp = c__[j - 1];
+		    stemp = s[j - 1];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = i__ + j * a_dim1;
+			    z__2.r = ctemp * temp.r, z__2.i = ctemp * temp.i;
+			    i__3 = i__ + a_dim1;
+			    z__3.r = stemp * a[i__3].r, z__3.i = stemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+			    i__2 = i__ + a_dim1;
+			    z__2.r = stemp * temp.r, z__2.i = stemp * temp.i;
+			    i__3 = i__ + a_dim1;
+			    z__3.r = ctemp * a[i__3].r, z__3.i = ctemp * a[
+				    i__3].i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L190: */
+			}
+		    }
+/* L200: */
+		}
+	    }
+	} else if (lsame_(pivot, "B")) {
+	    if (lsame_(direct, "F")) {
+		i__1 = *n - 1;
+		for (j = 1; j <= i__1; ++j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__2 = *m;
+			for (i__ = 1; i__ <= i__2; ++i__) {
+			    i__3 = i__ + j * a_dim1;
+			    temp.r = a[i__3].r, temp.i = a[i__3].i;
+			    i__3 = i__ + j * a_dim1;
+			    i__4 = i__ + *n * a_dim1;
+			    z__2.r = stemp * a[i__4].r, z__2.i = stemp * a[
+				    i__4].i;
+			    z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+			    i__3 = i__ + *n * a_dim1;
+			    i__4 = i__ + *n * a_dim1;
+			    z__2.r = ctemp * a[i__4].r, z__2.i = ctemp * a[
+				    i__4].i;
+			    z__3.r = stemp * temp.r, z__3.i = stemp * temp.i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__3].r = z__1.r, a[i__3].i = z__1.i;
+/* L210: */
+			}
+		    }
+/* L220: */
+		}
+	    } else if (lsame_(direct, "B")) {
+		for (j = *n - 1; j >= 1; --j) {
+		    ctemp = c__[j];
+		    stemp = s[j];
+		    if ((ctemp != 1.) || (stemp != 0.)) {
+			i__1 = *m;
+			for (i__ = 1; i__ <= i__1; ++i__) {
+			    i__2 = i__ + j * a_dim1;
+			    temp.r = a[i__2].r, temp.i = a[i__2].i;
+			    i__2 = i__ + j * a_dim1;
+			    i__3 = i__ + *n * a_dim1;
+			    z__2.r = stemp * a[i__3].r, z__2.i = stemp * a[
+				    i__3].i;
+			    z__3.r = ctemp * temp.r, z__3.i = ctemp * temp.i;
+			    z__1.r = z__2.r + z__3.r, z__1.i = z__2.i +
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+			    i__2 = i__ + *n * a_dim1;
+			    i__3 = i__ + *n * a_dim1;
+			    z__2.r = ctemp * a[i__3].r, z__2.i = ctemp * a[
+				    i__3].i;
+			    z__3.r = stemp * temp.r, z__3.i = stemp * temp.i;
+			    z__1.r = z__2.r - z__3.r, z__1.i = z__2.i -
+				    z__3.i;
+			    a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+/* L230: */
+			}
+		    }
+/* L240: */
+		}
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZLASR */
+
+} /* zlasr_ */
+
+/* Subroutine */ int zlassq_(integer *n, doublecomplex *x, integer *incx,
+	doublereal *scale, doublereal *sumsq)
+{
+    /* System generated locals */
+    integer i__1, i__2, i__3;
+    doublereal d__1;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+
+    /* Local variables */
+    static integer ix;
+    static doublereal temp1;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLASSQ returns the values scl and ssq such that
+
+       ( scl**2 )*ssq = x( 1 )**2 +...+ x( n )**2 + ( scale**2 )*sumsq,
+
+    where x( i ) = abs( X( 1 + ( i - 1 )*INCX ) ). The value of sumsq is
+    assumed to be at least unity and the value of ssq will then satisfy
+
+       1.0 .le. ssq .le. ( sumsq + 2*n ).
+
+    scale is assumed to be non-negative and scl returns the value
+
+       scl = max( scale, abs( real( x( i ) ) ), abs( aimag( x( i ) ) ) ),
+              i
+
+    scale and sumsq must be supplied in SCALE and SUMSQ respectively.
+    SCALE and SUMSQ are overwritten by scl and ssq respectively.
+
+    The routine makes only one pass through the vector X.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of elements to be used from the vector X.
+
+    X       (input) COMPLEX*16 array, dimension (N)
+            The vector x as described above.
+               x( i )  = X( 1 + ( i - 1 )*INCX ), 1 <= i <= n.
+
+    INCX    (input) INTEGER
+            The increment between successive values of the vector X.
+            INCX > 0.
+
+    SCALE   (input/output) DOUBLE PRECISION
+            On entry, the value  scale  in the equation above.
+            On exit, SCALE is overwritten with the value  scl .
+
+    SUMSQ   (input/output) DOUBLE PRECISION
+            On entry, the value  sumsq  in the equation above.
+            On exit, SUMSQ is overwritten with the value  ssq .
+
+   =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    --x;
+
+    /* Function Body */
+    if (*n > 0) {
+	i__1 = (*n - 1) * *incx + 1;
+	i__2 = *incx;
+	for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
+	    i__3 = ix;
+	    if (x[i__3].r != 0.) {
+		i__3 = ix;
+		temp1 = (d__1 = x[i__3].r, abs(d__1));
+		if (*scale < temp1) {
+/* Computing 2nd power */
+		    d__1 = *scale / temp1;
+		    *sumsq = *sumsq * (d__1 * d__1) + 1;
+		    *scale = temp1;
+		} else {
+/* Computing 2nd power */
+		    d__1 = temp1 / *scale;
+		    *sumsq += d__1 * d__1;
+		}
+	    }
+	    if (d_imag(&x[ix]) != 0.) {
+		temp1 = (d__1 = d_imag(&x[ix]), abs(d__1));
+		if (*scale < temp1) {
+/* Computing 2nd power */
+		    d__1 = *scale / temp1;
+		    *sumsq = *sumsq * (d__1 * d__1) + 1;
+		    *scale = temp1;
+		} else {
+/* Computing 2nd power */
+		    d__1 = temp1 / *scale;
+		    *sumsq += d__1 * d__1;
+		}
+	    }
+/* L10: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLASSQ */
+
+} /* zlassq_ */
+
+/* Subroutine */ int zlaswp_(integer *n, doublecomplex *a, integer *lda,
+	integer *k1, integer *k2, integer *ipiv, integer *incx)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
+
+    /* Local variables */
+    static integer i__, j, k, i1, i2, n32, ip, ix, ix0, inc;
+    static doublecomplex temp;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZLASWP performs a series of row interchanges on the matrix A.
+    One row interchange is initiated for each of rows K1 through K2 of A.
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The number of columns of the matrix A.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the matrix of column dimension N to which the row
+            interchanges will be applied.
+            On exit, the permuted matrix.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+
+    K1      (input) INTEGER
+            The first element of IPIV for which a row interchange will
+            be done.
+
+    K2      (input) INTEGER
+            The last element of IPIV for which a row interchange will
+            be done.
+
+    IPIV    (input) INTEGER array, dimension (M*abs(INCX))
+            The vector of pivot indices.  Only the elements in positions
+            K1 through K2 of IPIV are accessed.
+            IPIV(K) = L implies rows K and L are to be interchanged.
+
+    INCX    (input) INTEGER
+            The increment between successive values of IPIV.  If IPIV
+            is negative, the pivots are applied in reverse order.
+
+    Further Details
+    ===============
+
+    Modified by
+     R. C. Whaley, Computer Science Dept., Univ. of Tenn., Knoxville, USA
+
+   =====================================================================
+
+
+       Interchange row I with row IPIV(I) for each of rows K1 through K2.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --ipiv;
+
+    /* Function Body */
+    if (*incx > 0) {
+	ix0 = *k1;
+	i1 = *k1;
+	i2 = *k2;
+	inc = 1;
+    } else if (*incx < 0) {
+	ix0 = (1 - *k2) * *incx + 1;
+	i1 = *k2;
+	i2 = *k1;
+	inc = -1;
+    } else {
+	return 0;
+    }
+
+    n32 = (*n / 32) << (5);
+    if (n32 != 0) {
+	i__1 = n32;
+	for (j = 1; j <= i__1; j += 32) {
+	    ix = ix0;
+	    i__2 = i2;
+	    i__3 = inc;
+	    for (i__ = i1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3)
+		    {
+		ip = ipiv[ix];
+		if (ip != i__) {
+		    i__4 = j + 31;
+		    for (k = j; k <= i__4; ++k) {
+			i__5 = i__ + k * a_dim1;
+			temp.r = a[i__5].r, temp.i = a[i__5].i;
+			i__5 = i__ + k * a_dim1;
+			i__6 = ip + k * a_dim1;
+			a[i__5].r = a[i__6].r, a[i__5].i = a[i__6].i;
+			i__5 = ip + k * a_dim1;
+			a[i__5].r = temp.r, a[i__5].i = temp.i;
+/* L10: */
+		    }
+		}
+		ix += *incx;
+/* L20: */
+	    }
+/* L30: */
+	}
+    }
+    if (n32 != *n) {
+	++n32;
+	ix = ix0;
+	i__1 = i2;
+	i__3 = inc;
+	for (i__ = i1; i__3 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__3) {
+	    ip = ipiv[ix];
+	    if (ip != i__) {
+		i__2 = *n;
+		for (k = n32; k <= i__2; ++k) {
+		    i__4 = i__ + k * a_dim1;
+		    temp.r = a[i__4].r, temp.i = a[i__4].i;
+		    i__4 = i__ + k * a_dim1;
+		    i__5 = ip + k * a_dim1;
+		    a[i__4].r = a[i__5].r, a[i__4].i = a[i__5].i;
+		    i__4 = ip + k * a_dim1;
+		    a[i__4].r = temp.r, a[i__4].i = temp.i;
+/* L40: */
+		}
+	    }
+	    ix += *incx;
+/* L50: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLASWP */
+
+} /* zlaswp_ */
+
+/* Subroutine */ int zlatrd_(char *uplo, integer *n, integer *nb,
+	doublecomplex *a, integer *lda, doublereal *e, doublecomplex *tau,
+	doublecomplex *w, integer *ldw)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
+    doublereal d__1;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Local variables */
+    static integer i__, iw;
+    static doublecomplex alpha;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *);
+    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *),
+	    zhemv_(char *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *), zaxpy_(integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *), zlarfg_(integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), zlacgv_(integer *, doublecomplex *,
+	    integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLATRD reduces NB rows and columns of a complex Hermitian matrix A to
+    Hermitian tridiagonal form by a unitary similarity
+    transformation Q' * A * Q, and returns the matrices V and W which are
+    needed to apply the transformation to the unreduced part of A.
+
+    If UPLO = 'U', ZLATRD reduces the last NB rows and columns of a
+    matrix, of which the upper triangle is supplied;
+    if UPLO = 'L', ZLATRD reduces the first NB rows and columns of a
+    matrix, of which the lower triangle is supplied.
+
+    This is an auxiliary routine called by ZHETRD.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER
+            Specifies whether the upper or lower triangular part of the
+            Hermitian matrix A is stored:
+            = 'U': Upper triangular
+            = 'L': Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.
+
+    NB      (input) INTEGER
+            The number of rows and columns to be reduced.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            n-by-n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n-by-n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+            On exit:
+            if UPLO = 'U', the last NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements above the diagonal
+              with the array TAU, represent the unitary matrix Q as a
+              product of elementary reflectors;
+            if UPLO = 'L', the first NB columns have been reduced to
+              tridiagonal form, with the diagonal elements overwriting
+              the diagonal elements of A; the elements below the diagonal
+              with the array TAU, represent the  unitary matrix Q as a
+              product of elementary reflectors.
+            See Further Details.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    E       (output) DOUBLE PRECISION array, dimension (N-1)
+            If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
+            elements of the last NB columns of the reduced matrix;
+            if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
+            the first NB columns of the reduced matrix.
+
+    TAU     (output) COMPLEX*16 array, dimension (N-1)
+            The scalar factors of the elementary reflectors, stored in
+            TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
+            See Further Details.
+
+    W       (output) COMPLEX*16 array, dimension (LDW,NB)
+            The n-by-nb matrix W required to update the unreduced part
+            of A.
+
+    LDW     (input) INTEGER
+            The leading dimension of the array W. LDW >= max(1,N).
+
+    Further Details
+    ===============
+
+    If UPLO = 'U', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(n) H(n-1) . . . H(n-nb+1).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
+    and tau in TAU(i-1).
+
+    If UPLO = 'L', the matrix Q is represented as a product of elementary
+    reflectors
+
+       Q = H(1) H(2) . . . H(nb).
+
+    Each H(i) has the form
+
+       H(i) = I - tau * v * v'
+
+    where tau is a complex scalar, and v is a complex vector with
+    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
+    and tau in TAU(i).
+
+    The elements of the vectors v together form the n-by-nb matrix V
+    which is needed, with W, to apply the transformation to the unreduced
+    part of the matrix, using a Hermitian rank-2k update of the form:
+    A := A - V*W' - W*V'.
+
+    The contents of A on exit are illustrated by the following examples
+    with n = 5 and nb = 2:
+
+    if UPLO = 'U':                       if UPLO = 'L':
+
+      (  a   a   a   v4  v5 )              (  d                  )
+      (      a   a   v4  v5 )              (  1   d              )
+      (          a   1   v5 )              (  v1  1   a          )
+      (              d   1  )              (  v1  v2  a   a      )
+      (                  d  )              (  v1  v2  a   a   a  )
+
+    where d denotes a diagonal element of the reduced matrix, a denotes
+    an element of the original matrix that is unchanged, and vi denotes
+    an element of the vector defining H(i).
+
+    =====================================================================
+
+
+       Quick return if possible
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --e;
+    --tau;
+    w_dim1 = *ldw;
+    w_offset = 1 + w_dim1;
+    w -= w_offset;
+
+    /* Function Body */
+    if (*n <= 0) {
+	return 0;
+    }
+
+    if (lsame_(uplo, "U")) {
+
+/*        Reduce last NB columns of upper triangle */
+
+	i__1 = *n - *nb + 1;
+	for (i__ = *n; i__ >= i__1; --i__) {
+	    iw = i__ - *n + *nb;
+	    if (i__ < *n) {
+
+/*              Update A(1:i,i) */
+
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = i__ + i__ * a_dim1;
+		d__1 = a[i__3].r;
+		a[i__2].r = d__1, a[i__2].i = 0.;
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw);
+		i__2 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__, &i__2, &z__1, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &w[i__ + (iw + 1) * w_dim1], ldw, &
+			c_b60, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &w[i__ + (iw + 1) * w_dim1], ldw);
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = *n - i__;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__, &i__2, &z__1, &w[(iw + 1) *
+			w_dim1 + 1], ldw, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			c_b60, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = i__ + i__ * a_dim1;
+		d__1 = a[i__3].r;
+		a[i__2].r = d__1, a[i__2].i = 0.;
+	    }
+	    if (i__ > 1) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(1:i-2,i)
+*/
+
+		i__2 = i__ - 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = i__ - 1;
+		zlarfg_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &tau[i__
+			- 1]);
+		i__2 = i__ - 1;
+		e[i__2] = alpha.r;
+		i__2 = i__ - 1 + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute W(1:i-1,i) */
+
+		i__2 = i__ - 1;
+		zhemv_("Upper", &i__2, &c_b60, &a[a_offset], lda, &a[i__ *
+			a_dim1 + 1], &c__1, &c_b59, &w[iw * w_dim1 + 1], &
+			c__1);
+		if (i__ < *n) {
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[(
+			    iw + 1) * w_dim1 + 1], ldw, &a[i__ * a_dim1 + 1],
+			    &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[(i__ + 1) *
+			     a_dim1 + 1], lda, &w[i__ + 1 + iw * w_dim1], &
+			    c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[(
+			    i__ + 1) * a_dim1 + 1], lda, &a[i__ * a_dim1 + 1],
+			     &c__1, &c_b59, &w[i__ + 1 + iw * w_dim1], &c__1);
+		    i__2 = i__ - 1;
+		    i__3 = *n - i__;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemv_("No transpose", &i__2, &i__3, &z__1, &w[(iw + 1) *
+			    w_dim1 + 1], ldw, &w[i__ + 1 + iw * w_dim1], &
+			    c__1, &c_b60, &w[iw * w_dim1 + 1], &c__1);
+		}
+		i__2 = i__ - 1;
+		zscal_(&i__2, &tau[i__ - 1], &w[iw * w_dim1 + 1], &c__1);
+		z__3.r = -.5, z__3.i = -0.;
+		i__2 = i__ - 1;
+		z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i =
+			 z__3.r * tau[i__2].i + z__3.i * tau[i__2].r;
+		i__3 = i__ - 1;
+		zdotc_(&z__4, &i__3, &w[iw * w_dim1 + 1], &c__1, &a[i__ *
+			a_dim1 + 1], &c__1);
+		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r *
+			z__4.i + z__2.i * z__4.r;
+		alpha.r = z__1.r, alpha.i = z__1.i;
+		i__2 = i__ - 1;
+		zaxpy_(&i__2, &alpha, &a[i__ * a_dim1 + 1], &c__1, &w[iw *
+			w_dim1 + 1], &c__1);
+	    }
+
+/* L10: */
+	}
+    } else {
+
+/*        Reduce first NB columns of lower triangle */
+
+	i__1 = *nb;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+
+/*           Update A(i:n,i) */
+
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__ + i__ * a_dim1;
+	    d__1 = a[i__3].r;
+	    a[i__2].r = d__1, a[i__2].i = 0.;
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &w[i__ + w_dim1], ldw);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + a_dim1], lda,
+		     &w[i__ + w_dim1], ldw, &c_b60, &a[i__ + i__ * a_dim1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &w[i__ + w_dim1], ldw);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = *n - i__ + 1;
+	    i__3 = i__ - 1;
+	    z__1.r = -1., z__1.i = -0.;
+	    zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + w_dim1], ldw,
+		     &a[i__ + a_dim1], lda, &c_b60, &a[i__ + i__ * a_dim1], &
+		    c__1);
+	    i__2 = i__ - 1;
+	    zlacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    i__2 = i__ + i__ * a_dim1;
+	    i__3 = i__ + i__ * a_dim1;
+	    d__1 = a[i__3].r;
+	    a[i__2].r = d__1, a[i__2].i = 0.;
+	    if (i__ < *n) {
+
+/*
+                Generate elementary reflector H(i) to annihilate
+                A(i+2:n,i)
+*/
+
+		i__2 = i__ + 1 + i__ * a_dim1;
+		alpha.r = a[i__2].r, alpha.i = a[i__2].i;
+		i__2 = *n - i__;
+/* Computing MIN */
+		i__3 = i__ + 2;
+		zlarfg_(&i__2, &alpha, &a[min(i__3,*n) + i__ * a_dim1], &c__1,
+			 &tau[i__]);
+		i__2 = i__;
+		e[i__2] = alpha.r;
+		i__2 = i__ + 1 + i__ * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+
+/*              Compute W(i+1:n,i) */
+
+		i__2 = *n - i__;
+		zhemv_("Lower", &i__2, &c_b60, &a[i__ + 1 + (i__ + 1) *
+			a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b59, &w[i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &w[i__ +
+			1 + w_dim1], ldw, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b59, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[i__ + 1 +
+			a_dim1], lda, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ +
+			1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			c_b59, &w[i__ * w_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &w[i__ + 1 +
+			w_dim1], ldw, &w[i__ * w_dim1 + 1], &c__1, &c_b60, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+		i__2 = *n - i__;
+		zscal_(&i__2, &tau[i__], &w[i__ + 1 + i__ * w_dim1], &c__1);
+		z__3.r = -.5, z__3.i = -0.;
+		i__2 = i__;
+		z__2.r = z__3.r * tau[i__2].r - z__3.i * tau[i__2].i, z__2.i =
+			 z__3.r * tau[i__2].i + z__3.i * tau[i__2].r;
+		i__3 = *n - i__;
+		zdotc_(&z__4, &i__3, &w[i__ + 1 + i__ * w_dim1], &c__1, &a[
+			i__ + 1 + i__ * a_dim1], &c__1);
+		z__1.r = z__2.r * z__4.r - z__2.i * z__4.i, z__1.i = z__2.r *
+			z__4.i + z__2.i * z__4.r;
+		alpha.r = z__1.r, alpha.i = z__1.i;
+		i__2 = *n - i__;
+		zaxpy_(&i__2, &alpha, &a[i__ + 1 + i__ * a_dim1], &c__1, &w[
+			i__ + 1 + i__ * w_dim1], &c__1);
+	    }
+
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLATRD */
+
+} /* zlatrd_ */
+
+/* Subroutine */ int zlatrs_(char *uplo, char *trans, char *diag, char *
+	normin, integer *n, doublecomplex *a, integer *lda, doublecomplex *x,
+	doublereal *scale, doublereal *cnorm, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
+    doublereal d__1, d__2, d__3, d__4;
+    doublecomplex z__1, z__2, z__3, z__4;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j;
+    static doublereal xj, rec, tjj;
+    static integer jinc;
+    static doublereal xbnd;
+    static integer imax;
+    static doublereal tmax;
+    static doublecomplex tjjs;
+    static doublereal xmax, grow;
+    extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *,
+	    integer *);
+    extern logical lsame_(char *, char *);
+    static doublereal tscal;
+    static doublecomplex uscal;
+    static integer jlast;
+    static doublecomplex csumj;
+    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    static logical upper;
+    extern /* Double Complex */ VOID zdotu_(doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    extern /* Subroutine */ int zaxpy_(integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), ztrsv_(
+	    char *, char *, char *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), dlabad_(
+	    doublereal *, doublereal *);
+
+    extern integer idamax_(integer *, doublereal *, integer *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
+	    integer *, doublereal *, doublecomplex *, integer *);
+    static doublereal bignum;
+    extern integer izamax_(integer *, doublecomplex *, integer *);
+    extern /* Double Complex */ VOID zladiv_(doublecomplex *, doublecomplex *,
+	     doublecomplex *);
+    static logical notran;
+    static integer jfirst;
+    extern doublereal dzasum_(integer *, doublecomplex *, integer *);
+    static doublereal smlnum;
+    static logical nounit;
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1992
+
+
+    Purpose
+    =======
+
+    ZLATRS solves one of the triangular systems
+
+       A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+
+    with scaling to prevent overflow.  Here A is an upper or lower
+    triangular matrix, A**T denotes the transpose of A, A**H denotes the
+    conjugate transpose of A, x and b are n-element vectors, and s is a
+    scaling factor, usually less than or equal to 1, chosen so that the
+    components of x will be less than the overflow threshold.  If the
+    unscaled problem will not cause overflow, the Level 2 BLAS routine
+    ZTRSV is called. If the matrix A is singular (A(j,j) = 0 for some j),
+    then s is set to 0 and a non-trivial solution to A*x = 0 is returned.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the matrix A is upper or lower triangular.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    TRANS   (input) CHARACTER*1
+            Specifies the operation applied to A.
+            = 'N':  Solve A * x = s*b     (No transpose)
+            = 'T':  Solve A**T * x = s*b  (Transpose)
+            = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+
+    DIAG    (input) CHARACTER*1
+            Specifies whether or not the matrix A is unit triangular.
+            = 'N':  Non-unit triangular
+            = 'U':  Unit triangular
+
+    NORMIN  (input) CHARACTER*1
+            Specifies whether CNORM has been set or not.
+            = 'Y':  CNORM contains the column norms on entry
+            = 'N':  CNORM is not set on entry.  On exit, the norms will
+                    be computed and stored in CNORM.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The triangular matrix A.  If UPLO = 'U', the leading n by n
+            upper triangular part of the array A contains the upper
+            triangular matrix, and the strictly lower triangular part of
+            A is not referenced.  If UPLO = 'L', the leading n by n lower
+            triangular part of the array A contains the lower triangular
+            matrix, and the strictly upper triangular part of A is not
+            referenced.  If DIAG = 'U', the diagonal elements of A are
+            also not referenced and are assumed to be 1.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max (1,N).
+
+    X       (input/output) COMPLEX*16 array, dimension (N)
+            On entry, the right hand side b of the triangular system.
+            On exit, X is overwritten by the solution vector x.
+
+    SCALE   (output) DOUBLE PRECISION
+            The scaling factor s for the triangular system
+               A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+            If SCALE = 0, the matrix A is singular or badly scaled, and
+            the vector x is an exact or approximate solution to A*x = 0.
+
+    CNORM   (input or output) DOUBLE PRECISION array, dimension (N)
+
+            If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
+            contains the norm of the off-diagonal part of the j-th column
+            of A.  If TRANS = 'N', CNORM(j) must be greater than or equal
+            to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
+            must be greater than or equal to the 1-norm.
+
+            If NORMIN = 'N', CNORM is an output argument and CNORM(j)
+            returns the 1-norm of the offdiagonal part of the j-th column
+            of A.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -k, the k-th argument had an illegal value
+
+    Further Details
+    ======= =======
+
+    A rough bound on x is computed; if that is less than overflow, ZTRSV
+    is called, otherwise, specific code is used which checks for possible
+    overflow or divide-by-zero at every operation.
+
+    A columnwise scheme is used for solving A*x = b.  The basic algorithm
+    if A is lower triangular is
+
+         x[1:n] := b[1:n]
+         for j = 1, ..., n
+              x(j) := x(j) / A(j,j)
+              x[j+1:n] := x[j+1:n] - x(j) * A[j+1:n,j]
+         end
+
+    Define bounds on the components of x after j iterations of the loop:
+       M(j) = bound on x[1:j]
+       G(j) = bound on x[j+1:n]
+    Initially, let M(0) = 0 and G(0) = max{x(i), i=1,...,n}.
+
+    Then for iteration j+1 we have
+       M(j+1) <= G(j) / | A(j+1,j+1) |
+       G(j+1) <= G(j) + M(j+1) * | A[j+2:n,j+1] |
+              <= G(j) ( 1 + CNORM(j+1) / | A(j+1,j+1) | )
+
+    where CNORM(j+1) is greater than or equal to the infinity-norm of
+    column j+1 of A, not counting the diagonal.  Hence
+
+       G(j) <= G(0) product ( 1 + CNORM(i) / | A(i,i) | )
+                    1<=i<=j
+    and
+
+       |x(j)| <= ( G(0) / |A(j,j)| ) product ( 1 + CNORM(i) / |A(i,i)| )
+                                     1<=i< j
+
+    Since |x(j)| <= M(j), we use the Level 2 BLAS routine ZTRSV if the
+    reciprocal of the largest M(j), j=1,..,n, is larger than
+    max(underflow, 1/overflow).
+
+    The bound on x(j) is also used to determine when a step in the
+    columnwise method can be performed without fear of overflow.  If
+    the computed bound is greater than a large constant, x is scaled to
+    prevent overflow, but if the bound overflows, x is set to 0, x(j) to
+    1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
+
+    Similarly, a row-wise scheme is used to solve A**T *x = b  or
+    A**H *x = b.  The basic algorithm for A upper triangular is
+
+         for j = 1, ..., n
+              x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
+         end
+
+    We simultaneously compute two bounds
+         G(j) = bound on ( b(i) - A[1:i-1,i]' * x[1:i-1] ), 1<=i<=j
+         M(j) = bound on x(i), 1<=i<=j
+
+    The initial values are G(0) = 0, M(0) = max{b(i), i=1,..,n}, and we
+    add the constraint G(j) >= G(j-1) and M(j) >= M(j-1) for j >= 1.
+    Then the bound on x(j) is
+
+         M(j) <= M(j-1) * ( 1 + CNORM(j) ) / | A(j,j) |
+
+              <= M(0) * product ( ( 1 + CNORM(i) ) / |A(i,i)| )
+                        1<=i<=j
+
+    and we can safely call ZTRSV if 1/M(n) and 1/G(n) are both greater
+    than max(underflow, 1/overflow).
+
+    =====================================================================
+*/
+
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --x;
+    --cnorm;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    notran = lsame_(trans, "N");
+    nounit = lsame_(diag, "N");
+
+/*     Test the input parameters. */
+
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "T") && !
+	    lsame_(trans, "C")) {
+	*info = -2;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -3;
+    } else if (! lsame_(normin, "Y") && ! lsame_(normin,
+	     "N")) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLATRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine machine dependent parameters to control overflow. */
+
+    smlnum = SAFEMINIMUM;
+    bignum = 1. / smlnum;
+    dlabad_(&smlnum, &bignum);
+    smlnum /= PRECISION;
+    bignum = 1. / smlnum;
+    *scale = 1.;
+
+    if (lsame_(normin, "N")) {
+
+/*        Compute the 1-norm of each column, not including the diagonal. */
+
+	if (upper) {
+
+/*           A is upper triangular. */
+
+	    i__1 = *n;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = j - 1;
+		cnorm[j] = dzasum_(&i__2, &a[j * a_dim1 + 1], &c__1);
+/* L10: */
+	    }
+	} else {
+
+/*           A is lower triangular. */
+
+	    i__1 = *n - 1;
+	    for (j = 1; j <= i__1; ++j) {
+		i__2 = *n - j;
+		cnorm[j] = dzasum_(&i__2, &a[j + 1 + j * a_dim1], &c__1);
+/* L20: */
+	    }
+	    cnorm[*n] = 0.;
+	}
+    }
+
+/*
+       Scale the column norms by TSCAL if the maximum element in CNORM is
+       greater than BIGNUM/2.
+*/
+
+    imax = idamax_(n, &cnorm[1], &c__1);
+    tmax = cnorm[imax];
+    if (tmax <= bignum * .5) {
+	tscal = 1.;
+    } else {
+	tscal = .5 / (smlnum * tmax);
+	dscal_(n, &tscal, &cnorm[1], &c__1);
+    }
+
+/*
+       Compute a bound on the computed solution vector to see if the
+       Level 2 BLAS routine ZTRSV can be used.
+*/
+
+    xmax = 0.;
+    i__1 = *n;
+    for (j = 1; j <= i__1; ++j) {
+/* Computing MAX */
+	i__2 = j;
+	d__3 = xmax, d__4 = (d__1 = x[i__2].r / 2., abs(d__1)) + (d__2 =
+		d_imag(&x[j]) / 2., abs(d__2));
+	xmax = max(d__3,d__4);
+/* L30: */
+    }
+    xbnd = xmax;
+
+    if (notran) {
+
+/*        Compute the growth in A * x = b. */
+
+	if (upper) {
+	    jfirst = *n;
+	    jlast = 1;
+	    jinc = -1;
+	} else {
+	    jfirst = 1;
+	    jlast = *n;
+	    jinc = 1;
+	}
+
+	if (tscal != 1.) {
+	    grow = 0.;
+	    goto L60;
+	}
+
+	if (nounit) {
+
+/*
+             A is non-unit triangular.
+
+             Compute GROW = 1/G(j) and XBND = 1/M(j).
+             Initially, G(0) = max{x(i), i=1,...,n}.
+*/
+
+	    grow = .5 / max(xbnd,smlnum);
+	    xbnd = grow;
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L60;
+		}
+
+		i__3 = j + j * a_dim1;
+		tjjs.r = a[i__3].r, tjjs.i = a[i__3].i;
+		tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs(
+			d__2));
+
+		if (tjj >= smlnum) {
+
+/*
+                   M(j) = G(j-1) / abs(A(j,j))
+
+   Computing MIN
+*/
+		    d__1 = xbnd, d__2 = min(1.,tjj) * grow;
+		    xbnd = min(d__1,d__2);
+		} else {
+
+/*                 M(j) could overflow, set XBND to 0. */
+
+		    xbnd = 0.;
+		}
+
+		if (tjj + cnorm[j] >= smlnum) {
+
+/*                 G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */
+
+		    grow *= tjj / (tjj + cnorm[j]);
+		} else {
+
+/*                 G(j) could overflow, set GROW to 0. */
+
+		    grow = 0.;
+		}
+/* L40: */
+	    }
+	    grow = xbnd;
+	} else {
+
+/*
+             A is unit triangular.
+
+             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+
+   Computing MIN
+*/
+	    d__1 = 1., d__2 = .5 / max(xbnd,smlnum);
+	    grow = min(d__1,d__2);
+	    i__2 = jlast;
+	    i__1 = jinc;
+	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L60;
+		}
+
+/*              G(j) = G(j-1)*( 1 + CNORM(j) ) */
+
+		grow *= 1. / (cnorm[j] + 1.);
+/* L50: */
+	    }
+	}
+L60:
+
+	;
+    } else {
+
+/*        Compute the growth in A**T * x = b  or  A**H * x = b. */
+
+	if (upper) {
+	    jfirst = 1;
+	    jlast = *n;
+	    jinc = 1;
+	} else {
+	    jfirst = *n;
+	    jlast = 1;
+	    jinc = -1;
+	}
+
+	if (tscal != 1.) {
+	    grow = 0.;
+	    goto L90;
+	}
+
+	if (nounit) {
+
+/*
+             A is non-unit triangular.
+
+             Compute GROW = 1/G(j) and XBND = 1/M(j).
+             Initially, M(0) = max{x(i), i=1,...,n}.
+*/
+
+	    grow = .5 / max(xbnd,smlnum);
+	    xbnd = grow;
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L90;
+		}
+
+/*              G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */
+
+		xj = cnorm[j] + 1.;
+/* Computing MIN */
+		d__1 = grow, d__2 = xbnd / xj;
+		grow = min(d__1,d__2);
+
+		i__3 = j + j * a_dim1;
+		tjjs.r = a[i__3].r, tjjs.i = a[i__3].i;
+		tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs(
+			d__2));
+
+		if (tjj >= smlnum) {
+
+/*                 M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */
+
+		    if (xj > tjj) {
+			xbnd *= tjj / xj;
+		    }
+		} else {
+
+/*                 M(j) could overflow, set XBND to 0. */
+
+		    xbnd = 0.;
+		}
+/* L70: */
+	    }
+	    grow = min(grow,xbnd);
+	} else {
+
+/*
+             A is unit triangular.
+
+             Compute GROW = 1/G(j), where G(0) = max{x(i), i=1,...,n}.
+
+   Computing MIN
+*/
+	    d__1 = 1., d__2 = .5 / max(xbnd,smlnum);
+	    grow = min(d__1,d__2);
+	    i__2 = jlast;
+	    i__1 = jinc;
+	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*              Exit the loop if the growth factor is too small. */
+
+		if (grow <= smlnum) {
+		    goto L90;
+		}
+
+/*              G(j) = ( 1 + CNORM(j) )*G(j-1) */
+
+		xj = cnorm[j] + 1.;
+		grow /= xj;
+/* L80: */
+	    }
+	}
+L90:
+	;
+    }
+
+    if (grow * tscal > smlnum) {
+
+/*
+          Use the Level 2 BLAS solve if the reciprocal of the bound on
+          elements of X is not too small.
+*/
+
+	ztrsv_(uplo, trans, diag, n, &a[a_offset], lda, &x[1], &c__1);
+    } else {
+
+/*        Use a Level 1 BLAS solve, scaling intermediate results. */
+
+	if (xmax > bignum * .5) {
+
+/*
+             Scale X so that its components are less than or equal to
+             BIGNUM in absolute value.
+*/
+
+	    *scale = bignum * .5 / xmax;
+	    zdscal_(n, scale, &x[1], &c__1);
+	    xmax = bignum;
+	} else {
+	    xmax *= 2.;
+	}
+
+	if (notran) {
+
+/*           Solve A * x = b */
+
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*              Compute x(j) = b(j) / A(j,j), scaling x if necessary. */
+
+		i__3 = j;
+		xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]),
+			abs(d__2));
+		if (nounit) {
+		    i__3 = j + j * a_dim1;
+		    z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3].i;
+		    tjjs.r = z__1.r, tjjs.i = z__1.i;
+		} else {
+		    tjjs.r = tscal, tjjs.i = 0.;
+		    if (tscal == 1.) {
+			goto L110;
+		    }
+		}
+		tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs), abs(
+			d__2));
+		if (tjj > smlnum) {
+
+/*                    abs(A(j,j)) > SMLNUM: */
+
+		    if (tjj < 1.) {
+			if (xj > tjj * bignum) {
+
+/*                          Scale x by 1/b(j). */
+
+			    rec = 1. / xj;
+			    zdscal_(n, &rec, &x[1], &c__1);
+			    *scale *= rec;
+			    xmax *= rec;
+			}
+		    }
+		    i__3 = j;
+		    zladiv_(&z__1, &x[j], &tjjs);
+		    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    i__3 = j;
+		    xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j])
+			    , abs(d__2));
+		} else if (tjj > 0.) {
+
+/*                    0 < abs(A(j,j)) <= SMLNUM: */
+
+		    if (xj > tjj * bignum) {
+
+/*
+                         Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM
+                         to avoid overflow when dividing by A(j,j).
+*/
+
+			rec = tjj * bignum / xj;
+			if (cnorm[j] > 1.) {
+
+/*
+                            Scale by 1/CNORM(j) to avoid overflow when
+                            multiplying x(j) times column j.
+*/
+
+			    rec /= cnorm[j];
+			}
+			zdscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+			xmax *= rec;
+		    }
+		    i__3 = j;
+		    zladiv_(&z__1, &x[j], &tjjs);
+		    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    i__3 = j;
+		    xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j])
+			    , abs(d__2));
+		} else {
+
+/*
+                      A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                      scale = 0, and compute a solution to A*x = 0.
+*/
+
+		    i__3 = *n;
+		    for (i__ = 1; i__ <= i__3; ++i__) {
+			i__4 = i__;
+			x[i__4].r = 0., x[i__4].i = 0.;
+/* L100: */
+		    }
+		    i__3 = j;
+		    x[i__3].r = 1., x[i__3].i = 0.;
+		    xj = 1.;
+		    *scale = 0.;
+		    xmax = 0.;
+		}
+L110:
+
+/*
+                Scale x if necessary to avoid overflow when adding a
+                multiple of column j of A.
+*/
+
+		if (xj > 1.) {
+		    rec = 1. / xj;
+		    if (cnorm[j] > (bignum - xmax) * rec) {
+
+/*                    Scale x by 1/(2*abs(x(j))). */
+
+			rec *= .5;
+			zdscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+		    }
+		} else if (xj * cnorm[j] > bignum - xmax) {
+
+/*                 Scale x by 1/2. */
+
+		    zdscal_(n, &c_b2210, &x[1], &c__1);
+		    *scale *= .5;
+		}
+
+		if (upper) {
+		    if (j > 1) {
+
+/*
+                      Compute the update
+                         x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j)
+*/
+
+			i__3 = j - 1;
+			i__4 = j;
+			z__2.r = -x[i__4].r, z__2.i = -x[i__4].i;
+			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
+			zaxpy_(&i__3, &z__1, &a[j * a_dim1 + 1], &c__1, &x[1],
+				 &c__1);
+			i__3 = j - 1;
+			i__ = izamax_(&i__3, &x[1], &c__1);
+			i__3 = i__;
+			xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(
+				&x[i__]), abs(d__2));
+		    }
+		} else {
+		    if (j < *n) {
+
+/*
+                      Compute the update
+                         x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j)
+*/
+
+			i__3 = *n - j;
+			i__4 = j;
+			z__2.r = -x[i__4].r, z__2.i = -x[i__4].i;
+			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
+			zaxpy_(&i__3, &z__1, &a[j + 1 + j * a_dim1], &c__1, &
+				x[j + 1], &c__1);
+			i__3 = *n - j;
+			i__ = j + izamax_(&i__3, &x[j + 1], &c__1);
+			i__3 = i__;
+			xmax = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(
+				&x[i__]), abs(d__2));
+		    }
+		}
+/* L120: */
+	    }
+
+	} else if (lsame_(trans, "T")) {
+
+/*           Solve A**T * x = b */
+
+	    i__2 = jlast;
+	    i__1 = jinc;
+	    for (j = jfirst; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*
+                Compute x(j) = b(j) - sum A(k,j)*x(k).
+                                      k<>j
+*/
+
+		i__3 = j;
+		xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]),
+			abs(d__2));
+		uscal.r = tscal, uscal.i = 0.;
+		rec = 1. / max(xmax,1.);
+		if (cnorm[j] > (bignum - xj) * rec) {
+
+/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
+
+		    rec *= .5;
+		    if (nounit) {
+			i__3 = j + j * a_dim1;
+			z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3]
+				.i;
+			tjjs.r = z__1.r, tjjs.i = z__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.;
+		    }
+		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs),
+			    abs(d__2));
+		    if (tjj > 1.) {
+
+/*
+                         Divide by A(j,j) when scaling x if A(j,j) > 1.
+
+   Computing MIN
+*/
+			d__1 = 1., d__2 = rec * tjj;
+			rec = min(d__1,d__2);
+			zladiv_(&z__1, &uscal, &tjjs);
+			uscal.r = z__1.r, uscal.i = z__1.i;
+		    }
+		    if (rec < 1.) {
+			zdscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+			xmax *= rec;
+		    }
+		}
+
+		csumj.r = 0., csumj.i = 0.;
+		if (uscal.r == 1. && uscal.i == 0.) {
+
+/*
+                   If the scaling needed for A in the dot product is 1,
+                   call ZDOTU to perform the dot product.
+*/
+
+		    if (upper) {
+			i__3 = j - 1;
+			zdotu_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1],
+				 &c__1);
+			csumj.r = z__1.r, csumj.i = z__1.i;
+		    } else if (j < *n) {
+			i__3 = *n - j;
+			zdotu_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, &
+				x[j + 1], &c__1);
+			csumj.r = z__1.r, csumj.i = z__1.i;
+		    }
+		} else {
+
+/*                 Otherwise, use in-line code for the dot product. */
+
+		    if (upper) {
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * a_dim1;
+			    z__3.r = a[i__4].r * uscal.r - a[i__4].i *
+				    uscal.i, z__3.i = a[i__4].r * uscal.i + a[
+				    i__4].i * uscal.r;
+			    i__5 = i__;
+			    z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i,
+				    z__2.i = z__3.r * x[i__5].i + z__3.i * x[
+				    i__5].r;
+			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i +
+				    z__2.i;
+			    csumj.r = z__1.r, csumj.i = z__1.i;
+/* L130: */
+			}
+		    } else if (j < *n) {
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    i__4 = i__ + j * a_dim1;
+			    z__3.r = a[i__4].r * uscal.r - a[i__4].i *
+				    uscal.i, z__3.i = a[i__4].r * uscal.i + a[
+				    i__4].i * uscal.r;
+			    i__5 = i__;
+			    z__2.r = z__3.r * x[i__5].r - z__3.i * x[i__5].i,
+				    z__2.i = z__3.r * x[i__5].i + z__3.i * x[
+				    i__5].r;
+			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i +
+				    z__2.i;
+			    csumj.r = z__1.r, csumj.i = z__1.i;
+/* L140: */
+			}
+		    }
+		}
+
+		z__1.r = tscal, z__1.i = 0.;
+		if (uscal.r == z__1.r && uscal.i == z__1.i) {
+
+/*
+                   Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+                   was not used to scale the dotproduct.
+*/
+
+		    i__3 = j;
+		    i__4 = j;
+		    z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i -
+			    csumj.i;
+		    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    i__3 = j;
+		    xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j])
+			    , abs(d__2));
+		    if (nounit) {
+			i__3 = j + j * a_dim1;
+			z__1.r = tscal * a[i__3].r, z__1.i = tscal * a[i__3]
+				.i;
+			tjjs.r = z__1.r, tjjs.i = z__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.;
+			if (tscal == 1.) {
+			    goto L160;
+			}
+		    }
+
+/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
+
+		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs),
+			    abs(d__2));
+		    if (tjj > smlnum) {
+
+/*                       abs(A(j,j)) > SMLNUM: */
+
+			if (tjj < 1.) {
+			    if (xj > tjj * bignum) {
+
+/*                             Scale X by 1/abs(x(j)). */
+
+				rec = 1. / xj;
+				zdscal_(n, &rec, &x[1], &c__1);
+				*scale *= rec;
+				xmax *= rec;
+			    }
+			}
+			i__3 = j;
+			zladiv_(&z__1, &x[j], &tjjs);
+			x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    } else if (tjj > 0.) {
+
+/*                       0 < abs(A(j,j)) <= SMLNUM: */
+
+			if (xj > tjj * bignum) {
+
+/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
+
+			    rec = tjj * bignum / xj;
+			    zdscal_(n, &rec, &x[1], &c__1);
+			    *scale *= rec;
+			    xmax *= rec;
+			}
+			i__3 = j;
+			zladiv_(&z__1, &x[j], &tjjs);
+			x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    } else {
+
+/*
+                         A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                         scale = 0 and compute a solution to A**T *x = 0.
+*/
+
+			i__3 = *n;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__;
+			    x[i__4].r = 0., x[i__4].i = 0.;
+/* L150: */
+			}
+			i__3 = j;
+			x[i__3].r = 1., x[i__3].i = 0.;
+			*scale = 0.;
+			xmax = 0.;
+		    }
+L160:
+		    ;
+		} else {
+
+/*
+                   Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+                   product has already been divided by 1/A(j,j).
+*/
+
+		    i__3 = j;
+		    zladiv_(&z__2, &x[j], &tjjs);
+		    z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i;
+		    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		}
+/* Computing MAX */
+		i__3 = j;
+		d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 =
+			d_imag(&x[j]), abs(d__2));
+		xmax = max(d__3,d__4);
+/* L170: */
+	    }
+
+	} else {
+
+/*           Solve A**H * x = b */
+
+	    i__1 = jlast;
+	    i__2 = jinc;
+	    for (j = jfirst; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*
+                Compute x(j) = b(j) - sum A(k,j)*x(k).
+                                      k<>j
+*/
+
+		i__3 = j;
+		xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j]),
+			abs(d__2));
+		uscal.r = tscal, uscal.i = 0.;
+		rec = 1. / max(xmax,1.);
+		if (cnorm[j] > (bignum - xj) * rec) {
+
+/*                 If x(j) could overflow, scale x by 1/(2*XMAX). */
+
+		    rec *= .5;
+		    if (nounit) {
+			d_cnjg(&z__2, &a[j + j * a_dim1]);
+			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
+			tjjs.r = z__1.r, tjjs.i = z__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.;
+		    }
+		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs),
+			    abs(d__2));
+		    if (tjj > 1.) {
+
+/*
+                         Divide by A(j,j) when scaling x if A(j,j) > 1.
+
+   Computing MIN
+*/
+			d__1 = 1., d__2 = rec * tjj;
+			rec = min(d__1,d__2);
+			zladiv_(&z__1, &uscal, &tjjs);
+			uscal.r = z__1.r, uscal.i = z__1.i;
+		    }
+		    if (rec < 1.) {
+			zdscal_(n, &rec, &x[1], &c__1);
+			*scale *= rec;
+			xmax *= rec;
+		    }
+		}
+
+		csumj.r = 0., csumj.i = 0.;
+		if (uscal.r == 1. && uscal.i == 0.) {
+
+/*
+                   If the scaling needed for A in the dot product is 1,
+                   call ZDOTC to perform the dot product.
+*/
+
+		    if (upper) {
+			i__3 = j - 1;
+			zdotc_(&z__1, &i__3, &a[j * a_dim1 + 1], &c__1, &x[1],
+				 &c__1);
+			csumj.r = z__1.r, csumj.i = z__1.i;
+		    } else if (j < *n) {
+			i__3 = *n - j;
+			zdotc_(&z__1, &i__3, &a[j + 1 + j * a_dim1], &c__1, &
+				x[j + 1], &c__1);
+			csumj.r = z__1.r, csumj.i = z__1.i;
+		    }
+		} else {
+
+/*                 Otherwise, use in-line code for the dot product. */
+
+		    if (upper) {
+			i__3 = j - 1;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    d_cnjg(&z__4, &a[i__ + j * a_dim1]);
+			    z__3.r = z__4.r * uscal.r - z__4.i * uscal.i,
+				    z__3.i = z__4.r * uscal.i + z__4.i *
+				    uscal.r;
+			    i__4 = i__;
+			    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i,
+				    z__2.i = z__3.r * x[i__4].i + z__3.i * x[
+				    i__4].r;
+			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i +
+				    z__2.i;
+			    csumj.r = z__1.r, csumj.i = z__1.i;
+/* L180: */
+			}
+		    } else if (j < *n) {
+			i__3 = *n;
+			for (i__ = j + 1; i__ <= i__3; ++i__) {
+			    d_cnjg(&z__4, &a[i__ + j * a_dim1]);
+			    z__3.r = z__4.r * uscal.r - z__4.i * uscal.i,
+				    z__3.i = z__4.r * uscal.i + z__4.i *
+				    uscal.r;
+			    i__4 = i__;
+			    z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i,
+				    z__2.i = z__3.r * x[i__4].i + z__3.i * x[
+				    i__4].r;
+			    z__1.r = csumj.r + z__2.r, z__1.i = csumj.i +
+				    z__2.i;
+			    csumj.r = z__1.r, csumj.i = z__1.i;
+/* L190: */
+			}
+		    }
+		}
+
+		z__1.r = tscal, z__1.i = 0.;
+		if (uscal.r == z__1.r && uscal.i == z__1.i) {
+
+/*
+                   Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j)
+                   was not used to scale the dotproduct.
+*/
+
+		    i__3 = j;
+		    i__4 = j;
+		    z__1.r = x[i__4].r - csumj.r, z__1.i = x[i__4].i -
+			    csumj.i;
+		    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    i__3 = j;
+		    xj = (d__1 = x[i__3].r, abs(d__1)) + (d__2 = d_imag(&x[j])
+			    , abs(d__2));
+		    if (nounit) {
+			d_cnjg(&z__2, &a[j + j * a_dim1]);
+			z__1.r = tscal * z__2.r, z__1.i = tscal * z__2.i;
+			tjjs.r = z__1.r, tjjs.i = z__1.i;
+		    } else {
+			tjjs.r = tscal, tjjs.i = 0.;
+			if (tscal == 1.) {
+			    goto L210;
+			}
+		    }
+
+/*                    Compute x(j) = x(j) / A(j,j), scaling if necessary. */
+
+		    tjj = (d__1 = tjjs.r, abs(d__1)) + (d__2 = d_imag(&tjjs),
+			    abs(d__2));
+		    if (tjj > smlnum) {
+
+/*                       abs(A(j,j)) > SMLNUM: */
+
+			if (tjj < 1.) {
+			    if (xj > tjj * bignum) {
+
+/*                             Scale X by 1/abs(x(j)). */
+
+				rec = 1. / xj;
+				zdscal_(n, &rec, &x[1], &c__1);
+				*scale *= rec;
+				xmax *= rec;
+			    }
+			}
+			i__3 = j;
+			zladiv_(&z__1, &x[j], &tjjs);
+			x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    } else if (tjj > 0.) {
+
+/*                       0 < abs(A(j,j)) <= SMLNUM: */
+
+			if (xj > tjj * bignum) {
+
+/*                          Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */
+
+			    rec = tjj * bignum / xj;
+			    zdscal_(n, &rec, &x[1], &c__1);
+			    *scale *= rec;
+			    xmax *= rec;
+			}
+			i__3 = j;
+			zladiv_(&z__1, &x[j], &tjjs);
+			x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		    } else {
+
+/*
+                         A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
+                         scale = 0 and compute a solution to A**H *x = 0.
+*/
+
+			i__3 = *n;
+			for (i__ = 1; i__ <= i__3; ++i__) {
+			    i__4 = i__;
+			    x[i__4].r = 0., x[i__4].i = 0.;
+/* L200: */
+			}
+			i__3 = j;
+			x[i__3].r = 1., x[i__3].i = 0.;
+			*scale = 0.;
+			xmax = 0.;
+		    }
+L210:
+		    ;
+		} else {
+
+/*
+                   Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot
+                   product has already been divided by 1/A(j,j).
+*/
+
+		    i__3 = j;
+		    zladiv_(&z__2, &x[j], &tjjs);
+		    z__1.r = z__2.r - csumj.r, z__1.i = z__2.i - csumj.i;
+		    x[i__3].r = z__1.r, x[i__3].i = z__1.i;
+		}
+/* Computing MAX */
+		i__3 = j;
+		d__3 = xmax, d__4 = (d__1 = x[i__3].r, abs(d__1)) + (d__2 =
+			d_imag(&x[j]), abs(d__2));
+		xmax = max(d__3,d__4);
+/* L220: */
+	    }
+	}
+	*scale /= tscal;
+    }
+
+/*     Scale the column norms by 1/TSCAL for return. */
+
+    if (tscal != 1.) {
+	d__1 = 1. / tscal;
+	dscal_(n, &d__1, &cnorm[1], &c__1);
+    }
+
+    return 0;
+
+/*     End of ZLATRS */
+
+} /* zlatrs_ */
+
+/* Subroutine */ int zlauu2_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__;
+    static doublereal aii;
+    extern logical lsame_(char *, char *);
+    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
+	    integer *, doublereal *, doublecomplex *, integer *), zlacgv_(
+	    integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLAUU2 computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the unblocked form of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLAUU2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the product U * U'. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    aii = a[i__2].r;
+	    if (i__ < *n) {
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = *n - i__;
+		zdotc_(&z__1, &i__3, &a[i__ + (i__ + 1) * a_dim1], lda, &a[
+			i__ + (i__ + 1) * a_dim1], lda);
+		d__1 = aii * aii + z__1.r;
+		a[i__2].r = d__1, a[i__2].i = 0.;
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+		i__2 = i__ - 1;
+		i__3 = *n - i__;
+		z__1.r = aii, z__1.i = 0.;
+		zgemv_("No transpose", &i__2, &i__3, &c_b60, &a[(i__ + 1) *
+			a_dim1 + 1], lda, &a[i__ + (i__ + 1) * a_dim1], lda, &
+			z__1, &a[i__ * a_dim1 + 1], &c__1);
+		i__2 = *n - i__;
+		zlacgv_(&i__2, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    } else {
+		zdscal_(&i__, &aii, &a[i__ * a_dim1 + 1], &c__1);
+	    }
+/* L10: */
+	}
+
+    } else {
+
+/*        Compute the product L' * L. */
+
+	i__1 = *n;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    i__2 = i__ + i__ * a_dim1;
+	    aii = a[i__2].r;
+	    if (i__ < *n) {
+		i__2 = i__ + i__ * a_dim1;
+		i__3 = *n - i__;
+		zdotc_(&z__1, &i__3, &a[i__ + 1 + i__ * a_dim1], &c__1, &a[
+			i__ + 1 + i__ * a_dim1], &c__1);
+		d__1 = aii * aii + z__1.r;
+		a[i__2].r = d__1, a[i__2].i = 0.;
+		i__2 = i__ - 1;
+		zlacgv_(&i__2, &a[i__ + a_dim1], lda);
+		i__2 = *n - i__;
+		i__3 = i__ - 1;
+		z__1.r = aii, z__1.i = 0.;
+		zgemv_("Conjugate transpose", &i__2, &i__3, &c_b60, &a[i__ +
+			1 + a_dim1], lda, &a[i__ + 1 + i__ * a_dim1], &c__1, &
+			z__1, &a[i__ + a_dim1], lda);
+		i__2 = i__ - 1;
+		zlacgv_(&i__2, &a[i__ + a_dim1], lda);
+	    } else {
+		zdscal_(&i__, &aii, &a[i__ + a_dim1], lda);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZLAUU2 */
+
+} /* zlauu2_ */
+
+/* Subroutine */ int zlauum_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, ib, nb;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), zherk_(char *, char *, integer *,
+	    integer *, doublereal *, doublecomplex *, integer *, doublereal *,
+	     doublecomplex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *,
+	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
+	     doublecomplex *, integer *),
+	    zlauu2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK auxiliary routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZLAUUM computes the product U * U' or L' * L, where the triangular
+    factor U or L is stored in the upper or lower triangular part of
+    the array A.
+
+    If UPLO = 'U' or 'u' then the upper triangle of the result is stored,
+    overwriting the factor U in A.
+    If UPLO = 'L' or 'l' then the lower triangle of the result is stored,
+    overwriting the factor L in A.
+
+    This is the blocked form of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the triangular factor stored in the array A
+            is upper or lower triangular:
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the triangular factor U or L.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the triangular factor U or L.
+            On exit, if UPLO = 'U', the upper triangle of A is
+            overwritten with the upper triangle of the product U * U';
+            if UPLO = 'L', the lower triangle of A is overwritten with
+            the lower triangle of the product L' * L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZLAUUM", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "ZLAUUM", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	zlauu2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute the product U * U'. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		ztrmm_("Right", "Upper", "Conjugate transpose", "Non-unit", &
+			i__3, &ib, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[
+			i__ * a_dim1 + 1], lda);
+		zlauu2_("Upper", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    zgemm_("No transpose", "Conjugate transpose", &i__3, &ib,
+			    &i__4, &c_b60, &a[(i__ + ib) * a_dim1 + 1], lda, &
+			    a[i__ + (i__ + ib) * a_dim1], lda, &c_b60, &a[i__
+			    * a_dim1 + 1], lda);
+		    i__3 = *n - i__ - ib + 1;
+		    zherk_("Upper", "No transpose", &ib, &i__3, &c_b1015, &a[
+			    i__ + (i__ + ib) * a_dim1], lda, &c_b1015, &a[i__
+			    + i__ * a_dim1], lda);
+		}
+/* L10: */
+	    }
+	} else {
+
+/*           Compute the product L' * L. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+/* Computing MIN */
+		i__3 = nb, i__4 = *n - i__ + 1;
+		ib = min(i__3,i__4);
+		i__3 = i__ - 1;
+		ztrmm_("Left", "Lower", "Conjugate transpose", "Non-unit", &
+			ib, &i__3, &c_b60, &a[i__ + i__ * a_dim1], lda, &a[
+			i__ + a_dim1], lda);
+		zlauu2_("Lower", &ib, &a[i__ + i__ * a_dim1], lda, info);
+		if (i__ + ib <= *n) {
+		    i__3 = i__ - 1;
+		    i__4 = *n - i__ - ib + 1;
+		    zgemm_("Conjugate transpose", "No transpose", &ib, &i__3,
+			    &i__4, &c_b60, &a[i__ + ib + i__ * a_dim1], lda, &
+			    a[i__ + ib + a_dim1], lda, &c_b60, &a[i__ +
+			    a_dim1], lda);
+		    i__3 = *n - i__ - ib + 1;
+		    zherk_("Lower", "Conjugate transpose", &ib, &i__3, &
+			    c_b1015, &a[i__ + ib + i__ * a_dim1], lda, &
+			    c_b1015, &a[i__ + i__ * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZLAUUM */
+
+} /* zlauum_ */
+
+/* Subroutine */ int zpotf2_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublereal d__1;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer j;
+    static doublereal ajj;
+    extern logical lsame_(char *, char *);
+    extern /* Double Complex */ VOID zdotc_(doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *);
+    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
+	    integer *, doublereal *, doublecomplex *, integer *), zlacgv_(
+	    integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZPOTF2 computes the Cholesky factorization of a complex Hermitian
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U' * U ,  if UPLO = 'U', or
+       A = L  * L',  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the unblocked version of the algorithm, calling Level 2 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the upper or lower triangular part of the
+            Hermitian matrix A is stored.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            n by n upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U'*U  or A = L*L'.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+            > 0: if INFO = k, the leading minor of order k is not
+                 positive definite, and the factorization could not be
+                 completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZPOTF2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute the Cholesky factorization A = U'*U. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute U(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j + j * a_dim1;
+	    d__1 = a[i__2].r;
+	    i__3 = j - 1;
+	    zdotc_(&z__2, &i__3, &a[j * a_dim1 + 1], &c__1, &a[j * a_dim1 + 1]
+		    , &c__1);
+	    z__1.r = d__1 - z__2.r, z__1.i = -z__2.i;
+	    ajj = z__1.r;
+	    if (ajj <= 0.) {
+		i__2 = j + j * a_dim1;
+		a[i__2].r = ajj, a[i__2].i = 0.;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    i__2 = j + j * a_dim1;
+	    a[i__2].r = ajj, a[i__2].i = 0.;
+
+/*           Compute elements J+1:N of row J. */
+
+	    if (j < *n) {
+		i__2 = j - 1;
+		zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1);
+		i__2 = j - 1;
+		i__3 = *n - j;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("Transpose", &i__2, &i__3, &z__1, &a[(j + 1) * a_dim1
+			+ 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b60, &a[j + (
+			j + 1) * a_dim1], lda);
+		i__2 = j - 1;
+		zlacgv_(&i__2, &a[j * a_dim1 + 1], &c__1);
+		i__2 = *n - j;
+		d__1 = 1. / ajj;
+		zdscal_(&i__2, &d__1, &a[j + (j + 1) * a_dim1], lda);
+	    }
+/* L10: */
+	}
+    } else {
+
+/*        Compute the Cholesky factorization A = L*L'. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+
+/*           Compute L(J,J) and test for non-positive-definiteness. */
+
+	    i__2 = j + j * a_dim1;
+	    d__1 = a[i__2].r;
+	    i__3 = j - 1;
+	    zdotc_(&z__2, &i__3, &a[j + a_dim1], lda, &a[j + a_dim1], lda);
+	    z__1.r = d__1 - z__2.r, z__1.i = -z__2.i;
+	    ajj = z__1.r;
+	    if (ajj <= 0.) {
+		i__2 = j + j * a_dim1;
+		a[i__2].r = ajj, a[i__2].i = 0.;
+		goto L30;
+	    }
+	    ajj = sqrt(ajj);
+	    i__2 = j + j * a_dim1;
+	    a[i__2].r = ajj, a[i__2].i = 0.;
+
+/*           Compute elements J+1:N of column J. */
+
+	    if (j < *n) {
+		i__2 = j - 1;
+		zlacgv_(&i__2, &a[j + a_dim1], lda);
+		i__2 = *n - j;
+		i__3 = j - 1;
+		z__1.r = -1., z__1.i = -0.;
+		zgemv_("No transpose", &i__2, &i__3, &z__1, &a[j + 1 + a_dim1]
+			, lda, &a[j + a_dim1], lda, &c_b60, &a[j + 1 + j *
+			a_dim1], &c__1);
+		i__2 = j - 1;
+		zlacgv_(&i__2, &a[j + a_dim1], lda);
+		i__2 = *n - j;
+		d__1 = 1. / ajj;
+		zdscal_(&i__2, &d__1, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+    goto L40;
+
+L30:
+    *info = j;
+
+L40:
+    return 0;
+
+/*     End of ZPOTF2 */
+
+} /* zpotf2_ */
+
+/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer j, jb, nb;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zgemm_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), zherk_(char *, char *, integer *,
+	    integer *, doublereal *, doublecomplex *, integer *, doublereal *,
+	     doublecomplex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
+	     doublecomplex *, integer *),
+	    zpotf2_(char *, integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZPOTRF computes the Cholesky factorization of a complex Hermitian
+    positive definite matrix A.
+
+    The factorization has the form
+       A = U**H * U,  if UPLO = 'U', or
+       A = L  * L**H,  if UPLO = 'L',
+    where U is an upper triangular matrix and L is lower triangular.
+
+    This is the block version of the algorithm, calling Level 3 BLAS.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
+            N-by-N upper triangular part of A contains the upper
+            triangular part of the matrix A, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of A contains the lower
+            triangular part of the matrix A, and the strictly upper
+            triangular part of A is not referenced.
+
+            On exit, if INFO = 0, the factor U or L from the Cholesky
+            factorization A = U**H*U or A = L*L**H.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the leading minor of order i is not
+                  positive definite, and the factorization could not be
+                  completed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZPOTRF", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Determine the block size for this environment. */
+
+    nb = ilaenv_(&c__1, "ZPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)1);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code. */
+
+	zpotf2_(uplo, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code. */
+
+	if (upper) {
+
+/*           Compute the Cholesky factorization A = U'*U. */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		zherk_("Upper", "Conjugate transpose", &jb, &i__3, &c_b1294, &
+			a[j * a_dim1 + 1], lda, &c_b1015, &a[j + j * a_dim1],
+			lda);
+		zpotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block row. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("Conjugate transpose", "No transpose", &jb, &i__3,
+			    &i__4, &z__1, &a[j * a_dim1 + 1], lda, &a[(j + jb)
+			     * a_dim1 + 1], lda, &c_b60, &a[j + (j + jb) *
+			    a_dim1], lda);
+		    i__3 = *n - j - jb + 1;
+		    ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit",
+			     &jb, &i__3, &c_b60, &a[j + j * a_dim1], lda, &a[
+			    j + (j + jb) * a_dim1], lda);
+		}
+/* L10: */
+	    }
+
+	} else {
+
+/*           Compute the Cholesky factorization A = L*L'. */
+
+	    i__2 = *n;
+	    i__1 = nb;
+	    for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) {
+
+/*
+                Update and factorize the current diagonal block and test
+                for non-positive-definiteness.
+
+   Computing MIN
+*/
+		i__3 = nb, i__4 = *n - j + 1;
+		jb = min(i__3,i__4);
+		i__3 = j - 1;
+		zherk_("Lower", "No transpose", &jb, &i__3, &c_b1294, &a[j +
+			a_dim1], lda, &c_b1015, &a[j + j * a_dim1], lda);
+		zpotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info);
+		if (*info != 0) {
+		    goto L30;
+		}
+		if (j + jb <= *n) {
+
+/*                 Compute the current block column. */
+
+		    i__3 = *n - j - jb + 1;
+		    i__4 = j - 1;
+		    z__1.r = -1., z__1.i = -0.;
+		    zgemm_("No transpose", "Conjugate transpose", &i__3, &jb,
+			    &i__4, &z__1, &a[j + jb + a_dim1], lda, &a[j +
+			    a_dim1], lda, &c_b60, &a[j + jb + j * a_dim1],
+			    lda);
+		    i__3 = *n - j - jb + 1;
+		    ztrsm_("Right", "Lower", "Conjugate transpose", "Non-unit"
+			    , &i__3, &jb, &c_b60, &a[j + j * a_dim1], lda, &a[
+			    j + jb + j * a_dim1], lda);
+		}
+/* L20: */
+	    }
+	}
+    }
+    goto L40;
+
+L30:
+    *info = *info + j - 1;
+
+L40:
+    return 0;
+
+/*     End of ZPOTRF */
+
+} /* zpotrf_ */
+
+/* Subroutine */ int zpotri_(char *uplo, integer *n, doublecomplex *a,
+	integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int xerbla_(char *, integer *), zlauum_(
+	    char *, integer *, doublecomplex *, integer *, integer *),
+	     ztrtri_(char *, char *, integer *, doublecomplex *, integer *,
+	    integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       March 31, 1993
+
+
+    Purpose
+    =======
+
+    ZPOTRI computes the inverse of a complex Hermitian positive definite
+    matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+    computed by ZPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the triangular factor U or L from the Cholesky
+            factorization A = U**H*U or A = L*L**H, as computed by
+            ZPOTRF.
+            On exit, the upper or lower triangle of the (Hermitian)
+            inverse of A, overwriting the input factor U or L.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  if INFO = i, the (i,i) element of the factor U or L is
+                  zero, and the inverse could not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*lda < max(1,*n)) {
+	*info = -4;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZPOTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Invert the triangular Cholesky factor U or L. */
+
+    ztrtri_(uplo, "Non-unit", n, &a[a_offset], lda, info);
+    if (*info > 0) {
+	return 0;
+    }
+
+/*     Form inv(U)*inv(U)' or inv(L)'*inv(L). */
+
+    zlauum_(uplo, n, &a[a_offset], lda, info);
+
+    return 0;
+
+/*     End of ZPOTRI */
+
+} /* zpotri_ */
+
+/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs,
+	doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
+	integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, b_dim1, b_offset, i__1;
+
+    /* Local variables */
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int ztrsm_(char *, char *, char *, char *,
+	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
+	     doublecomplex *, integer *),
+	    xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZPOTRS solves a system of linear equations A*X = B with a Hermitian
+    positive definite matrix A using the Cholesky factorization
+    A = U**H*U or A = L*L**H computed by ZPOTRF.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  Upper triangle of A is stored;
+            = 'L':  Lower triangle of A is stored.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    NRHS    (input) INTEGER
+            The number of right hand sides, i.e., the number of columns
+            of the matrix B.  NRHS >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,N)
+            The triangular factor U or L from the Cholesky factorization
+            A = U**H*U or A = L*L**H, as computed by ZPOTRF.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    B       (input/output) COMPLEX*16 array, dimension (LDB,NRHS)
+            On entry, the right hand side matrix B.
+            On exit, the solution matrix X.
+
+    LDB     (input) INTEGER
+            The leading dimension of the array B.  LDB >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    b_dim1 = *ldb;
+    b_offset = 1 + b_dim1;
+    b -= b_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if (*nrhs < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*ldb < max(1,*n)) {
+	*info = -7;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZPOTRS", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*n == 0) || (*nrhs == 0)) {
+	return 0;
+    }
+
+    if (upper) {
+
+/*
+          Solve A*X = B where A = U'*U.
+
+          Solve U'*X = B, overwriting B with X.
+*/
+
+	ztrsm_("Left", "Upper", "Conjugate transpose", "Non-unit", n, nrhs, &
+		c_b60, &a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve U*X = B, overwriting B with X. */
+
+	ztrsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b60, &
+		a[a_offset], lda, &b[b_offset], ldb);
+    } else {
+
+/*
+          Solve A*X = B where A = L*L'.
+
+          Solve L*X = B, overwriting B with X.
+*/
+
+	ztrsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b60, &
+		a[a_offset], lda, &b[b_offset], ldb);
+
+/*        Solve L'*X = B, overwriting B with X. */
+
+	ztrsm_("Left", "Lower", "Conjugate transpose", "Non-unit", n, nrhs, &
+		c_b60, &a[a_offset], lda, &b[b_offset], ldb);
+    }
+
+    return 0;
+
+/*     End of ZPOTRS */
+
+} /* zpotrs_ */
+
+/* Subroutine */ int zstedc_(char *compz, integer *n, doublereal *d__,
+	doublereal *e, doublecomplex *z__, integer *ldz, doublecomplex *work,
+	integer *lwork, doublereal *rwork, integer *lrwork, integer *iwork,
+	integer *liwork, integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2, i__3, i__4;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double log(doublereal);
+    integer pow_ii(integer *, integer *);
+    double sqrt(doublereal);
+
+    /* Local variables */
+    static integer i__, j, k, m;
+    static doublereal p;
+    static integer ii, ll, end, lgn;
+    static doublereal eps, tiny;
+    extern logical lsame_(char *, char *);
+    static integer lwmin, start;
+    extern /* Subroutine */ int zswap_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), zlaed0_(integer *, integer *,
+	    doublereal *, doublereal *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublereal *, integer *, integer *);
+
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *), dstedc_(char *, integer *,
+	    doublereal *, doublereal *, doublereal *, integer *, doublereal *,
+	     integer *, integer *, integer *, integer *), dlaset_(
+	    char *, integer *, integer *, doublereal *, doublereal *,
+	    doublereal *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int dsterf_(integer *, doublereal *, doublereal *,
+	     integer *), zlacrm_(integer *, integer *, doublecomplex *,
+	    integer *, doublereal *, integer *, doublecomplex *, integer *,
+	    doublereal *);
+    static integer liwmin, icompz;
+    extern /* Subroutine */ int dsteqr_(char *, integer *, doublereal *,
+	    doublereal *, doublereal *, integer *, doublereal *, integer *), zlacpy_(char *, integer *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, integer *);
+    static doublereal orgnrm;
+    static integer lrwmin;
+    static logical lquery;
+    static integer smlsiz;
+    extern /* Subroutine */ int zsteqr_(char *, integer *, doublereal *,
+	    doublereal *, doublecomplex *, integer *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZSTEDC computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the divide and conquer method.
+    The eigenvectors of a full or band complex Hermitian matrix can also
+    be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this
+    matrix to tridiagonal form.
+
+    This code makes very mild assumptions about floating point
+    arithmetic. It will work on machines with a guard digit in
+    add/subtract, or on those binary machines without guard digits
+    which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
+    It could conceivably fail on hexadecimal or decimal machines
+    without guard digits, but we know of none.  See DLAED3 for details.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'I':  Compute eigenvectors of tridiagonal matrix also.
+            = 'V':  Compute eigenvectors of original Hermitian matrix
+                    also.  On entry, Z contains the unitary matrix used
+                    to reduce the original matrix to tridiagonal form.
+
+    N       (input) INTEGER
+            The dimension of the symmetric tridiagonal matrix.  N >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+            On entry, the subdiagonal elements of the tridiagonal matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
+            On entry, if COMPZ = 'V', then Z contains the unitary
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original Hermitian matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If  COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1.
+            If eigenvectors are desired, then LDZ >= max(1,N).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If COMPZ = 'N' or 'I', or N <= 1, LWORK must be at least 1.
+            If COMPZ = 'V' and N > 1, LWORK must be at least N*N.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    RWORK   (workspace/output) DOUBLE PRECISION array,
+                                           dimension (LRWORK)
+            On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
+
+    LRWORK  (input) INTEGER
+            The dimension of the array RWORK.
+            If COMPZ = 'N' or N <= 1, LRWORK must be at least 1.
+            If COMPZ = 'V' and N > 1, LRWORK must be at least
+                           1 + 3*N + 2*N*lg N + 3*N**2 ,
+                           where lg( N ) = smallest integer k such
+                           that 2**k >= N.
+            If COMPZ = 'I' and N > 1, LRWORK must be at least
+                           1 + 4*N + 2*N**2 .
+
+            If LRWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the RWORK array,
+            returns this value as the first entry of the RWORK array, and
+            no error message related to LRWORK is issued by XERBLA.
+
+    IWORK   (workspace/output) INTEGER array, dimension (LIWORK)
+            On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
+
+    LIWORK  (input) INTEGER
+            The dimension of the array IWORK.
+            If COMPZ = 'N' or N <= 1, LIWORK must be at least 1.
+            If COMPZ = 'V' or N > 1,  LIWORK must be at least
+                                      6 + 6*N + 5*N*lg N.
+            If COMPZ = 'I' or N > 1,  LIWORK must be at least
+                                      3 + 5*N .
+
+            If LIWORK = -1, then a workspace query is assumed; the
+            routine only calculates the optimal size of the IWORK array,
+            returns this value as the first entry of the IWORK array, and
+            no error message related to LIWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit.
+            < 0:  if INFO = -i, the i-th argument had an illegal value.
+            > 0:  The algorithm failed to compute an eigenvalue while
+                  working on the submatrix lying in rows and columns
+                  INFO/(N+1) through mod(INFO,N+1).
+
+    Further Details
+    ===============
+
+    Based on contributions by
+       Jeff Rutter, Computer Science Division, University of California
+       at Berkeley, USA
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+    --rwork;
+    --iwork;
+
+    /* Function Body */
+    *info = 0;
+    lquery = ((*lwork == -1) || (*lrwork == -1)) || (*liwork == -1);
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if ((*n <= 1) || (icompz <= 0)) {
+	lwmin = 1;
+	liwmin = 1;
+	lrwmin = 1;
+    } else {
+	lgn = (integer) (log((doublereal) (*n)) / log(2.));
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (pow_ii(&c__2, &lgn) < *n) {
+	    ++lgn;
+	}
+	if (icompz == 1) {
+	    lwmin = *n * *n;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lrwmin = *n * 3 + 1 + ((*n) << (1)) * lgn + i__1 * i__1 * 3;
+	    liwmin = *n * 6 + 6 + *n * 5 * lgn;
+	} else if (icompz == 2) {
+	    lwmin = 1;
+/* Computing 2nd power */
+	    i__1 = *n;
+	    lrwmin = ((*n) << (2)) + 1 + ((i__1 * i__1) << (1));
+	    liwmin = *n * 5 + 3;
+	}
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    } else if (*lwork < lwmin && ! lquery) {
+	*info = -8;
+    } else if (*lrwork < lrwmin && ! lquery) {
+	*info = -10;
+    } else if (*liwork < liwmin && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	work[1].r = (doublereal) lwmin, work[1].i = 0.;
+	rwork[1] = (doublereal) lrwmin;
+	iwork[1] = liwmin;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZSTEDC", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+    if (*n == 1) {
+	if (icompz != 0) {
+	    i__1 = z_dim1 + 1;
+	    z__[i__1].r = 1., z__[i__1].i = 0.;
+	}
+	return 0;
+    }
+
+    smlsiz = ilaenv_(&c__9, "ZSTEDC", " ", &c__0, &c__0, &c__0, &c__0, (
+	    ftnlen)6, (ftnlen)1);
+
+/*
+       If the following conditional clause is removed, then the routine
+       will use the Divide and Conquer routine to compute only the
+       eigenvalues, which requires (3N + 3N**2) real workspace and
+       (2 + 5N + 2N lg(N)) integer workspace.
+       Since on many architectures DSTERF is much faster than any other
+       algorithm for finding eigenvalues only, it is used here
+       as the default.
+
+       If COMPZ = 'N', use DSTERF to compute the eigenvalues.
+*/
+
+    if (icompz == 0) {
+	dsterf_(n, &d__[1], &e[1], info);
+	return 0;
+    }
+
+/*
+       If N is smaller than the minimum divide size (SMLSIZ+1), then
+       solve the problem with another solver.
+*/
+
+    if (*n <= smlsiz) {
+	if (icompz == 0) {
+	    dsterf_(n, &d__[1], &e[1], info);
+	    return 0;
+	} else if (icompz == 2) {
+	    zsteqr_("I", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1],
+		    info);
+	    return 0;
+	} else {
+	    zsteqr_("V", n, &d__[1], &e[1], &z__[z_offset], ldz, &rwork[1],
+		    info);
+	    return 0;
+	}
+    }
+
+/*     If COMPZ = 'I', we simply call DSTEDC instead. */
+
+    if (icompz == 2) {
+	dlaset_("Full", n, n, &c_b324, &c_b1015, &rwork[1], n);
+	ll = *n * *n + 1;
+	i__1 = *lrwork - ll + 1;
+	dstedc_("I", n, &d__[1], &e[1], &rwork[1], n, &rwork[ll], &i__1, &
+		iwork[1], liwork, info);
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *n;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * z_dim1;
+		i__4 = (j - 1) * *n + i__;
+		z__[i__3].r = rwork[i__4], z__[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+	return 0;
+    }
+
+/*
+       From now on, only option left to be handled is COMPZ = 'V',
+       i.e. ICOMPZ = 1.
+
+       Scale.
+*/
+
+    orgnrm = dlanst_("M", n, &d__[1], &e[1]);
+    if (orgnrm == 0.) {
+	return 0;
+    }
+
+    eps = EPSILON;
+
+    start = 1;
+
+/*     while ( START <= N ) */
+
+L30:
+    if (start <= *n) {
+
+/*
+       Let END be the position of the next subdiagonal entry such that
+       E( END ) <= TINY or END = N if no such subdiagonal exists.  The
+       matrix identified by the elements between START and END
+       constitutes an independent sub-problem.
+*/
+
+	end = start;
+L40:
+	if (end < *n) {
+	    tiny = eps * sqrt((d__1 = d__[end], abs(d__1))) * sqrt((d__2 =
+		    d__[end + 1], abs(d__2)));
+	    if ((d__1 = e[end], abs(d__1)) > tiny) {
+		++end;
+		goto L40;
+	    }
+	}
+
+/*        (Sub) Problem determined.  Compute its size and solve it. */
+
+	m = end - start + 1;
+	if (m > smlsiz) {
+	    *info = smlsiz;
+
+/*           Scale. */
+
+	    orgnrm = dlanst_("M", &m, &d__[start], &e[start]);
+	    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &m, &c__1, &d__[
+		    start], &m, info);
+	    i__1 = m - 1;
+	    i__2 = m - 1;
+	    dlascl_("G", &c__0, &c__0, &orgnrm, &c_b1015, &i__1, &c__1, &e[
+		    start], &i__2, info);
+
+	    zlaed0_(n, &m, &d__[start], &e[start], &z__[start * z_dim1 + 1],
+		    ldz, &work[1], n, &rwork[1], &iwork[1], info);
+	    if (*info > 0) {
+		*info = (*info / (m + 1) + start - 1) * (*n + 1) + *info % (m
+			+ 1) + start - 1;
+		return 0;
+	    }
+
+/*           Scale back. */
+
+	    dlascl_("G", &c__0, &c__0, &c_b1015, &orgnrm, &m, &c__1, &d__[
+		    start], &m, info);
+
+	} else {
+	    dsteqr_("I", &m, &d__[start], &e[start], &rwork[1], &m, &rwork[m *
+		     m + 1], info);
+	    zlacrm_(n, &m, &z__[start * z_dim1 + 1], ldz, &rwork[1], &m, &
+		    work[1], n, &rwork[m * m + 1]);
+	    zlacpy_("A", n, &m, &work[1], n, &z__[start * z_dim1 + 1], ldz);
+	    if (*info > 0) {
+		*info = start * (*n + 1) + end;
+		return 0;
+	    }
+	}
+
+	start = end + 1;
+	goto L30;
+    }
+
+/*
+       endwhile
+
+       If the problem split any number of times, then the eigenvalues
+       will not be properly ordered.  Here we permute the eigenvalues
+       (and the associated eigenvectors) into ascending order.
+*/
+
+    if (m != *n) {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	i__1 = *n;
+	for (ii = 2; ii <= i__1; ++ii) {
+	    i__ = ii - 1;
+	    k = i__;
+	    p = d__[i__];
+	    i__2 = *n;
+	    for (j = ii; j <= i__2; ++j) {
+		if (d__[j] < p) {
+		    k = j;
+		    p = d__[j];
+		}
+/* L50: */
+	    }
+	    if (k != i__) {
+		d__[k] = d__[i__];
+		d__[i__] = p;
+		zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1],
+			 &c__1);
+	    }
+/* L60: */
+	}
+    }
+
+    work[1].r = (doublereal) lwmin, work[1].i = 0.;
+    rwork[1] = (doublereal) lrwmin;
+    iwork[1] = liwmin;
+
+    return 0;
+
+/*     End of ZSTEDC */
+
+} /* zstedc_ */
+
+/* Subroutine */ int zsteqr_(char *compz, integer *n, doublereal *d__,
+	doublereal *e, doublecomplex *z__, integer *ldz, doublereal *work,
+	integer *info)
+{
+    /* System generated locals */
+    integer z_dim1, z_offset, i__1, i__2;
+    doublereal d__1, d__2;
+
+    /* Builtin functions */
+    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
+
+    /* Local variables */
+    static doublereal b, c__, f, g;
+    static integer i__, j, k, l, m;
+    static doublereal p, r__, s;
+    static integer l1, ii, mm, lm1, mm1, nm1;
+    static doublereal rt1, rt2, eps;
+    static integer lsv;
+    static doublereal tst, eps2;
+    static integer lend, jtot;
+    extern /* Subroutine */ int dlae2_(doublereal *, doublereal *, doublereal
+	    *, doublereal *, doublereal *);
+    extern logical lsame_(char *, char *);
+    static doublereal anorm;
+    extern /* Subroutine */ int zlasr_(char *, char *, char *, integer *,
+	    integer *, doublereal *, doublereal *, doublecomplex *, integer *), zswap_(integer *, doublecomplex *,
+	    integer *, doublecomplex *, integer *), dlaev2_(doublereal *,
+	    doublereal *, doublereal *, doublereal *, doublereal *,
+	    doublereal *, doublereal *);
+    static integer lendm1, lendp1;
+
+    static integer iscale;
+    extern /* Subroutine */ int dlascl_(char *, integer *, integer *,
+	    doublereal *, doublereal *, integer *, integer *, doublereal *,
+	    integer *, integer *);
+    static doublereal safmin;
+    extern /* Subroutine */ int dlartg_(doublereal *, doublereal *,
+	    doublereal *, doublereal *, doublereal *);
+    static doublereal safmax;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern doublereal dlanst_(char *, integer *, doublereal *, doublereal *);
+    extern /* Subroutine */ int dlasrt_(char *, integer *, doublereal *,
+	    integer *);
+    static integer lendsv;
+    static doublereal ssfmin;
+    static integer nmaxit, icompz;
+    static doublereal ssfmax;
+    extern /* Subroutine */ int zlaset_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZSTEQR computes all eigenvalues and, optionally, eigenvectors of a
+    symmetric tridiagonal matrix using the implicit QL or QR method.
+    The eigenvectors of a full or band complex Hermitian matrix can also
+    be found if ZHETRD or ZHPTRD or ZHBTRD has been used to reduce this
+    matrix to tridiagonal form.
+
+    Arguments
+    =========
+
+    COMPZ   (input) CHARACTER*1
+            = 'N':  Compute eigenvalues only.
+            = 'V':  Compute eigenvalues and eigenvectors of the original
+                    Hermitian matrix.  On entry, Z must contain the
+                    unitary matrix used to reduce the original matrix
+                    to tridiagonal form.
+            = 'I':  Compute eigenvalues and eigenvectors of the
+                    tridiagonal matrix.  Z is initialized to the identity
+                    matrix.
+
+    N       (input) INTEGER
+            The order of the matrix.  N >= 0.
+
+    D       (input/output) DOUBLE PRECISION array, dimension (N)
+            On entry, the diagonal elements of the tridiagonal matrix.
+            On exit, if INFO = 0, the eigenvalues in ascending order.
+
+    E       (input/output) DOUBLE PRECISION array, dimension (N-1)
+            On entry, the (n-1) subdiagonal elements of the tridiagonal
+            matrix.
+            On exit, E has been destroyed.
+
+    Z       (input/output) COMPLEX*16 array, dimension (LDZ, N)
+            On entry, if  COMPZ = 'V', then Z contains the unitary
+            matrix used in the reduction to tridiagonal form.
+            On exit, if INFO = 0, then if COMPZ = 'V', Z contains the
+            orthonormal eigenvectors of the original Hermitian matrix,
+            and if COMPZ = 'I', Z contains the orthonormal eigenvectors
+            of the symmetric tridiagonal matrix.
+            If COMPZ = 'N', then Z is not referenced.
+
+    LDZ     (input) INTEGER
+            The leading dimension of the array Z.  LDZ >= 1, and if
+            eigenvectors are desired, then  LDZ >= max(1,N).
+
+    WORK    (workspace) DOUBLE PRECISION array, dimension (max(1,2*N-2))
+            If COMPZ = 'N', then WORK is not referenced.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+            > 0:  the algorithm has failed to find all the eigenvalues in
+                  a total of 30*N iterations; if INFO = i, then i
+                  elements of E have not converged to zero; on exit, D
+                  and E contain the elements of a symmetric tridiagonal
+                  matrix which is unitarily similar to the original
+                  matrix.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    --d__;
+    --e;
+    z_dim1 = *ldz;
+    z_offset = 1 + z_dim1;
+    z__ -= z_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+
+    if (lsame_(compz, "N")) {
+	icompz = 0;
+    } else if (lsame_(compz, "V")) {
+	icompz = 1;
+    } else if (lsame_(compz, "I")) {
+	icompz = 2;
+    } else {
+	icompz = -1;
+    }
+    if (icompz < 0) {
+	*info = -1;
+    } else if (*n < 0) {
+	*info = -2;
+    } else if ((*ldz < 1) || (icompz > 0 && *ldz < max(1,*n))) {
+	*info = -6;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZSTEQR", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+    if (*n == 1) {
+	if (icompz == 2) {
+	    i__1 = z_dim1 + 1;
+	    z__[i__1].r = 1., z__[i__1].i = 0.;
+	}
+	return 0;
+    }
+
+/*     Determine the unit roundoff and over/underflow thresholds. */
+
+    eps = EPSILON;
+/* Computing 2nd power */
+    d__1 = eps;
+    eps2 = d__1 * d__1;
+    safmin = SAFEMINIMUM;
+    safmax = 1. / safmin;
+    ssfmax = sqrt(safmax) / 3.;
+    ssfmin = sqrt(safmin) / eps2;
+
+/*
+       Compute the eigenvalues and eigenvectors of the tridiagonal
+       matrix.
+*/
+
+    if (icompz == 2) {
+	zlaset_("Full", n, n, &c_b59, &c_b60, &z__[z_offset], ldz);
+    }
+
+    nmaxit = *n * 30;
+    jtot = 0;
+
+/*
+       Determine where the matrix splits and choose QL or QR iteration
+       for each block, according to whether top or bottom diagonal
+       element is smaller.
+*/
+
+    l1 = 1;
+    nm1 = *n - 1;
+
+L10:
+    if (l1 > *n) {
+	goto L160;
+    }
+    if (l1 > 1) {
+	e[l1 - 1] = 0.;
+    }
+    if (l1 <= nm1) {
+	i__1 = nm1;
+	for (m = l1; m <= i__1; ++m) {
+	    tst = (d__1 = e[m], abs(d__1));
+	    if (tst == 0.) {
+		goto L30;
+	    }
+	    if (tst <= sqrt((d__1 = d__[m], abs(d__1))) * sqrt((d__2 = d__[m
+		    + 1], abs(d__2))) * eps) {
+		e[m] = 0.;
+		goto L30;
+	    }
+/* L20: */
+	}
+    }
+    m = *n;
+
+L30:
+    l = l1;
+    lsv = l;
+    lend = m;
+    lendsv = lend;
+    l1 = m + 1;
+    if (lend == l) {
+	goto L10;
+    }
+
+/*     Scale submatrix in rows and columns L to LEND */
+
+    i__1 = lend - l + 1;
+    anorm = dlanst_("I", &i__1, &d__[l], &e[l]);
+    iscale = 0;
+    if (anorm == 0.) {
+	goto L10;
+    }
+    if (anorm > ssfmax) {
+	iscale = 1;
+	i__1 = lend - l + 1;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmax, &i__1, &c__1, &e[l], n,
+		info);
+    } else if (anorm < ssfmin) {
+	iscale = 2;
+	i__1 = lend - l + 1;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &d__[l], n,
+		info);
+	i__1 = lend - l;
+	dlascl_("G", &c__0, &c__0, &anorm, &ssfmin, &i__1, &c__1, &e[l], n,
+		info);
+    }
+
+/*     Choose between QL and QR iteration */
+
+    if ((d__1 = d__[lend], abs(d__1)) < (d__2 = d__[l], abs(d__2))) {
+	lend = lsv;
+	l = lendsv;
+    }
+
+    if (lend > l) {
+
+/*
+          QL Iteration
+
+          Look for small subdiagonal element.
+*/
+
+L40:
+	if (l != lend) {
+	    lendm1 = lend - 1;
+	    i__1 = lendm1;
+	    for (m = l; m <= i__1; ++m) {
+/* Computing 2nd power */
+		d__2 = (d__1 = e[m], abs(d__1));
+		tst = d__2 * d__2;
+		if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m
+			+ 1], abs(d__2)) + safmin) {
+		    goto L60;
+		}
+/* L50: */
+	    }
+	}
+
+	m = lend;
+
+L60:
+	if (m < lend) {
+	    e[m] = 0.;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L80;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l + 1) {
+	    if (icompz > 0) {
+		dlaev2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2, &c__, &s);
+		work[l] = c__;
+		work[*n - 1 + l] = s;
+		zlasr_("R", "V", "B", n, &c__2, &work[l], &work[*n - 1 + l], &
+			z__[l * z_dim1 + 1], ldz);
+	    } else {
+		dlae2_(&d__[l], &e[l], &d__[l + 1], &rt1, &rt2);
+	    }
+	    d__[l] = rt1;
+	    d__[l + 1] = rt2;
+	    e[l] = 0.;
+	    l += 2;
+	    if (l <= lend) {
+		goto L40;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l + 1] - p) / (e[l] * 2.);
+	r__ = dlapy2_(&g, &c_b1015);
+	g = d__[m] - p + e[l] / (g + d_sign(&r__, &g));
+
+	s = 1.;
+	c__ = 1.;
+	p = 0.;
+
+/*        Inner loop */
+
+	mm1 = m - 1;
+	i__1 = l;
+	for (i__ = mm1; i__ >= i__1; --i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    dlartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m - 1) {
+		e[i__ + 1] = r__;
+	    }
+	    g = d__[i__ + 1] - p;
+	    r__ = (d__[i__] - g) * s + c__ * 2. * b;
+	    p = s * r__;
+	    d__[i__ + 1] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = -s;
+	    }
+
+/* L70: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = m - l + 1;
+	    zlasr_("R", "V", "B", n, &mm, &work[l], &work[*n - 1 + l], &z__[l
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[l] = g;
+	goto L40;
+
+/*        Eigenvalue found. */
+
+L80:
+	d__[l] = p;
+
+	++l;
+	if (l <= lend) {
+	    goto L40;
+	}
+	goto L140;
+
+    } else {
+
+/*
+          QR Iteration
+
+          Look for small superdiagonal element.
+*/
+
+L90:
+	if (l != lend) {
+	    lendp1 = lend + 1;
+	    i__1 = lendp1;
+	    for (m = l; m >= i__1; --m) {
+/* Computing 2nd power */
+		d__2 = (d__1 = e[m - 1], abs(d__1));
+		tst = d__2 * d__2;
+		if (tst <= eps2 * (d__1 = d__[m], abs(d__1)) * (d__2 = d__[m
+			- 1], abs(d__2)) + safmin) {
+		    goto L110;
+		}
+/* L100: */
+	    }
+	}
+
+	m = lend;
+
+L110:
+	if (m > lend) {
+	    e[m - 1] = 0.;
+	}
+	p = d__[l];
+	if (m == l) {
+	    goto L130;
+	}
+
+/*
+          If remaining matrix is 2-by-2, use DLAE2 or SLAEV2
+          to compute its eigensystem.
+*/
+
+	if (m == l - 1) {
+	    if (icompz > 0) {
+		dlaev2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2, &c__, &s)
+			;
+		work[m] = c__;
+		work[*n - 1 + m] = s;
+		zlasr_("R", "V", "F", n, &c__2, &work[m], &work[*n - 1 + m], &
+			z__[(l - 1) * z_dim1 + 1], ldz);
+	    } else {
+		dlae2_(&d__[l - 1], &e[l - 1], &d__[l], &rt1, &rt2);
+	    }
+	    d__[l - 1] = rt1;
+	    d__[l] = rt2;
+	    e[l - 1] = 0.;
+	    l += -2;
+	    if (l >= lend) {
+		goto L90;
+	    }
+	    goto L140;
+	}
+
+	if (jtot == nmaxit) {
+	    goto L140;
+	}
+	++jtot;
+
+/*        Form shift. */
+
+	g = (d__[l - 1] - p) / (e[l - 1] * 2.);
+	r__ = dlapy2_(&g, &c_b1015);
+	g = d__[m] - p + e[l - 1] / (g + d_sign(&r__, &g));
+
+	s = 1.;
+	c__ = 1.;
+	p = 0.;
+
+/*        Inner loop */
+
+	lm1 = l - 1;
+	i__1 = lm1;
+	for (i__ = m; i__ <= i__1; ++i__) {
+	    f = s * e[i__];
+	    b = c__ * e[i__];
+	    dlartg_(&g, &f, &c__, &s, &r__);
+	    if (i__ != m) {
+		e[i__ - 1] = r__;
+	    }
+	    g = d__[i__] - p;
+	    r__ = (d__[i__ + 1] - g) * s + c__ * 2. * b;
+	    p = s * r__;
+	    d__[i__] = g + p;
+	    g = c__ * r__ - b;
+
+/*           If eigenvectors are desired, then save rotations. */
+
+	    if (icompz > 0) {
+		work[i__] = c__;
+		work[*n - 1 + i__] = s;
+	    }
+
+/* L120: */
+	}
+
+/*        If eigenvectors are desired, then apply saved rotations. */
+
+	if (icompz > 0) {
+	    mm = l - m + 1;
+	    zlasr_("R", "V", "F", n, &mm, &work[m], &work[*n - 1 + m], &z__[m
+		    * z_dim1 + 1], ldz);
+	}
+
+	d__[l] -= p;
+	e[lm1] = g;
+	goto L90;
+
+/*        Eigenvalue found. */
+
+L130:
+	d__[l] = p;
+
+	--l;
+	if (l >= lend) {
+	    goto L90;
+	}
+	goto L140;
+
+    }
+
+/*     Undo scaling if necessary */
+
+L140:
+    if (iscale == 1) {
+	i__1 = lendsv - lsv + 1;
+	dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	dlascl_("G", &c__0, &c__0, &ssfmax, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    } else if (iscale == 2) {
+	i__1 = lendsv - lsv + 1;
+	dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &d__[lsv],
+		n, info);
+	i__1 = lendsv - lsv;
+	dlascl_("G", &c__0, &c__0, &ssfmin, &anorm, &i__1, &c__1, &e[lsv], n,
+		info);
+    }
+
+/*
+       Check for no convergence to an eigenvalue after a total
+       of N*MAXIT iterations.
+*/
+
+    if (jtot == nmaxit) {
+	i__1 = *n - 1;
+	for (i__ = 1; i__ <= i__1; ++i__) {
+	    if (e[i__] != 0.) {
+		++(*info);
+	    }
+/* L150: */
+	}
+	return 0;
+    }
+    goto L10;
+
+/*     Order eigenvalues and eigenvectors. */
+
+L160:
+    if (icompz == 0) {
+
+/*        Use Quick Sort */
+
+	dlasrt_("I", n, &d__[1], info);
+
+    } else {
+
+/*        Use Selection Sort to minimize swaps of eigenvectors */
+
+	i__1 = *n;
+	for (ii = 2; ii <= i__1; ++ii) {
+	    i__ = ii - 1;
+	    k = i__;
+	    p = d__[i__];
+	    i__2 = *n;
+	    for (j = ii; j <= i__2; ++j) {
+		if (d__[j] < p) {
+		    k = j;
+		    p = d__[j];
+		}
+/* L170: */
+	    }
+	    if (k != i__) {
+		d__[k] = d__[i__];
+		d__[i__] = p;
+		zswap_(n, &z__[i__ * z_dim1 + 1], &c__1, &z__[k * z_dim1 + 1],
+			 &c__1);
+	    }
+/* L180: */
+	}
+    }
+    return 0;
+
+/*     End of ZSTEQR */
+
+} /* zsteqr_ */
+
+/* Subroutine */ int ztrevc_(char *side, char *howmny, logical *select,
+	integer *n, doublecomplex *t, integer *ldt, doublecomplex *vl,
+	integer *ldvl, doublecomplex *vr, integer *ldvr, integer *mm, integer
+	*m, doublecomplex *work, doublereal *rwork, integer *info)
+{
+    /* System generated locals */
+    integer t_dim1, t_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
+	    i__2, i__3, i__4, i__5;
+    doublereal d__1, d__2, d__3;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    double d_imag(doublecomplex *);
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, k, ii, ki, is;
+    static doublereal ulp;
+    static logical allv;
+    static doublereal unfl, ovfl, smin;
+    static logical over;
+    static doublereal scale;
+    extern logical lsame_(char *, char *);
+    static doublereal remax;
+    static logical leftv, bothv;
+    extern /* Subroutine */ int zgemv_(char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *, doublecomplex *, doublecomplex *, integer *);
+    static logical somev;
+    extern /* Subroutine */ int zcopy_(integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *), dlabad_(doublereal *, doublereal *);
+
+    extern /* Subroutine */ int xerbla_(char *, integer *), zdscal_(
+	    integer *, doublereal *, doublecomplex *, integer *);
+    extern integer izamax_(integer *, doublecomplex *, integer *);
+    static logical rightv;
+    extern doublereal dzasum_(integer *, doublecomplex *, integer *);
+    static doublereal smlnum;
+    extern /* Subroutine */ int zlatrs_(char *, char *, char *, char *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublereal *, doublereal *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZTREVC computes some or all of the right and/or left eigenvectors of
+    a complex upper triangular matrix T.
+
+    The right eigenvector x and the left eigenvector y of T corresponding
+    to an eigenvalue w are defined by:
+
+                 T*x = w*x,     y'*T = w*y'
+
+    where y' denotes the conjugate transpose of the vector y.
+
+    If all eigenvectors are requested, the routine may either return the
+    matrices X and/or Y of right or left eigenvectors of T, or the
+    products Q*X and/or Q*Y, where Q is an input unitary
+    matrix. If T was obtained from the Schur factorization of an
+    original matrix A = Q*T*Q', then Q*X and Q*Y are the matrices of
+    right or left eigenvectors of A.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'R':  compute right eigenvectors only;
+            = 'L':  compute left eigenvectors only;
+            = 'B':  compute both right and left eigenvectors.
+
+    HOWMNY  (input) CHARACTER*1
+            = 'A':  compute all right and/or left eigenvectors;
+            = 'B':  compute all right and/or left eigenvectors,
+                    and backtransform them using the input matrices
+                    supplied in VR and/or VL;
+            = 'S':  compute selected right and/or left eigenvectors,
+                    specified by the logical array SELECT.
+
+    SELECT  (input) LOGICAL array, dimension (N)
+            If HOWMNY = 'S', SELECT specifies the eigenvectors to be
+            computed.
+            If HOWMNY = 'A' or 'B', SELECT is not referenced.
+            To select the eigenvector corresponding to the j-th
+            eigenvalue, SELECT(j) must be set to .TRUE..
+
+    N       (input) INTEGER
+            The order of the matrix T. N >= 0.
+
+    T       (input/output) COMPLEX*16 array, dimension (LDT,N)
+            The upper triangular matrix T.  T is modified, but restored
+            on exit.
+
+    LDT     (input) INTEGER
+            The leading dimension of the array T. LDT >= max(1,N).
+
+    VL      (input/output) COMPLEX*16 array, dimension (LDVL,MM)
+            On entry, if SIDE = 'L' or 'B' and HOWMNY = 'B', VL must
+            contain an N-by-N matrix Q (usually the unitary matrix Q of
+            Schur vectors returned by ZHSEQR).
+            On exit, if SIDE = 'L' or 'B', VL contains:
+            if HOWMNY = 'A', the matrix Y of left eigenvectors of T;
+                             VL is lower triangular. The i-th column
+                             VL(i) of VL is the eigenvector corresponding
+                             to T(i,i).
+            if HOWMNY = 'B', the matrix Q*Y;
+            if HOWMNY = 'S', the left eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VL, in the same order as their
+                             eigenvalues.
+            If SIDE = 'R', VL is not referenced.
+
+    LDVL    (input) INTEGER
+            The leading dimension of the array VL.  LDVL >= max(1,N) if
+            SIDE = 'L' or 'B'; LDVL >= 1 otherwise.
+
+    VR      (input/output) COMPLEX*16 array, dimension (LDVR,MM)
+            On entry, if SIDE = 'R' or 'B' and HOWMNY = 'B', VR must
+            contain an N-by-N matrix Q (usually the unitary matrix Q of
+            Schur vectors returned by ZHSEQR).
+            On exit, if SIDE = 'R' or 'B', VR contains:
+            if HOWMNY = 'A', the matrix X of right eigenvectors of T;
+                             VR is upper triangular. The i-th column
+                             VR(i) of VR is the eigenvector corresponding
+                             to T(i,i).
+            if HOWMNY = 'B', the matrix Q*X;
+            if HOWMNY = 'S', the right eigenvectors of T specified by
+                             SELECT, stored consecutively in the columns
+                             of VR, in the same order as their
+                             eigenvalues.
+            If SIDE = 'L', VR is not referenced.
+
+    LDVR    (input) INTEGER
+            The leading dimension of the array VR.  LDVR >= max(1,N) if
+             SIDE = 'R' or 'B'; LDVR >= 1 otherwise.
+
+    MM      (input) INTEGER
+            The number of columns in the arrays VL and/or VR. MM >= M.
+
+    M       (output) INTEGER
+            The number of columns in the arrays VL and/or VR actually
+            used to store the eigenvectors.  If HOWMNY = 'A' or 'B', M
+            is set to N.  Each selected eigenvector occupies one
+            column.
+
+    WORK    (workspace) COMPLEX*16 array, dimension (2*N)
+
+    RWORK   (workspace) DOUBLE PRECISION array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    Further Details
+    ===============
+
+    The algorithm used in this program is basically backward (forward)
+    substitution, with scaling to make the the code robust against
+    possible overflow.
+
+    Each eigenvector is normalized so that the element of largest
+    magnitude has magnitude 1; here the magnitude of a complex number
+    (x,y) is taken to be |x| + |y|.
+
+    =====================================================================
+
+
+       Decode and test the input parameters
+*/
+
+    /* Parameter adjustments */
+    --select;
+    t_dim1 = *ldt;
+    t_offset = 1 + t_dim1;
+    t -= t_offset;
+    vl_dim1 = *ldvl;
+    vl_offset = 1 + vl_dim1;
+    vl -= vl_offset;
+    vr_dim1 = *ldvr;
+    vr_offset = 1 + vr_dim1;
+    vr -= vr_offset;
+    --work;
+    --rwork;
+
+    /* Function Body */
+    bothv = lsame_(side, "B");
+    rightv = (lsame_(side, "R")) || (bothv);
+    leftv = (lsame_(side, "L")) || (bothv);
+
+    allv = lsame_(howmny, "A");
+    over = lsame_(howmny, "B");
+    somev = lsame_(howmny, "S");
+
+/*
+       Set M to the number of columns required to store the selected
+       eigenvectors.
+*/
+
+    if (somev) {
+	*m = 0;
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (select[j]) {
+		++(*m);
+	    }
+/* L10: */
+	}
+    } else {
+	*m = *n;
+    }
+
+    *info = 0;
+    if (! rightv && ! leftv) {
+	*info = -1;
+    } else if (! allv && ! over && ! somev) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if (*ldt < max(1,*n)) {
+	*info = -6;
+    } else if ((*ldvl < 1) || (leftv && *ldvl < *n)) {
+	*info = -8;
+    } else if ((*ldvr < 1) || (rightv && *ldvr < *n)) {
+	*info = -10;
+    } else if (*mm < *m) {
+	*info = -11;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZTREVC", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible. */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Set the constants to control overflow. */
+
+    unfl = SAFEMINIMUM;
+    ovfl = 1. / unfl;
+    dlabad_(&unfl, &ovfl);
+    ulp = PRECISION;
+    smlnum = unfl * (*n / ulp);
+
+/*     Store the diagonal elements of T in working array WORK. */
+
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+	i__2 = i__ + *n;
+	i__3 = i__ + i__ * t_dim1;
+	work[i__2].r = t[i__3].r, work[i__2].i = t[i__3].i;
+/* L20: */
+    }
+
+/*
+       Compute 1-norm of each column of strictly upper triangular
+       part of T to control overflow in triangular solver.
+*/
+
+    rwork[1] = 0.;
+    i__1 = *n;
+    for (j = 2; j <= i__1; ++j) {
+	i__2 = j - 1;
+	rwork[j] = dzasum_(&i__2, &t[j * t_dim1 + 1], &c__1);
+/* L30: */
+    }
+
+    if (rightv) {
+
+/*        Compute right eigenvectors. */
+
+	is = *m;
+	for (ki = *n; ki >= 1; --ki) {
+
+	    if (somev) {
+		if (! select[ki]) {
+		    goto L80;
+		}
+	    }
+/* Computing MAX */
+	    i__1 = ki + ki * t_dim1;
+	    d__3 = ulp * ((d__1 = t[i__1].r, abs(d__1)) + (d__2 = d_imag(&t[
+		    ki + ki * t_dim1]), abs(d__2)));
+	    smin = max(d__3,smlnum);
+
+	    work[1].r = 1., work[1].i = 0.;
+
+/*           Form right-hand side. */
+
+	    i__1 = ki - 1;
+	    for (k = 1; k <= i__1; ++k) {
+		i__2 = k;
+		i__3 = k + ki * t_dim1;
+		z__1.r = -t[i__3].r, z__1.i = -t[i__3].i;
+		work[i__2].r = z__1.r, work[i__2].i = z__1.i;
+/* L40: */
+	    }
+
+/*
+             Solve the triangular system:
+                (T(1:KI-1,1:KI-1) - T(KI,KI))*X = SCALE*WORK.
+*/
+
+	    i__1 = ki - 1;
+	    for (k = 1; k <= i__1; ++k) {
+		i__2 = k + k * t_dim1;
+		i__3 = k + k * t_dim1;
+		i__4 = ki + ki * t_dim1;
+		z__1.r = t[i__3].r - t[i__4].r, z__1.i = t[i__3].i - t[i__4]
+			.i;
+		t[i__2].r = z__1.r, t[i__2].i = z__1.i;
+		i__2 = k + k * t_dim1;
+		if ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[k + k *
+			t_dim1]), abs(d__2)) < smin) {
+		    i__3 = k + k * t_dim1;
+		    t[i__3].r = smin, t[i__3].i = 0.;
+		}
+/* L50: */
+	    }
+
+	    if (ki > 1) {
+		i__1 = ki - 1;
+		zlatrs_("Upper", "No transpose", "Non-unit", "Y", &i__1, &t[
+			t_offset], ldt, &work[1], &scale, &rwork[1], info);
+		i__1 = ki;
+		work[i__1].r = scale, work[i__1].i = 0.;
+	    }
+
+/*           Copy the vector x or Q*x to VR and normalize. */
+
+	    if (! over) {
+		zcopy_(&ki, &work[1], &c__1, &vr[is * vr_dim1 + 1], &c__1);
+
+		ii = izamax_(&ki, &vr[is * vr_dim1 + 1], &c__1);
+		i__1 = ii + is * vr_dim1;
+		remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag(
+			&vr[ii + is * vr_dim1]), abs(d__2)));
+		zdscal_(&ki, &remax, &vr[is * vr_dim1 + 1], &c__1);
+
+		i__1 = *n;
+		for (k = ki + 1; k <= i__1; ++k) {
+		    i__2 = k + is * vr_dim1;
+		    vr[i__2].r = 0., vr[i__2].i = 0.;
+/* L60: */
+		}
+	    } else {
+		if (ki > 1) {
+		    i__1 = ki - 1;
+		    z__1.r = scale, z__1.i = 0.;
+		    zgemv_("N", n, &i__1, &c_b60, &vr[vr_offset], ldvr, &work[
+			    1], &c__1, &z__1, &vr[ki * vr_dim1 + 1], &c__1);
+		}
+
+		ii = izamax_(n, &vr[ki * vr_dim1 + 1], &c__1);
+		i__1 = ii + ki * vr_dim1;
+		remax = 1. / ((d__1 = vr[i__1].r, abs(d__1)) + (d__2 = d_imag(
+			&vr[ii + ki * vr_dim1]), abs(d__2)));
+		zdscal_(n, &remax, &vr[ki * vr_dim1 + 1], &c__1);
+	    }
+
+/*           Set back the original diagonal elements of T. */
+
+	    i__1 = ki - 1;
+	    for (k = 1; k <= i__1; ++k) {
+		i__2 = k + k * t_dim1;
+		i__3 = k + *n;
+		t[i__2].r = work[i__3].r, t[i__2].i = work[i__3].i;
+/* L70: */
+	    }
+
+	    --is;
+L80:
+	    ;
+	}
+    }
+
+    if (leftv) {
+
+/*        Compute left eigenvectors. */
+
+	is = 1;
+	i__1 = *n;
+	for (ki = 1; ki <= i__1; ++ki) {
+
+	    if (somev) {
+		if (! select[ki]) {
+		    goto L130;
+		}
+	    }
+/* Computing MAX */
+	    i__2 = ki + ki * t_dim1;
+	    d__3 = ulp * ((d__1 = t[i__2].r, abs(d__1)) + (d__2 = d_imag(&t[
+		    ki + ki * t_dim1]), abs(d__2)));
+	    smin = max(d__3,smlnum);
+
+	    i__2 = *n;
+	    work[i__2].r = 1., work[i__2].i = 0.;
+
+/*           Form right-hand side. */
+
+	    i__2 = *n;
+	    for (k = ki + 1; k <= i__2; ++k) {
+		i__3 = k;
+		d_cnjg(&z__2, &t[ki + k * t_dim1]);
+		z__1.r = -z__2.r, z__1.i = -z__2.i;
+		work[i__3].r = z__1.r, work[i__3].i = z__1.i;
+/* L90: */
+	    }
+
+/*
+             Solve the triangular system:
+                (T(KI+1:N,KI+1:N) - T(KI,KI))'*X = SCALE*WORK.
+*/
+
+	    i__2 = *n;
+	    for (k = ki + 1; k <= i__2; ++k) {
+		i__3 = k + k * t_dim1;
+		i__4 = k + k * t_dim1;
+		i__5 = ki + ki * t_dim1;
+		z__1.r = t[i__4].r - t[i__5].r, z__1.i = t[i__4].i - t[i__5]
+			.i;
+		t[i__3].r = z__1.r, t[i__3].i = z__1.i;
+		i__3 = k + k * t_dim1;
+		if ((d__1 = t[i__3].r, abs(d__1)) + (d__2 = d_imag(&t[k + k *
+			t_dim1]), abs(d__2)) < smin) {
+		    i__4 = k + k * t_dim1;
+		    t[i__4].r = smin, t[i__4].i = 0.;
+		}
+/* L100: */
+	    }
+
+	    if (ki < *n) {
+		i__2 = *n - ki;
+		zlatrs_("Upper", "Conjugate transpose", "Non-unit", "Y", &
+			i__2, &t[ki + 1 + (ki + 1) * t_dim1], ldt, &work[ki +
+			1], &scale, &rwork[1], info);
+		i__2 = ki;
+		work[i__2].r = scale, work[i__2].i = 0.;
+	    }
+
+/*           Copy the vector x or Q*x to VL and normalize. */
+
+	    if (! over) {
+		i__2 = *n - ki + 1;
+		zcopy_(&i__2, &work[ki], &c__1, &vl[ki + is * vl_dim1], &c__1)
+			;
+
+		i__2 = *n - ki + 1;
+		ii = izamax_(&i__2, &vl[ki + is * vl_dim1], &c__1) + ki - 1;
+		i__2 = ii + is * vl_dim1;
+		remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag(
+			&vl[ii + is * vl_dim1]), abs(d__2)));
+		i__2 = *n - ki + 1;
+		zdscal_(&i__2, &remax, &vl[ki + is * vl_dim1], &c__1);
+
+		i__2 = ki - 1;
+		for (k = 1; k <= i__2; ++k) {
+		    i__3 = k + is * vl_dim1;
+		    vl[i__3].r = 0., vl[i__3].i = 0.;
+/* L110: */
+		}
+	    } else {
+		if (ki < *n) {
+		    i__2 = *n - ki;
+		    z__1.r = scale, z__1.i = 0.;
+		    zgemv_("N", n, &i__2, &c_b60, &vl[(ki + 1) * vl_dim1 + 1],
+			     ldvl, &work[ki + 1], &c__1, &z__1, &vl[ki *
+			    vl_dim1 + 1], &c__1);
+		}
+
+		ii = izamax_(n, &vl[ki * vl_dim1 + 1], &c__1);
+		i__2 = ii + ki * vl_dim1;
+		remax = 1. / ((d__1 = vl[i__2].r, abs(d__1)) + (d__2 = d_imag(
+			&vl[ii + ki * vl_dim1]), abs(d__2)));
+		zdscal_(n, &remax, &vl[ki * vl_dim1 + 1], &c__1);
+	    }
+
+/*           Set back the original diagonal elements of T. */
+
+	    i__2 = *n;
+	    for (k = ki + 1; k <= i__2; ++k) {
+		i__3 = k + k * t_dim1;
+		i__4 = k + *n;
+		t[i__3].r = work[i__4].r, t[i__3].i = work[i__4].i;
+/* L120: */
+	    }
+
+	    ++is;
+L130:
+	    ;
+	}
+    }
+
+    return 0;
+
+/*     End of ZTREVC */
+
+} /* ztrevc_ */
+
+/* Subroutine */ int ztrti2_(char *uplo, char *diag, integer *n,
+	doublecomplex *a, integer *lda, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer j;
+    static doublecomplex ajj;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *);
+    static logical upper;
+    extern /* Subroutine */ int ztrmv_(char *, char *, char *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZTRTI2 computes the inverse of a complex upper or lower triangular
+    matrix.
+
+    This is the Level 2 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            Specifies whether the matrix A is upper or lower triangular.
+            = 'U':  Upper triangular
+            = 'L':  Lower triangular
+
+    DIAG    (input) CHARACTER*1
+            Specifies whether or not the matrix A is unit triangular.
+            = 'N':  Non-unit triangular
+            = 'U':  Unit triangular
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading n by n upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading n by n lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -k, the k-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZTRTI2", &i__1);
+	return 0;
+    }
+
+    if (upper) {
+
+/*        Compute inverse of upper triangular matrix. */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    if (nounit) {
+		i__2 = j + j * a_dim1;
+		z_div(&z__1, &c_b60, &a[j + j * a_dim1]);
+		a[i__2].r = z__1.r, a[i__2].i = z__1.i;
+		i__2 = j + j * a_dim1;
+		z__1.r = -a[i__2].r, z__1.i = -a[i__2].i;
+		ajj.r = z__1.r, ajj.i = z__1.i;
+	    } else {
+		z__1.r = -1., z__1.i = -0.;
+		ajj.r = z__1.r, ajj.i = z__1.i;
+	    }
+
+/*           Compute elements 1:j-1 of j-th column. */
+
+	    i__2 = j - 1;
+	    ztrmv_("Upper", "No transpose", diag, &i__2, &a[a_offset], lda, &
+		    a[j * a_dim1 + 1], &c__1);
+	    i__2 = j - 1;
+	    zscal_(&i__2, &ajj, &a[j * a_dim1 + 1], &c__1);
+/* L10: */
+	}
+    } else {
+
+/*        Compute inverse of lower triangular matrix. */
+
+	for (j = *n; j >= 1; --j) {
+	    if (nounit) {
+		i__1 = j + j * a_dim1;
+		z_div(&z__1, &c_b60, &a[j + j * a_dim1]);
+		a[i__1].r = z__1.r, a[i__1].i = z__1.i;
+		i__1 = j + j * a_dim1;
+		z__1.r = -a[i__1].r, z__1.i = -a[i__1].i;
+		ajj.r = z__1.r, ajj.i = z__1.i;
+	    } else {
+		z__1.r = -1., z__1.i = -0.;
+		ajj.r = z__1.r, ajj.i = z__1.i;
+	    }
+	    if (j < *n) {
+
+/*              Compute elements j+1:n of j-th column. */
+
+		i__1 = *n - j;
+		ztrmv_("Lower", "No transpose", diag, &i__1, &a[j + 1 + (j +
+			1) * a_dim1], lda, &a[j + 1 + j * a_dim1], &c__1);
+		i__1 = *n - j;
+		zscal_(&i__1, &ajj, &a[j + 1 + j * a_dim1], &c__1);
+	    }
+/* L20: */
+	}
+    }
+
+    return 0;
+
+/*     End of ZTRTI2 */
+
+} /* ztrti2_ */
+
+/* Subroutine */ int ztrtri_(char *uplo, char *diag, integer *n,
+	doublecomplex *a, integer *lda, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, i__1, i__2, i__3[2], i__4, i__5;
+    doublecomplex z__1;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer j, jb, nb, nn;
+    extern logical lsame_(char *, char *);
+    static logical upper;
+    extern /* Subroutine */ int ztrmm_(char *, char *, char *, char *,
+	    integer *, integer *, doublecomplex *, doublecomplex *, integer *,
+	     doublecomplex *, integer *),
+	    ztrsm_(char *, char *, char *, char *, integer *, integer *,
+	    doublecomplex *, doublecomplex *, integer *, doublecomplex *,
+	    integer *), ztrti2_(char *, char *
+	    , integer *, doublecomplex *, integer *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical nounit;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZTRTRI computes the inverse of a complex upper or lower triangular
+    matrix A.
+
+    This is the Level 3 BLAS version of the algorithm.
+
+    Arguments
+    =========
+
+    UPLO    (input) CHARACTER*1
+            = 'U':  A is upper triangular;
+            = 'L':  A is lower triangular.
+
+    DIAG    (input) CHARACTER*1
+            = 'N':  A is non-unit triangular;
+            = 'U':  A is unit triangular.
+
+    N       (input) INTEGER
+            The order of the matrix A.  N >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the triangular matrix A.  If UPLO = 'U', the
+            leading N-by-N upper triangular part of the array A contains
+            the upper triangular matrix, and the strictly lower
+            triangular part of A is not referenced.  If UPLO = 'L', the
+            leading N-by-N lower triangular part of the array A contains
+            the lower triangular matrix, and the strictly upper
+            triangular part of A is not referenced.  If DIAG = 'U', the
+            diagonal elements of A are also not referenced and are
+            assumed to be 1.
+            On exit, the (triangular) inverse of the original matrix, in
+            the same storage format.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.  LDA >= max(1,N).
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+            > 0: if INFO = i, A(i,i) is exactly zero.  The triangular
+                 matrix is singular and its inverse can not be computed.
+
+    =====================================================================
+
+
+       Test the input parameters.
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+
+    /* Function Body */
+    *info = 0;
+    upper = lsame_(uplo, "U");
+    nounit = lsame_(diag, "N");
+    if (! upper && ! lsame_(uplo, "L")) {
+	*info = -1;
+    } else if (! nounit && ! lsame_(diag, "U")) {
+	*info = -2;
+    } else if (*n < 0) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZTRTRI", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	return 0;
+    }
+
+/*     Check for singularity if non-unit. */
+
+    if (nounit) {
+	i__1 = *n;
+	for (*info = 1; *info <= i__1; ++(*info)) {
+	    i__2 = *info + *info * a_dim1;
+	    if (a[i__2].r == 0. && a[i__2].i == 0.) {
+		return 0;
+	    }
+/* L10: */
+	}
+	*info = 0;
+    }
+
+/*
+       Determine the block size for this environment.
+
+   Writing concatenation
+*/
+    i__3[0] = 1, a__1[0] = uplo;
+    i__3[1] = 1, a__1[1] = diag;
+    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+    nb = ilaenv_(&c__1, "ZTRTRI", ch__1, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, (
+	    ftnlen)2);
+    if ((nb <= 1) || (nb >= *n)) {
+
+/*        Use unblocked code */
+
+	ztrti2_(uplo, diag, n, &a[a_offset], lda, info);
+    } else {
+
+/*        Use blocked code */
+
+	if (upper) {
+
+/*           Compute inverse of upper triangular matrix */
+
+	    i__1 = *n;
+	    i__2 = nb;
+	    for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) {
+/* Computing MIN */
+		i__4 = nb, i__5 = *n - j + 1;
+		jb = min(i__4,i__5);
+
+/*              Compute rows 1:j-1 of current block column */
+
+		i__4 = j - 1;
+		ztrmm_("Left", "Upper", "No transpose", diag, &i__4, &jb, &
+			c_b60, &a[a_offset], lda, &a[j * a_dim1 + 1], lda);
+		i__4 = j - 1;
+		z__1.r = -1., z__1.i = -0.;
+		ztrsm_("Right", "Upper", "No transpose", diag, &i__4, &jb, &
+			z__1, &a[j + j * a_dim1], lda, &a[j * a_dim1 + 1],
+			lda);
+
+/*              Compute inverse of current diagonal block */
+
+		ztrti2_("Upper", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L20: */
+	    }
+	} else {
+
+/*           Compute inverse of lower triangular matrix */
+
+	    nn = (*n - 1) / nb * nb + 1;
+	    i__2 = -nb;
+	    for (j = nn; i__2 < 0 ? j >= 1 : j <= 1; j += i__2) {
+/* Computing MIN */
+		i__1 = nb, i__4 = *n - j + 1;
+		jb = min(i__1,i__4);
+		if (j + jb <= *n) {
+
+/*                 Compute rows j+jb:n of current block column */
+
+		    i__1 = *n - j - jb + 1;
+		    ztrmm_("Left", "Lower", "No transpose", diag, &i__1, &jb,
+			    &c_b60, &a[j + jb + (j + jb) * a_dim1], lda, &a[j
+			    + jb + j * a_dim1], lda);
+		    i__1 = *n - j - jb + 1;
+		    z__1.r = -1., z__1.i = -0.;
+		    ztrsm_("Right", "Lower", "No transpose", diag, &i__1, &jb,
+			     &z__1, &a[j + j * a_dim1], lda, &a[j + jb + j *
+			    a_dim1], lda);
+		}
+
+/*              Compute inverse of current diagonal block */
+
+		ztrti2_("Lower", diag, &jb, &a[j + j * a_dim1], lda, info);
+/* L30: */
+	    }
+	}
+    }
+
+    return 0;
+
+/*     End of ZTRTRI */
+
+} /* ztrtri_ */
+
+/* Subroutine */ int zung2r_(integer *m, integer *n, integer *k,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *), zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZUNG2R generates an m by n complex matrix Q with orthonormal columns,
+    which is defined as the first n columns of a product of k elementary
+    reflectors of order m
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by ZGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by ZGEQRF in the first k columns of its array
+            argument A.
+            On exit, the m by n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEQRF.
+
+    WORK    (workspace) COMPLEX*16 array, dimension (N)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNG2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	return 0;
+    }
+
+/*     Initialise columns k+1:n to columns of the unit matrix */
+
+    i__1 = *n;
+    for (j = *k + 1; j <= i__1; ++j) {
+	i__2 = *m;
+	for (l = 1; l <= i__2; ++l) {
+	    i__3 = l + j * a_dim1;
+	    a[i__3].r = 0., a[i__3].i = 0.;
+/* L10: */
+	}
+	i__2 = j + j * a_dim1;
+	a[i__2].r = 1., a[i__2].i = 0.;
+/* L20: */
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i) to A(i:m,i:n) from the left */
+
+	if (i__ < *n) {
+	    i__1 = i__ + i__ * a_dim1;
+	    a[i__1].r = 1., a[i__1].i = 0.;
+	    i__1 = *m - i__ + 1;
+	    i__2 = *n - i__;
+	    zlarf_("Left", &i__1, &i__2, &a[i__ + i__ * a_dim1], &c__1, &tau[
+		    i__], &a[i__ + (i__ + 1) * a_dim1], lda, &work[1]);
+	}
+	if (i__ < *m) {
+	    i__1 = *m - i__;
+	    i__2 = i__;
+	    z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i;
+	    zscal_(&i__1, &z__1, &a[i__ + 1 + i__ * a_dim1], &c__1);
+	}
+	i__1 = i__ + i__ * a_dim1;
+	i__2 = i__;
+	z__1.r = 1. - tau[i__2].r, z__1.i = 0. - tau[i__2].i;
+	a[i__1].r = z__1.r, a[i__1].i = z__1.i;
+
+/*        Set A(1:i-1,i) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    i__2 = l + i__ * a_dim1;
+	    a[i__2].r = 0., a[i__2].i = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of ZUNG2R */
+
+} /* zung2r_ */
+
+/* Subroutine */ int zungbr_(char *vect, integer *m, integer *n, integer *k,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+
+    /* Local variables */
+    static integer i__, j, nb, mn;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical wantq;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer lwkopt;
+    static logical lquery;
+    extern /* Subroutine */ int zunglq_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *), zungqr_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNGBR generates one of the complex unitary matrices Q or P**H
+    determined by ZGEBRD when reducing a complex matrix A to bidiagonal
+    form: A = Q * B * P**H.  Q and P**H are defined as products of
+    elementary reflectors H(i) or G(i) respectively.
+
+    If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q
+    is of order M:
+    if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n
+    columns of Q, where m >= n >= k;
+    if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an
+    M-by-M matrix.
+
+    If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H
+    is of order N:
+    if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m
+    rows of P**H, where n >= m >= k;
+    if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as
+    an N-by-N matrix.
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            Specifies whether the matrix Q or the matrix P**H is
+            required, as defined in the transformation applied by ZGEBRD:
+            = 'Q':  generate Q;
+            = 'P':  generate P**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q or P**H to be returned.
+            M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q or P**H to be returned.
+            N >= 0.
+            If VECT = 'Q', M >= N >= min(M,K);
+            if VECT = 'P', N >= M >= min(N,K).
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original M-by-K
+            matrix reduced by ZGEBRD.
+            If VECT = 'P', the number of rows in the original K-by-N
+            matrix reduced by ZGEBRD.
+            K >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by ZGEBRD.
+            On exit, the M-by-N matrix Q or P**H.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= M.
+
+    TAU     (input) COMPLEX*16 array, dimension
+                                  (min(M,K)) if VECT = 'Q'
+                                  (min(N,K)) if VECT = 'P'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i), which determines Q or P**H, as
+            returned by ZGEBRD in its array argument TAUQ or TAUP.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,min(M,N)).
+            For optimum performance LWORK >= min(M,N)*NB, where NB
+            is the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    wantq = lsame_(vect, "Q");
+    mn = min(*m,*n);
+    lquery = *lwork == -1;
+    if (! wantq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (*m < 0) {
+	*info = -2;
+    } else if (((*n < 0) || (wantq && ((*n > *m) || (*n < min(*m,*k))))) || (!
+	     wantq && ((*m > *n) || (*m < min(*n,*k))))) {
+	*info = -3;
+    } else if (*k < 0) {
+	*info = -4;
+    } else if (*lda < max(1,*m)) {
+	*info = -6;
+    } else if (*lwork < max(1,mn) && ! lquery) {
+	*info = -9;
+    }
+
+    if (*info == 0) {
+	if (wantq) {
+	    nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	} else {
+	    nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (
+		    ftnlen)1);
+	}
+	lwkopt = max(1,mn) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNGBR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if ((*m == 0) || (*n == 0)) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    if (wantq) {
+
+/*
+          Form Q, determined by a call to ZGEBRD to reduce an m-by-k
+          matrix
+*/
+
+	if (*m >= *k) {
+
+/*           If m >= k, assume m >= n >= k */
+
+	    zungqr_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If m < k, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             column to the right, and set the first row and column of Q
+             to those of the unit matrix
+*/
+
+	    for (j = *m; j >= 2; --j) {
+		i__1 = j * a_dim1 + 1;
+		a[i__1].r = 0., a[i__1].i = 0.;
+		i__1 = *m;
+		for (i__ = j + 1; i__ <= i__1; ++i__) {
+		    i__2 = i__ + j * a_dim1;
+		    i__3 = i__ + (j - 1) * a_dim1;
+		    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
+/* L10: */
+		}
+/* L20: */
+	    }
+	    i__1 = a_dim1 + 1;
+	    a[i__1].r = 1., a[i__1].i = 0.;
+	    i__1 = *m;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		i__2 = i__ + a_dim1;
+		a[i__2].r = 0., a[i__2].i = 0.;
+/* L30: */
+	    }
+	    if (*m > 1) {
+
+/*              Form Q(2:m,2:m) */
+
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		zungqr_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    } else {
+
+/*
+          Form P', determined by a call to ZGEBRD to reduce a k-by-n
+          matrix
+*/
+
+	if (*k < *n) {
+
+/*           If k < n, assume k <= m <= n */
+
+	    zunglq_(m, n, k, &a[a_offset], lda, &tau[1], &work[1], lwork, &
+		    iinfo);
+
+	} else {
+
+/*
+             If k >= n, assume m = n
+
+             Shift the vectors which define the elementary reflectors one
+             row downward, and set the first row and column of P' to
+             those of the unit matrix
+*/
+
+	    i__1 = a_dim1 + 1;
+	    a[i__1].r = 1., a[i__1].i = 0.;
+	    i__1 = *n;
+	    for (i__ = 2; i__ <= i__1; ++i__) {
+		i__2 = i__ + a_dim1;
+		a[i__2].r = 0., a[i__2].i = 0.;
+/* L40: */
+	    }
+	    i__1 = *n;
+	    for (j = 2; j <= i__1; ++j) {
+		for (i__ = j - 1; i__ >= 2; --i__) {
+		    i__2 = i__ + j * a_dim1;
+		    i__3 = i__ - 1 + j * a_dim1;
+		    a[i__2].r = a[i__3].r, a[i__2].i = a[i__3].i;
+/* L50: */
+		}
+		i__2 = j * a_dim1 + 1;
+		a[i__2].r = 0., a[i__2].i = 0.;
+/* L60: */
+	    }
+	    if (*n > 1) {
+
+/*              Form P'(2:n,2:n) */
+
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		zunglq_(&i__1, &i__2, &i__3, &a[((a_dim1) << (1)) + 2], lda, &
+			tau[1], &work[1], lwork, &iinfo);
+	    }
+	}
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNGBR */
+
+} /* zungbr_ */
+
+/* Subroutine */ int zunghr_(integer *n, integer *ilo, integer *ihi,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, nb, nh, iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer lwkopt;
+    static logical lquery;
+    extern /* Subroutine */ int zungqr_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNGHR generates a complex unitary matrix Q which is defined as the
+    product of IHI-ILO elementary reflectors of order N, as returned by
+    ZGEHRD:
+
+    Q = H(ilo) H(ilo+1) . . . H(ihi-1).
+
+    Arguments
+    =========
+
+    N       (input) INTEGER
+            The order of the matrix Q. N >= 0.
+
+    ILO     (input) INTEGER
+    IHI     (input) INTEGER
+            ILO and IHI must have the same values as in the previous call
+            of ZGEHRD. Q is equal to the unit matrix except in the
+            submatrix Q(ilo+1:ihi,ilo+1:ihi).
+            1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the vectors which define the elementary reflectors,
+            as returned by ZGEHRD.
+            On exit, the N-by-N unitary matrix Q.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,N).
+
+    TAU     (input) COMPLEX*16 array, dimension (N-1)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEHRD.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= IHI-ILO.
+            For optimum performance LWORK >= (IHI-ILO)*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nh = *ihi - *ilo;
+    lquery = *lwork == -1;
+    if (*n < 0) {
+	*info = -1;
+    } else if ((*ilo < 1) || (*ilo > max(1,*n))) {
+	*info = -2;
+    } else if ((*ihi < min(*ilo,*n)) || (*ihi > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*n)) {
+	*info = -5;
+    } else if (*lwork < max(1,nh) && ! lquery) {
+	*info = -8;
+    }
+
+    if (*info == 0) {
+	nb = ilaenv_(&c__1, "ZUNGQR", " ", &nh, &nh, &nh, &c_n1, (ftnlen)6, (
+		ftnlen)1);
+	lwkopt = max(1,nh) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNGHR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n == 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+/*
+       Shift the vectors which define the elementary reflectors one
+       column to the right, and set the first ilo and the last n-ihi
+       rows and columns to those of the unit matrix
+*/
+
+    i__1 = *ilo + 1;
+    for (j = *ihi; j >= i__1; --j) {
+	i__2 = j - 1;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0., a[i__3].i = 0.;
+/* L10: */
+	}
+	i__2 = *ihi;
+	for (i__ = j + 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    i__4 = i__ + (j - 1) * a_dim1;
+	    a[i__3].r = a[i__4].r, a[i__3].i = a[i__4].i;
+/* L20: */
+	}
+	i__2 = *n;
+	for (i__ = *ihi + 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0., a[i__3].i = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    i__1 = *ilo;
+    for (j = 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0., a[i__3].i = 0.;
+/* L50: */
+	}
+	i__2 = j + j * a_dim1;
+	a[i__2].r = 1., a[i__2].i = 0.;
+/* L60: */
+    }
+    i__1 = *n;
+    for (j = *ihi + 1; j <= i__1; ++j) {
+	i__2 = *n;
+	for (i__ = 1; i__ <= i__2; ++i__) {
+	    i__3 = i__ + j * a_dim1;
+	    a[i__3].r = 0., a[i__3].i = 0.;
+/* L70: */
+	}
+	i__2 = j + j * a_dim1;
+	a[i__2].r = 1., a[i__2].i = 0.;
+/* L80: */
+    }
+
+    if (nh > 0) {
+
+/*        Generate Q(ilo+1:ihi,ilo+1:ihi) */
+
+	zungqr_(&nh, &nh, &nh, &a[*ilo + 1 + (*ilo + 1) * a_dim1], lda, &tau[*
+		ilo], &work[1], lwork, &iinfo);
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNGHR */
+
+} /* zunghr_ */
+
+/* Subroutine */ int zungl2_(integer *m, integer *n, integer *k,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3;
+    doublecomplex z__1, z__2;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, j, l;
+    extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
+	    doublecomplex *, integer *), zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNGL2 generates an m-by-n complex matrix Q with orthonormal rows,
+    which is defined as the first m rows of a product of k elementary
+    reflectors of order n
+
+          Q  =  H(k)' . . . H(2)' H(1)'
+
+    as returned by ZGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by ZGELQF in the first k rows of its array argument A.
+            On exit, the m by n matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGELQF.
+
+    WORK    (workspace) COMPLEX*16 array, dimension (M)
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNGL2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	return 0;
+    }
+
+    if (*k < *m) {
+
+/*        Initialise rows k+1:m to rows of the unit matrix */
+
+	i__1 = *n;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (l = *k + 1; l <= i__2; ++l) {
+		i__3 = l + j * a_dim1;
+		a[i__3].r = 0., a[i__3].i = 0.;
+/* L10: */
+	    }
+	    if (j > *k && j <= *m) {
+		i__2 = j + j * a_dim1;
+		a[i__2].r = 1., a[i__2].i = 0.;
+	    }
+/* L20: */
+	}
+    }
+
+    for (i__ = *k; i__ >= 1; --i__) {
+
+/*        Apply H(i)' to A(i:m,i:n) from the right */
+
+	if (i__ < *n) {
+	    i__1 = *n - i__;
+	    zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    if (i__ < *m) {
+		i__1 = i__ + i__ * a_dim1;
+		a[i__1].r = 1., a[i__1].i = 0.;
+		i__1 = *m - i__;
+		i__2 = *n - i__ + 1;
+		d_cnjg(&z__1, &tau[i__]);
+		zlarf_("Right", &i__1, &i__2, &a[i__ + i__ * a_dim1], lda, &
+			z__1, &a[i__ + 1 + i__ * a_dim1], lda, &work[1]);
+	    }
+	    i__1 = *n - i__;
+	    i__2 = i__;
+	    z__1.r = -tau[i__2].r, z__1.i = -tau[i__2].i;
+	    zscal_(&i__1, &z__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	    i__1 = *n - i__;
+	    zlacgv_(&i__1, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+	i__1 = i__ + i__ * a_dim1;
+	d_cnjg(&z__2, &tau[i__]);
+	z__1.r = 1. - z__2.r, z__1.i = 0. - z__2.i;
+	a[i__1].r = z__1.r, a[i__1].i = z__1.i;
+
+/*        Set A(i,1:i-1) to zero */
+
+	i__1 = i__ - 1;
+	for (l = 1; l <= i__1; ++l) {
+	    i__2 = i__ + l * a_dim1;
+	    a[i__2].r = 0., a[i__2].i = 0.;
+/* L30: */
+	}
+/* L40: */
+    }
+    return 0;
+
+/*     End of ZUNGL2 */
+
+} /* zungl2_ */
+
+/* Subroutine */ int zunglq_(integer *m, integer *n, integer *k,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int zungl2_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static logical lquery;
+    static integer lwkopt;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNGLQ generates an M-by-N complex matrix Q with orthonormal rows,
+    which is defined as the first M rows of a product of K elementary
+    reflectors of order N
+
+          Q  =  H(k)' . . . H(2)' H(1)'
+
+    as returned by ZGELQF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. N >= M.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. M >= K >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the i-th row must contain the vector which defines
+            the elementary reflector H(i), for i = 1,2,...,k, as returned
+            by ZGELQF in the first k rows of its array argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGELQF.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,M).
+            For optimum performance LWORK >= M*NB, where NB is
+            the optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit;
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "ZUNGLQ", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*m) * nb;
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if (*n < *m) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *m)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*m) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNGLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*m <= 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *m;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGLQ", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *m;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGLQ", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk rows are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(kk+1:m,1:kk) to zero. */
+
+	i__1 = kk;
+	for (j = 1; j <= i__1; ++j) {
+	    i__2 = *m;
+	    for (i__ = kk + 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = 0., a[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *m) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	zungl2_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *m) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *n - i__ + 1;
+		zlarft_("Forward", "Rowwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H' to A(i+ib:m,i:n) from the right */
+
+		i__2 = *m - i__ - ib + 1;
+		i__3 = *n - i__ + 1;
+		zlarfb_("Right", "Conjugate transpose", "Forward", "Rowwise",
+			&i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[
+			1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &work[
+			ib + 1], &ldwork);
+	    }
+
+/*           Apply H' to columns i:n of current block */
+
+	    i__2 = *n - i__ + 1;
+	    zungl2_(&ib, &i__2, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set columns 1:i-1 of current block to zero */
+
+	    i__2 = i__ - 1;
+	    for (j = 1; j <= i__2; ++j) {
+		i__3 = i__ + ib - 1;
+		for (l = i__; l <= i__3; ++l) {
+		    i__4 = l + j * a_dim1;
+		    a[i__4].r = 0., a[i__4].i = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1].r = (doublereal) iws, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNGLQ */
+
+} /* zunglq_ */
+
+/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k,
+	doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+	work, integer *lwork, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
+
+    /* Local variables */
+    static integer i__, j, l, ib, nb, ki, kk, nx, iws, nbmin, iinfo;
+    extern /* Subroutine */ int zung2r_(integer *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNGQR generates an M-by-N complex matrix Q with orthonormal columns,
+    which is defined as the first N columns of a product of K elementary
+    reflectors of order M
+
+          Q  =  H(1) H(2) . . . H(k)
+
+    as returned by ZGEQRF.
+
+    Arguments
+    =========
+
+    M       (input) INTEGER
+            The number of rows of the matrix Q. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix Q. M >= N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines the
+            matrix Q. N >= K >= 0.
+
+    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
+            On entry, the i-th column must contain the vector which
+            defines the elementary reflector H(i), for i = 1,2,...,k, as
+            returned by ZGEQRF in the first k columns of its array
+            argument A.
+            On exit, the M-by-N matrix Q.
+
+    LDA     (input) INTEGER
+            The first dimension of the array A. LDA >= max(1,M).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEQRF.
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK. LWORK >= max(1,N).
+            For optimum performance LWORK >= N*NB, where NB is the
+            optimal blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument has an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    nb = ilaenv_(&c__1, "ZUNGQR", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
+    lwkopt = max(1,*n) * nb;
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    lquery = *lwork == -1;
+    if (*m < 0) {
+	*info = -1;
+    } else if ((*n < 0) || (*n > *m)) {
+	*info = -2;
+    } else if ((*k < 0) || (*k > *n)) {
+	*info = -3;
+    } else if (*lda < max(1,*m)) {
+	*info = -5;
+    } else if (*lwork < max(1,*n) && ! lquery) {
+	*info = -8;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNGQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (*n <= 0) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    nx = 0;
+    iws = *n;
+    if (nb > 1 && nb < *k) {
+
+/*
+          Determine when to cross over from blocked to unblocked code.
+
+   Computing MAX
+*/
+	i__1 = 0, i__2 = ilaenv_(&c__3, "ZUNGQR", " ", m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)1);
+	nx = max(i__1,i__2);
+	if (nx < *k) {
+
+/*           Determine if workspace is large enough for blocked code. */
+
+	    ldwork = *n;
+	    iws = ldwork * nb;
+	    if (*lwork < iws) {
+
+/*
+                Not enough workspace to use optimal NB:  reduce NB and
+                determine the minimum value of NB.
+*/
+
+		nb = *lwork / ldwork;
+/* Computing MAX */
+		i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNGQR", " ", m, n, k, &c_n1,
+			 (ftnlen)6, (ftnlen)1);
+		nbmin = max(i__1,i__2);
+	    }
+	}
+    }
+
+    if (nb >= nbmin && nb < *k && nx < *k) {
+
+/*
+          Use blocked code after the last block.
+          The first kk columns are handled by the block method.
+*/
+
+	ki = (*k - nx - 1) / nb * nb;
+/* Computing MIN */
+	i__1 = *k, i__2 = ki + nb;
+	kk = min(i__1,i__2);
+
+/*        Set A(1:kk,kk+1:n) to zero. */
+
+	i__1 = *n;
+	for (j = kk + 1; j <= i__1; ++j) {
+	    i__2 = kk;
+	    for (i__ = 1; i__ <= i__2; ++i__) {
+		i__3 = i__ + j * a_dim1;
+		a[i__3].r = 0., a[i__3].i = 0.;
+/* L10: */
+	    }
+/* L20: */
+	}
+    } else {
+	kk = 0;
+    }
+
+/*     Use unblocked code for the last or only block. */
+
+    if (kk < *n) {
+	i__1 = *m - kk;
+	i__2 = *n - kk;
+	i__3 = *k - kk;
+	zung2r_(&i__1, &i__2, &i__3, &a[kk + 1 + (kk + 1) * a_dim1], lda, &
+		tau[kk + 1], &work[1], &iinfo);
+    }
+
+    if (kk > 0) {
+
+/*        Use blocked code */
+
+	i__1 = -nb;
+	for (i__ = ki + 1; i__1 < 0 ? i__ >= 1 : i__ <= 1; i__ += i__1) {
+/* Computing MIN */
+	    i__2 = nb, i__3 = *k - i__ + 1;
+	    ib = min(i__2,i__3);
+	    if (i__ + ib <= *n) {
+
+/*
+                Form the triangular factor of the block reflector
+                H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+		i__2 = *m - i__ + 1;
+		zlarft_("Forward", "Columnwise", &i__2, &ib, &a[i__ + i__ *
+			a_dim1], lda, &tau[i__], &work[1], &ldwork);
+
+/*              Apply H to A(i:m,i+ib:n) from the left */
+
+		i__2 = *m - i__ + 1;
+		i__3 = *n - i__ - ib + 1;
+		zlarfb_("Left", "No transpose", "Forward", "Columnwise", &
+			i__2, &i__3, &ib, &a[i__ + i__ * a_dim1], lda, &work[
+			1], &ldwork, &a[i__ + (i__ + ib) * a_dim1], lda, &
+			work[ib + 1], &ldwork);
+	    }
+
+/*           Apply H to rows i:m of current block */
+
+	    i__2 = *m - i__ + 1;
+	    zung2r_(&i__2, &ib, &ib, &a[i__ + i__ * a_dim1], lda, &tau[i__], &
+		    work[1], &iinfo);
+
+/*           Set rows 1:i-1 of current block to zero */
+
+	    i__2 = i__ + ib - 1;
+	    for (j = i__; j <= i__2; ++j) {
+		i__3 = i__ - 1;
+		for (l = 1; l <= i__3; ++l) {
+		    i__4 = l + j * a_dim1;
+		    a[i__4].r = 0., a[i__4].i = 0.;
+/* L30: */
+		}
+/* L40: */
+	    }
+/* L50: */
+	}
+    }
+
+    work[1].r = (doublereal) iws, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNGQR */
+
+} /* zungqr_ */
+
+/* Subroutine */ int zunm2l_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, mi, ni, nq;
+    static doublecomplex aii;
+    static logical left;
+    static doublecomplex taui;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZUNM2L overwrites the general complex m-by-n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'C', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'C',
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by ZGEQLF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'C': apply Q' (Conjugate transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            ZGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEQLF.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the m-by-n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX*16 array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNM2L", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+    } else {
+	mi = *m;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) or H(i)' is applied to C(1:m-k+i,1:n) */
+
+	    mi = *m - *k + i__;
+	} else {
+
+/*           H(i) or H(i)' is applied to C(1:m,1:n-k+i) */
+
+	    ni = *n - *k + i__;
+	}
+
+/*        Apply H(i) or H(i)' */
+
+	if (notran) {
+	    i__3 = i__;
+	    taui.r = tau[i__3].r, taui.i = tau[i__3].i;
+	} else {
+	    d_cnjg(&z__1, &tau[i__]);
+	    taui.r = z__1.r, taui.i = z__1.i;
+	}
+	i__3 = nq - *k + i__ + i__ * a_dim1;
+	aii.r = a[i__3].r, aii.i = a[i__3].i;
+	i__3 = nq - *k + i__ + i__ * a_dim1;
+	a[i__3].r = 1., a[i__3].i = 0.;
+	zlarf_(side, &mi, &ni, &a[i__ * a_dim1 + 1], &c__1, &taui, &c__[
+		c_offset], ldc, &work[1]);
+	i__3 = nq - *k + i__ + i__ * a_dim1;
+	a[i__3].r = aii.r, a[i__3].i = aii.i;
+/* L10: */
+    }
+    return 0;
+
+/*     End of ZUNM2L */
+
+} /* zunm2l_ */
+
+/* Subroutine */ int zunm2r_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static doublecomplex aii;
+    static logical left;
+    static doublecomplex taui;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZUNM2R overwrites the general complex m-by-n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'C', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'C',
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by ZGEQRF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'C': apply Q' (Conjugate transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            ZGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEQRF.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the m-by-n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX*16 array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNM2R", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && ! notran) || (! left && notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) or H(i)' is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) or H(i)' is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) or H(i)' */
+
+	if (notran) {
+	    i__3 = i__;
+	    taui.r = tau[i__3].r, taui.i = tau[i__3].i;
+	} else {
+	    d_cnjg(&z__1, &tau[i__]);
+	    taui.r = z__1.r, taui.i = z__1.i;
+	}
+	i__3 = i__ + i__ * a_dim1;
+	aii.r = a[i__3].r, aii.i = a[i__3].i;
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = 1., a[i__3].i = 0.;
+	zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], &c__1, &taui, &c__[ic
+		+ jc * c_dim1], ldc, &work[1]);
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = aii.r, a[i__3].i = aii.i;
+/* L10: */
+    }
+    return 0;
+
+/*     End of ZUNM2R */
+
+} /* zunm2r_ */
+
+/* Subroutine */ int zunmbr_(char *vect, char *side, char *trans, integer *m,
+	integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex
+	*tau, doublecomplex *c__, integer *ldc, doublecomplex *work, integer *
+	lwork, integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2];
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static logical notran, applyq;
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+    extern /* Subroutine */ int zunmlq_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    If VECT = 'Q', ZUNMBR overwrites the general complex M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    If VECT = 'P', ZUNMBR overwrites the general complex M-by-N matrix C
+    with
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      P * C          C * P
+    TRANS = 'C':      P**H * C       C * P**H
+
+    Here Q and P**H are the unitary matrices determined by ZGEBRD when
+    reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q
+    and P**H are defined as products of elementary reflectors H(i) and
+    G(i) respectively.
+
+    Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the
+    order of the unitary matrix Q or P**H that is applied.
+
+    If VECT = 'Q', A is assumed to have been an NQ-by-K matrix:
+    if nq >= k, Q = H(1) H(2) . . . H(k);
+    if nq < k, Q = H(1) H(2) . . . H(nq-1).
+
+    If VECT = 'P', A is assumed to have been a K-by-NQ matrix:
+    if k < nq, P = G(1) G(2) . . . G(k);
+    if k >= nq, P = G(1) G(2) . . . G(nq-1).
+
+    Arguments
+    =========
+
+    VECT    (input) CHARACTER*1
+            = 'Q': apply Q or Q**H;
+            = 'P': apply P or P**H.
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q, Q**H, P or P**H from the Left;
+            = 'R': apply Q, Q**H, P or P**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q or P;
+            = 'C':  Conjugate transpose, apply Q**H or P**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            If VECT = 'Q', the number of columns in the original
+            matrix reduced by ZGEBRD.
+            If VECT = 'P', the number of rows in the original
+            matrix reduced by ZGEBRD.
+            K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension
+                                  (LDA,min(nq,K)) if VECT = 'Q'
+                                  (LDA,nq)        if VECT = 'P'
+            The vectors which define the elementary reflectors H(i) and
+            G(i), whose products determine the matrices Q and P, as
+            returned by ZGEBRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If VECT = 'Q', LDA >= max(1,nq);
+            if VECT = 'P', LDA >= max(1,min(nq,K)).
+
+    TAU     (input) COMPLEX*16 array, dimension (min(nq,K))
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i) or G(i) which determines Q or P, as returned
+            by ZGEBRD in the array argument TAUQ or TAUP.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q
+            or P*C or P**H*C or C*P or C*P**H.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    applyq = lsame_(vect, "Q");
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q or P and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! applyq && ! lsame_(vect, "P")) {
+	*info = -1;
+    } else if (! left && ! lsame_(side, "R")) {
+	*info = -2;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*k < 0) {
+	*info = -6;
+    } else /* if(complicated condition) */ {
+/* Computing MAX */
+	i__1 = 1, i__2 = min(nq,*k);
+	if ((applyq && *lda < max(1,nq)) || (! applyq && *lda < max(i__1,i__2)
+		)) {
+	    *info = -8;
+	} else if (*ldc < max(1,*m)) {
+	    *info = -11;
+	} else if (*lwork < max(1,nw) && ! lquery) {
+	    *info = -13;
+	}
+    }
+
+    if (*info == 0) {
+	if (applyq) {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *m - 1;
+		i__2 = *m - 1;
+		nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, &i__1, n, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__3[0] = 1, a__1[0] = side;
+		i__3[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+		i__1 = *n - 1;
+		i__2 = *n - 1;
+		nb = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, &i__1, &i__2, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNMBR", &i__1);
+	return 0;
+    } else if (lquery) {
+    }
+
+/*     Quick return if possible */
+
+    work[1].r = 1., work[1].i = 0.;
+    if ((*m == 0) || (*n == 0)) {
+	return 0;
+    }
+
+    if (applyq) {
+
+/*        Apply Q */
+
+	if (nq >= *k) {
+
+/*           Q was determined by a call to ZGEBRD with nq >= k */
+
+	    zunmqr_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           Q was determined by a call to ZGEBRD with nq < k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    zunmqr_(side, trans, &mi, &ni, &i__1, &a[a_dim1 + 2], lda, &tau[1]
+		    , &c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+	}
+    } else {
+
+/*        Apply P */
+
+	if (notran) {
+	    *(unsigned char *)transt = 'C';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+	if (nq > *k) {
+
+/*           P was determined by a call to ZGEBRD with nq > k */
+
+	    zunmlq_(side, transt, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		    c_offset], ldc, &work[1], lwork, &iinfo);
+	} else if (nq > 1) {
+
+/*           P was determined by a call to ZGEBRD with nq <= k */
+
+	    if (left) {
+		mi = *m - 1;
+		ni = *n;
+		i1 = 2;
+		i2 = 1;
+	    } else {
+		mi = *m;
+		ni = *n - 1;
+		i1 = 1;
+		i2 = 2;
+	    }
+	    i__1 = nq - 1;
+	    zunmlq_(side, transt, &mi, &ni, &i__1, &a[((a_dim1) << (1)) + 1],
+		    lda, &tau[1], &c__[i1 + i2 * c_dim1], ldc, &work[1],
+		    lwork, &iinfo);
+	}
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNMBR */
+
+} /* zunmbr_ */
+
+/* Subroutine */ int zunml2_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *info)
+{
+    /* System generated locals */
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
+    doublecomplex z__1;
+
+    /* Builtin functions */
+    void d_cnjg(doublecomplex *, doublecomplex *);
+
+    /* Local variables */
+    static integer i__, i1, i2, i3, ic, jc, mi, ni, nq;
+    static doublecomplex aii;
+    static logical left;
+    static doublecomplex taui;
+    extern logical lsame_(char *, char *);
+    extern /* Subroutine */ int zlarf_(char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *, doublecomplex *), xerbla_(char *, integer *), zlacgv_(integer *, doublecomplex *, integer *);
+    static logical notran;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       September 30, 1994
+
+
+    Purpose
+    =======
+
+    ZUNML2 overwrites the general complex m-by-n matrix C with
+
+          Q * C  if SIDE = 'L' and TRANS = 'N', or
+
+          Q'* C  if SIDE = 'L' and TRANS = 'C', or
+
+          C * Q  if SIDE = 'R' and TRANS = 'N', or
+
+          C * Q' if SIDE = 'R' and TRANS = 'C',
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k)' . . . H(2)' H(1)'
+
+    as returned by ZGELQF. Q is of order m if SIDE = 'L' and of order n
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q' from the Left
+            = 'R': apply Q or Q' from the Right
+
+    TRANS   (input) CHARACTER*1
+            = 'N': apply Q  (No transpose)
+            = 'C': apply Q' (Conjugate transpose)
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            ZGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGELQF.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the m-by-n matrix C.
+            On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace) COMPLEX*16 array, dimension
+                                     (N) if SIDE = 'L',
+                                     (M) if SIDE = 'R'
+
+    INFO    (output) INTEGER
+            = 0: successful exit
+            < 0: if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+
+/*     NQ is the order of Q */
+
+    if (left) {
+	nq = *m;
+    } else {
+	nq = *n;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    }
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNML2", &i__1);
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	return 0;
+    }
+
+    if ((left && notran) || (! left && ! notran)) {
+	i1 = 1;
+	i2 = *k;
+	i3 = 1;
+    } else {
+	i1 = *k;
+	i2 = 1;
+	i3 = -1;
+    }
+
+    if (left) {
+	ni = *n;
+	jc = 1;
+    } else {
+	mi = *m;
+	ic = 1;
+    }
+
+    i__1 = i2;
+    i__2 = i3;
+    for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+	if (left) {
+
+/*           H(i) or H(i)' is applied to C(i:m,1:n) */
+
+	    mi = *m - i__ + 1;
+	    ic = i__;
+	} else {
+
+/*           H(i) or H(i)' is applied to C(1:m,i:n) */
+
+	    ni = *n - i__ + 1;
+	    jc = i__;
+	}
+
+/*        Apply H(i) or H(i)' */
+
+	if (notran) {
+	    d_cnjg(&z__1, &tau[i__]);
+	    taui.r = z__1.r, taui.i = z__1.i;
+	} else {
+	    i__3 = i__;
+	    taui.r = tau[i__3].r, taui.i = tau[i__3].i;
+	}
+	if (i__ < nq) {
+	    i__3 = nq - i__;
+	    zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+	i__3 = i__ + i__ * a_dim1;
+	aii.r = a[i__3].r, aii.i = a[i__3].i;
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = 1., a[i__3].i = 0.;
+	zlarf_(side, &mi, &ni, &a[i__ + i__ * a_dim1], lda, &taui, &c__[ic +
+		jc * c_dim1], ldc, &work[1]);
+	i__3 = i__ + i__ * a_dim1;
+	a[i__3].r = aii.r, a[i__3].i = aii.i;
+	if (i__ < nq) {
+	    i__3 = nq - i__;
+	    zlacgv_(&i__3, &a[i__ + (i__ + 1) * a_dim1], lda);
+	}
+/* L10: */
+    }
+    return 0;
+
+/*     End of ZUNML2 */
+
+} /* zunml2_ */
+
+/* Subroutine */ int zunmlq_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int zunml2_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static logical notran;
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static char transt[1];
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNMLQ overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k)' . . . H(2)' H(1)'
+
+    as returned by ZGELQF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Conjugate transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension
+                                 (LDA,M) if SIDE = 'L',
+                                 (LDA,N) if SIDE = 'R'
+            The i-th row must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            ZGELQF in the first k rows of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A. LDA >= max(1,K).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGELQF.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,*k)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMLQ", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNMLQ", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMLQ", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	zunml2_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	if (notran) {
+	    *(unsigned char *)transt = 'C';
+	} else {
+	    *(unsigned char *)transt = 'N';
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    zlarft_("Forward", "Rowwise", &i__4, &ib, &a[i__ + i__ * a_dim1],
+		    lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    zlarfb_(side, transt, "Forward", "Rowwise", &mi, &ni, &ib, &a[i__
+		    + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc * c_dim1],
+		    ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNMLQ */
+
+} /* zunmlq_ */
+
+/* Subroutine */ int zunmql_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int zunm2l_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static logical notran;
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNMQL overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(k) . . . H(2) H(1)
+
+    as returned by ZGEQLF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            ZGEQLF in the last k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEQLF.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQL", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNMQL", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQL", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	zunm2l_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && notran) || (! left && ! notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	} else {
+	    mi = *m;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i+ib-1) . . . H(i+1) H(i)
+*/
+
+	    i__4 = nq - *k + i__ + ib - 1;
+	    zlarft_("Backward", "Columnwise", &i__4, &ib, &a[i__ * a_dim1 + 1]
+		    , lda, &tau[i__], t, &c__65);
+	    if (left) {
+
+/*              H or H' is applied to C(1:m-k+i+ib-1,1:n) */
+
+		mi = *m - *k + i__ + ib - 1;
+	    } else {
+
+/*              H or H' is applied to C(1:m,1:n-k+i+ib-1) */
+
+		ni = *n - *k + i__ + ib - 1;
+	    }
+
+/*           Apply H or H' */
+
+	    zlarfb_(side, trans, "Backward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ * a_dim1 + 1], lda, t, &c__65, &c__[c_offset], ldc, &
+		    work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNMQL */
+
+} /* zunmql_ */
+
+/* Subroutine */ int zunmqr_(char *side, char *trans, integer *m, integer *n,
+	integer *k, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3[2], i__4,
+	    i__5;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i__;
+    static doublecomplex t[4160]	/* was [65][64] */;
+    static integer i1, i2, i3, ib, ic, jc, nb, mi, ni, nq, nw, iws;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer nbmin, iinfo;
+    extern /* Subroutine */ int zunm2r_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *), xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    extern /* Subroutine */ int zlarfb_(char *, char *, char *, char *,
+	    integer *, integer *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, integer *,
+	    doublecomplex *, integer *);
+    static logical notran;
+    static integer ldwork;
+    extern /* Subroutine */ int zlarft_(char *, char *, integer *, integer *,
+	    doublecomplex *, integer *, doublecomplex *, doublecomplex *,
+	    integer *);
+    static integer lwkopt;
+    static logical lquery;
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNMQR overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix defined as the product of k
+    elementary reflectors
+
+          Q = H(1) H(2) . . . H(k)
+
+    as returned by ZGEQRF. Q is of order M if SIDE = 'L' and of order N
+    if SIDE = 'R'.
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Conjugate transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    K       (input) INTEGER
+            The number of elementary reflectors whose product defines
+            the matrix Q.
+            If SIDE = 'L', M >= K >= 0;
+            if SIDE = 'R', N >= K >= 0.
+
+    A       (input) COMPLEX*16 array, dimension (LDA,K)
+            The i-th column must contain the vector which defines the
+            elementary reflector H(i), for i = 1,2,...,k, as returned by
+            ZGEQRF in the first k columns of its array argument A.
+            A is modified by the routine but restored on exit.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            If SIDE = 'L', LDA >= max(1,M);
+            if SIDE = 'R', LDA >= max(1,N).
+
+    TAU     (input) COMPLEX*16 array, dimension (K)
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZGEQRF.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >= M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    notran = lsame_(trans, "N");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! notran && ! lsame_(trans, "C")) {
+	*info = -2;
+    } else if (*m < 0) {
+	*info = -3;
+    } else if (*n < 0) {
+	*info = -4;
+    } else if ((*k < 0) || (*k > nq)) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+
+/*
+          Determine the block size.  NB may be at most NBMAX, where NBMAX
+          is used to define the local array T.
+
+   Computing MIN
+   Writing concatenation
+*/
+	i__3[0] = 1, a__1[0] = side;
+	i__3[1] = 1, a__1[1] = trans;
+	s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	i__1 = 64, i__2 = ilaenv_(&c__1, "ZUNMQR", ch__1, m, n, k, &c_n1, (
+		ftnlen)6, (ftnlen)2);
+	nb = min(i__1,i__2);
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__1 = -(*info);
+	xerbla_("ZUNMQR", &i__1);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (*k == 0)) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    nbmin = 2;
+    ldwork = nw;
+    if (nb > 1 && nb < *k) {
+	iws = nw * nb;
+	if (*lwork < iws) {
+	    nb = *lwork / ldwork;
+/*
+   Computing MAX
+   Writing concatenation
+*/
+	    i__3[0] = 1, a__1[0] = side;
+	    i__3[1] = 1, a__1[1] = trans;
+	    s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2);
+	    i__1 = 2, i__2 = ilaenv_(&c__2, "ZUNMQR", ch__1, m, n, k, &c_n1, (
+		    ftnlen)6, (ftnlen)2);
+	    nbmin = max(i__1,i__2);
+	}
+    } else {
+	iws = nw;
+    }
+
+    if ((nb < nbmin) || (nb >= *k)) {
+
+/*        Use unblocked code */
+
+	zunm2r_(side, trans, m, n, k, &a[a_offset], lda, &tau[1], &c__[
+		c_offset], ldc, &work[1], &iinfo);
+    } else {
+
+/*        Use blocked code */
+
+	if ((left && ! notran) || (! left && notran)) {
+	    i1 = 1;
+	    i2 = *k;
+	    i3 = nb;
+	} else {
+	    i1 = (*k - 1) / nb * nb + 1;
+	    i2 = 1;
+	    i3 = -nb;
+	}
+
+	if (left) {
+	    ni = *n;
+	    jc = 1;
+	} else {
+	    mi = *m;
+	    ic = 1;
+	}
+
+	i__1 = i2;
+	i__2 = i3;
+	for (i__ = i1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+/* Computing MIN */
+	    i__4 = nb, i__5 = *k - i__ + 1;
+	    ib = min(i__4,i__5);
+
+/*
+             Form the triangular factor of the block reflector
+             H = H(i) H(i+1) . . . H(i+ib-1)
+*/
+
+	    i__4 = nq - i__ + 1;
+	    zlarft_("Forward", "Columnwise", &i__4, &ib, &a[i__ + i__ *
+		    a_dim1], lda, &tau[i__], t, &c__65)
+		    ;
+	    if (left) {
+
+/*              H or H' is applied to C(i:m,1:n) */
+
+		mi = *m - i__ + 1;
+		ic = i__;
+	    } else {
+
+/*              H or H' is applied to C(1:m,i:n) */
+
+		ni = *n - i__ + 1;
+		jc = i__;
+	    }
+
+/*           Apply H or H' */
+
+	    zlarfb_(side, trans, "Forward", "Columnwise", &mi, &ni, &ib, &a[
+		    i__ + i__ * a_dim1], lda, t, &c__65, &c__[ic + jc *
+		    c_dim1], ldc, &work[1], &ldwork);
+/* L10: */
+	}
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNMQR */
+
+} /* zunmqr_ */
+
+/* Subroutine */ int zunmtr_(char *side, char *uplo, char *trans, integer *m,
+	integer *n, doublecomplex *a, integer *lda, doublecomplex *tau,
+	doublecomplex *c__, integer *ldc, doublecomplex *work, integer *lwork,
+	 integer *info)
+{
+    /* System generated locals */
+    address a__1[2];
+    integer a_dim1, a_offset, c_dim1, c_offset, i__1[2], i__2, i__3;
+    char ch__1[2];
+
+    /* Builtin functions */
+    /* Subroutine */ int s_cat(char *, char **, integer *, integer *, ftnlen);
+
+    /* Local variables */
+    static integer i1, i2, nb, mi, ni, nq, nw;
+    static logical left;
+    extern logical lsame_(char *, char *);
+    static integer iinfo;
+    static logical upper;
+    extern /* Subroutine */ int xerbla_(char *, integer *);
+    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
+	    integer *, integer *, ftnlen, ftnlen);
+    static integer lwkopt;
+    static logical lquery;
+    extern /* Subroutine */ int zunmql_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *,
+	    integer *, doublecomplex *, integer *, doublecomplex *,
+	    doublecomplex *, integer *, doublecomplex *, integer *, integer *);
+
+
+/*
+    -- LAPACK routine (version 3.0) --
+       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
+       Courant Institute, Argonne National Lab, and Rice University
+       June 30, 1999
+
+
+    Purpose
+    =======
+
+    ZUNMTR overwrites the general complex M-by-N matrix C with
+
+                    SIDE = 'L'     SIDE = 'R'
+    TRANS = 'N':      Q * C          C * Q
+    TRANS = 'C':      Q**H * C       C * Q**H
+
+    where Q is a complex unitary matrix of order nq, with nq = m if
+    SIDE = 'L' and nq = n if SIDE = 'R'. Q is defined as the product of
+    nq-1 elementary reflectors, as returned by ZHETRD:
+
+    if UPLO = 'U', Q = H(nq-1) . . . H(2) H(1);
+
+    if UPLO = 'L', Q = H(1) H(2) . . . H(nq-1).
+
+    Arguments
+    =========
+
+    SIDE    (input) CHARACTER*1
+            = 'L': apply Q or Q**H from the Left;
+            = 'R': apply Q or Q**H from the Right.
+
+    UPLO    (input) CHARACTER*1
+            = 'U': Upper triangle of A contains elementary reflectors
+                   from ZHETRD;
+            = 'L': Lower triangle of A contains elementary reflectors
+                   from ZHETRD.
+
+    TRANS   (input) CHARACTER*1
+            = 'N':  No transpose, apply Q;
+            = 'C':  Conjugate transpose, apply Q**H.
+
+    M       (input) INTEGER
+            The number of rows of the matrix C. M >= 0.
+
+    N       (input) INTEGER
+            The number of columns of the matrix C. N >= 0.
+
+    A       (input) COMPLEX*16 array, dimension
+                                 (LDA,M) if SIDE = 'L'
+                                 (LDA,N) if SIDE = 'R'
+            The vectors which define the elementary reflectors, as
+            returned by ZHETRD.
+
+    LDA     (input) INTEGER
+            The leading dimension of the array A.
+            LDA >= max(1,M) if SIDE = 'L'; LDA >= max(1,N) if SIDE = 'R'.
+
+    TAU     (input) COMPLEX*16 array, dimension
+                                 (M-1) if SIDE = 'L'
+                                 (N-1) if SIDE = 'R'
+            TAU(i) must contain the scalar factor of the elementary
+            reflector H(i), as returned by ZHETRD.
+
+    C       (input/output) COMPLEX*16 array, dimension (LDC,N)
+            On entry, the M-by-N matrix C.
+            On exit, C is overwritten by Q*C or Q**H*C or C*Q**H or C*Q.
+
+    LDC     (input) INTEGER
+            The leading dimension of the array C. LDC >= max(1,M).
+
+    WORK    (workspace/output) COMPLEX*16 array, dimension (LWORK)
+            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+
+    LWORK   (input) INTEGER
+            The dimension of the array WORK.
+            If SIDE = 'L', LWORK >= max(1,N);
+            if SIDE = 'R', LWORK >= max(1,M).
+            For optimum performance LWORK >= N*NB if SIDE = 'L', and
+            LWORK >=M*NB if SIDE = 'R', where NB is the optimal
+            blocksize.
+
+            If LWORK = -1, then a workspace query is assumed; the routine
+            only calculates the optimal size of the WORK array, returns
+            this value as the first entry of the WORK array, and no error
+            message related to LWORK is issued by XERBLA.
+
+    INFO    (output) INTEGER
+            = 0:  successful exit
+            < 0:  if INFO = -i, the i-th argument had an illegal value
+
+    =====================================================================
+
+
+       Test the input arguments
+*/
+
+    /* Parameter adjustments */
+    a_dim1 = *lda;
+    a_offset = 1 + a_dim1;
+    a -= a_offset;
+    --tau;
+    c_dim1 = *ldc;
+    c_offset = 1 + c_dim1;
+    c__ -= c_offset;
+    --work;
+
+    /* Function Body */
+    *info = 0;
+    left = lsame_(side, "L");
+    upper = lsame_(uplo, "U");
+    lquery = *lwork == -1;
+
+/*     NQ is the order of Q and NW is the minimum dimension of WORK */
+
+    if (left) {
+	nq = *m;
+	nw = *n;
+    } else {
+	nq = *n;
+	nw = *m;
+    }
+    if (! left && ! lsame_(side, "R")) {
+	*info = -1;
+    } else if (! upper && ! lsame_(uplo, "L")) {
+	*info = -2;
+    } else if (! lsame_(trans, "N") && ! lsame_(trans,
+	    "C")) {
+	*info = -3;
+    } else if (*m < 0) {
+	*info = -4;
+    } else if (*n < 0) {
+	*info = -5;
+    } else if (*lda < max(1,nq)) {
+	*info = -7;
+    } else if (*ldc < max(1,*m)) {
+	*info = -10;
+    } else if (*lwork < max(1,nw) && ! lquery) {
+	*info = -12;
+    }
+
+    if (*info == 0) {
+	if (upper) {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "ZUNMQL", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "ZUNMQL", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	} else {
+	    if (left) {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *m - 1;
+		i__3 = *m - 1;
+		nb = ilaenv_(&c__1, "ZUNMQR", ch__1, &i__2, n, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    } else {
+/* Writing concatenation */
+		i__1[0] = 1, a__1[0] = side;
+		i__1[1] = 1, a__1[1] = trans;
+		s_cat(ch__1, a__1, i__1, &c__2, (ftnlen)2);
+		i__2 = *n - 1;
+		i__3 = *n - 1;
+		nb = ilaenv_(&c__1, "ZUNMQR", ch__1, m, &i__2, &i__3, &c_n1, (
+			ftnlen)6, (ftnlen)2);
+	    }
+	}
+	lwkopt = max(1,nw) * nb;
+	work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    }
+
+    if (*info != 0) {
+	i__2 = -(*info);
+	xerbla_("ZUNMTR", &i__2);
+	return 0;
+    } else if (lquery) {
+	return 0;
+    }
+
+/*     Quick return if possible */
+
+    if (((*m == 0) || (*n == 0)) || (nq == 1)) {
+	work[1].r = 1., work[1].i = 0.;
+	return 0;
+    }
+
+    if (left) {
+	mi = *m - 1;
+	ni = *n;
+    } else {
+	mi = *m;
+	ni = *n - 1;
+    }
+
+    if (upper) {
+
+/*        Q was determined by a call to ZHETRD with UPLO = 'U' */
+
+	i__2 = nq - 1;
+	zunmql_(side, trans, &mi, &ni, &i__2, &a[((a_dim1) << (1)) + 1], lda,
+		&tau[1], &c__[c_offset], ldc, &work[1], lwork, &iinfo);
+    } else {
+
+/*        Q was determined by a call to ZHETRD with UPLO = 'L' */
+
+	if (left) {
+	    i1 = 2;
+	    i2 = 1;
+	} else {
+	    i1 = 1;
+	    i2 = 2;
+	}
+	i__2 = nq - 1;
+	zunmqr_(side, trans, &mi, &ni, &i__2, &a[a_dim1 + 2], lda, &tau[1], &
+		c__[i1 + i2 * c_dim1], ldc, &work[1], lwork, &iinfo);
+    }
+    work[1].r = (doublereal) lwkopt, work[1].i = 0.;
+    return 0;
+
+/*     End of ZUNMTR */
+
+} /* zunmtr_ */
+
diff --git a/numpy/core/src/umath/ufunc_object.c b/numpy/core/src/umath/ufunc_object.c
index 2712f0474f1b..124185bfd9e7 100644
--- a/numpy/core/src/umath/ufunc_object.c
+++ b/numpy/core/src/umath/ufunc_object.c
@@ -71,6 +71,12 @@
 static int
 _does_loop_use_arrays(void *data);
 
+static int
+assign_reduce_identity_zero(PyArrayObject *result);
+
+static int
+assign_reduce_identity_one(PyArrayObject *result);
+
 /*
  * fpstatus is the ufunc_formatted hardware status
  * errmask is the handling mask specified by the user.
@@ -1643,7 +1649,7 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
                         PyArrayObject **op)
 {
     int nin, nout;
-    int i, idim, nop;
+    int i, j, idim, nop;
     char *ufunc_name;
     int retval = -1, subok = 1;
     int needs_api = 0;
@@ -1651,11 +1657,12 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     PyArray_Descr *dtypes[NPY_MAXARGS];
 
     /* Use remapped axes for generalized ufunc */
-    int broadcast_ndim, op_ndim;
+    int broadcast_ndim, iter_ndim;
     int op_axes_arrays[NPY_MAXARGS][NPY_MAXDIMS];
     int *op_axes[NPY_MAXARGS];
 
     npy_uint32 op_flags[NPY_MAXARGS];
+    npy_intp iter_shape[NPY_MAXARGS];
 
     NpyIter *iter = NULL;
 
@@ -1673,7 +1680,9 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
     npy_intp *inner_strides = NULL;
 
     npy_intp *inner_strides_tmp, *ax_strides_tmp[NPY_MAXDIMS];
-    int core_dim_ixs_size, *core_dim_ixs;
+    /* The sizes of the core dimensions (# entries is ufunc->core_num_dim_ix) */
+    npy_intp *core_dim_sizes = inner_dimensions + 1;
+    int core_dim_ixs_size;
 
     /* The __array_prepare__ function to call for each output */
     PyObject *arr_prep[NPY_MAXARGS];
@@ -1719,7 +1728,11 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         goto fail;
     }
 
-    /* Figure out the number of dimensions needed by the iterator */
+    /*
+     * Figure out the number of iteration dimensions, which
+     * is the broadcast result of all the input non-core
+     * dimensions.
+     */
     broadcast_ndim = 0;
     for (i = 0; i < nin; ++i) {
         int n = PyArray_NDIM(op[i]) - ufunc->core_num_dims[i];
@@ -1727,8 +1740,18 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
             broadcast_ndim = n;
         }
     }
-    op_ndim = broadcast_ndim + ufunc->core_num_dim_ix;
-    if (op_ndim > NPY_MAXDIMS) {
+
+    /*
+     * Figure out the number of iterator creation dimensions,
+     * which is the broadcast dimensions + all the core dimensions of
+     * the outputs, so that the iterator can allocate those output
+     * dimensions following the rules of order='F', for example.
+     */
+    iter_ndim = broadcast_ndim;
+    for (i = nin; i < nop; ++i) {
+        iter_ndim += ufunc->core_num_dims[i];
+    }
+    if (iter_ndim > NPY_MAXDIMS) {
         PyErr_Format(PyExc_ValueError,
                     "too many dimensions for generalized ufunc %s",
                     ufunc_name);
@@ -1736,9 +1759,76 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
         goto fail;
     }
 
+    /*
+     * Validate the core dimensions of all the operands,
+     * and collect all of the labeled core dimension sizes
+     * into the array 'core_dim_sizes'. Initialize them to
+     * 1, for example in the case where the operand broadcasts
+     * to a core dimension, it won't be visited.
+     */
+    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
+        core_dim_sizes[i] = 1;
+    }
+    for (i = 0; i < nop; ++i) {
+        if (op[i] != NULL) {
+            int dim_offset = ufunc->core_offsets[i];
+            int num_dims = ufunc->core_num_dims[i];
+            int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
+            /* Make sure any output operand has enough dimensions */
+            if (i >= nin && core_start_dim < 0) {
+                PyErr_Format(PyExc_ValueError,
+                        "%s: Output operand %d does not have enough dimensions "
+                        "(has %d, gufunc core with signature %s "
+                        "requires %d)",
+                        ufunc_name, i - nin, PyArray_NDIM(op[i]),
+                        ufunc->core_signature, num_dims);
+                retval = -1;
+                goto fail;
+            }
+
+            /*
+             * Make sure each core dimension matches all other core
+             * dimensions with the same label
+             *
+             * NOTE: For input operands, core_start_dim may be negative.
+             *       In that case, the operand is being broadcast onto
+             *       core dimensions. For example, a scalar will broadcast
+             *       to fit any core signature.
+             */
+            if (core_start_dim >= 0) {
+                idim = 0;
+            } else {
+                idim = -core_start_dim;
+            }
+            for (; idim < num_dims; ++idim) {
+                int core_dim_index = ufunc->core_dim_ixs[dim_offset + idim];
+                npy_intp op_dim_size =
+                        PyArray_SHAPE(op[i])[core_start_dim + idim];
+                if (core_dim_sizes[core_dim_index] == 1) {
+                    core_dim_sizes[core_dim_index] = op_dim_size;
+                } else if ((i >= nin || op_dim_size != 1) &&
+                            core_dim_sizes[core_dim_index] != op_dim_size) {
+                    PyErr_Format(PyExc_ValueError,
+                            "%s: Operand %d has a mismatch in its core "
+                            "dimension %d, with gufunc signature %s "
+                            "(size %d is different from %d)",
+                            ufunc_name, i, idim, ufunc->core_signature,
+                            op_dim_size, core_dim_sizes[core_dim_index]);
+                    retval = -1;
+                    goto fail;
+                }
+            }
+        }
+    }
+
+    /* Fill in the initial part of 'iter_shape' */
+    for (idim = 0; idim < broadcast_ndim; ++idim) {
+        iter_shape[idim] = -1;
+    }
+
     /* Fill in op_axes for all the operands */
+    j = broadcast_ndim;
     core_dim_ixs_size = 0;
-    core_dim_ixs = ufunc->core_dim_ixs;
     for (i = 0; i < nop; ++i) {
         int n;
         if (op[i]) {
@@ -1760,22 +1850,27 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
                 op_axes_arrays[i][idim] = -1;
             }
         }
-        /* Use the signature information for the rest */
-        for (idim = broadcast_ndim; idim < op_ndim; ++idim) {
+
+        /* Any output core dimensions shape should be ignored */
+        for (idim = broadcast_ndim; idim < iter_ndim; ++idim) {
             op_axes_arrays[i][idim] = -1;
         }
-        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
-            if (n + idim >= 0) {
-                op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] =
-                                                                    n + idim;
-            }
-            else {
-                op_axes_arrays[i][broadcast_ndim + core_dim_ixs[idim]] = -1;
+
+        /* Except for when it belongs to this output */
+        if (i >= nin) {
+            int dim_offset = ufunc->core_offsets[i];
+            int num_dims = ufunc->core_num_dims[i];
+            /* Fill in 'iter_shape' and 'op_axes' for this output */
+            for (idim = 0; idim < num_dims; ++idim) {
+                iter_shape[j] = core_dim_sizes[
+                                        ufunc->core_dim_ixs[dim_offset + idim]];
+                op_axes_arrays[i][j] = n + idim;
+                ++j;
             }
         }
-        core_dim_ixs_size += ufunc->core_num_dims[i];
-        core_dim_ixs += ufunc->core_num_dims[i];
+
         op_axes[i] = op_axes_arrays[i];
+        core_dim_ixs_size += ufunc->core_num_dims[i];
     }
 
     /* Get the buffersize, errormask, and error object globals */
@@ -1881,12 +1976,26 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
                       NPY_ITER_NO_BROADCAST;
     }
 
+    /*
+     * If there are no iteration dimensions, create a fake one
+     * so that the scalar edge case works right.
+     */
+    if (iter_ndim == 0) {
+        iter_ndim = 1;
+        iter_shape[0] = 1;
+        for (i = 0; i < nop; ++i) {
+            op_axes[i][0] = -1;
+        }
+    }
+
     /* Create the iterator */
     iter = NpyIter_AdvancedNew(nop, op, NPY_ITER_MULTI_INDEX|
                                       NPY_ITER_REFS_OK|
-                                      NPY_ITER_REDUCE_OK,
+                                      NPY_ITER_REDUCE_OK|
+                                      NPY_ITER_ZEROSIZE_OK,
                            order, NPY_UNSAFE_CASTING, op_flags,
-                           dtypes, op_ndim, op_axes, NULL, 0);
+                           dtypes, iter_ndim,
+                           op_axes, iter_shape, 0);
     if (iter == NULL) {
         retval = -1;
         goto fail;
@@ -1906,37 +2015,38 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
      */
     inner_strides = (npy_intp *)PyArray_malloc(
                         NPY_SIZEOF_INTP * (nop+core_dim_ixs_size));
-    /* The strides after the first nop match core_dim_ixs */
-    core_dim_ixs = ufunc->core_dim_ixs;
-    inner_strides_tmp = inner_strides + nop;
-    for (idim = 0; idim < ufunc->core_num_dim_ix; ++idim) {
-        ax_strides_tmp[idim] = NpyIter_GetAxisStrideArray(iter,
-                                                broadcast_ndim+idim);
-        if (ax_strides_tmp[idim] == NULL) {
-            retval = -1;
-            goto fail;
-        }
-    }
+    /* Copy the strides after the first nop */
+    idim = nop;
     for (i = 0; i < nop; ++i) {
-        for (idim = 0; idim < ufunc->core_num_dims[i]; ++idim) {
-            inner_strides_tmp[idim] = ax_strides_tmp[core_dim_ixs[idim]][i];
+        int dim_offset = ufunc->core_offsets[i];
+        int num_dims = ufunc->core_num_dims[i];
+        int core_start_dim = PyArray_NDIM(op[i]) - num_dims;
+        /*
+         * Need to use the arrays in the iterator, not op, because
+         * a copy with a different-sized type may have been made.
+         */
+        PyArrayObject *arr = NpyIter_GetOperandArray(iter)[i];
+        npy_intp *shape = PyArray_SHAPE(arr);
+        npy_intp *strides = PyArray_STRIDES(arr);
+        for (j = 0; j < num_dims; ++j) {
+            if (core_start_dim + j >= 0) {
+                /*
+                 * Force the stride to zero when the shape is 1, sot
+                 * that the broadcasting works right.
+                 */
+                if (shape[core_start_dim + j] != 1) {
+                    inner_strides[idim++] = strides[core_start_dim + j];
+                } else {
+                    inner_strides[idim++] = 0;
+                }
+            } else {
+                inner_strides[idim++] = 0;
+            }
         }
-
-        core_dim_ixs += ufunc->core_num_dims[i];
-        inner_strides_tmp += ufunc->core_num_dims[i];
-    }
-
-    /* Set up the inner dimensions array */
-    if (NpyIter_GetShape(iter, inner_dimensions) != NPY_SUCCEED) {
-        retval = -1;
-        goto fail;
     }
-    /* Move the core dimensions to start at the second element */
-    memmove(&inner_dimensions[1], &inner_dimensions[broadcast_ndim],
-                        NPY_SIZEOF_INTP * ufunc->core_num_dim_ix);
 
-    /* Remove all the core dimensions from the iterator */
-    for (i = 0; i < ufunc->core_num_dim_ix; ++i) {
+    /* Remove all the core output dimensions from the iterator */
+    for (i = broadcast_ndim; i < iter_ndim; ++i) {
         if (NpyIter_RemoveAxis(iter, broadcast_ndim) != NPY_SUCCEED) {
             retval = -1;
             goto fail;
@@ -1971,8 +2081,8 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
 
     NPY_UF_DBG_PRINT("Executing inner loop\n");
 
-    /* Do the ufunc loop */
     if (NpyIter_GetIterSize(iter) != 0) {
+        /* Do the ufunc loop */
         NpyIter_IterNextFunc *iternext;
         char **dataptr;
         npy_intp *count_ptr;
@@ -1991,6 +2101,36 @@ PyUFunc_GeneralizedFunction(PyUFuncObject *ufunc,
             inner_dimensions[0] = *count_ptr;
             innerloop(dataptr, inner_dimensions, inner_strides, innerloopdata);
         } while (iternext(iter));
+    } else {
+        /**
+         * For each output operand, check if it has non-zero size,
+         * and assign the identity if it does. For example, a dot
+         * product of two zero-length arrays will be a scalar,
+         * which has size one.
+         */
+        for (i = nin; i < nop; ++i) {
+            if (PyArray_SIZE(op[i]) != 0) {
+                switch (ufunc->identity) {
+                    case PyUFunc_Zero:
+                        assign_reduce_identity_zero(op[i]);
+                        break;
+                    case PyUFunc_One:
+                        assign_reduce_identity_one(op[i]);
+                        break;
+                    case PyUFunc_None:
+                    case PyUFunc_ReorderableNone:
+                        PyErr_Format(PyExc_ValueError,
+                                "ufunc %s ",
+                                ufunc_name);
+                        goto fail;
+                    default:
+                        PyErr_Format(PyExc_ValueError,
+                                "ufunc %s has an invalid identity for reduction",
+                                ufunc_name);
+                        goto fail;
+                }
+            }
+        }
     }
 
     /* Check whether any errors occurred during the loop */
@@ -2433,13 +2573,13 @@ reduce_type_resolver(PyUFuncObject *ufunc, PyArrayObject *arr,
 }
 
 static int
-assign_reduce_identity_zero(PyArrayObject *result, void *data)
+assign_reduce_identity_zero(PyArrayObject *result)
 {
     return PyArray_FillWithScalar(result, PyArrayScalar_False);
 }
 
 static int
-assign_reduce_identity_one(PyArrayObject *result, void *data)
+assign_reduce_identity_one(PyArrayObject *result)
 {
     return PyArray_FillWithScalar(result, PyArrayScalar_True);
 }
diff --git a/numpy/core/src/umath/umath_linalg.c.src b/numpy/core/src/umath/umath_linalg.c.src
new file mode 100644
index 000000000000..a63bcce9a4ba
--- /dev/null
+++ b/numpy/core/src/umath/umath_linalg.c.src
@@ -0,0 +1,4294 @@
+/* -*- c -*- */
+
+/*
+ *****************************************************************************
+ **                            INCLUDES                                     **
+ *****************************************************************************
+ */
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+
+#include "Python.h"
+#include "numpy/arrayobject.h"
+#include "numpy/ufuncobject.h"
+
+#include "npy_pycompat.h"
+
+#include "npy_config.h"
+
+#include <stddef.h>
+#include <stdio.h>
+#include <assert.h>
+#include <math.h>
+
+
+static const char* umath_linalg_version_string = "0.1.4";
+
+/*
+ ****************************************************************************
+ *                        Debugging support                                 *
+ ****************************************************************************
+ */
+#define TRACE_TXT(...) do { fprintf (stderr, __VA_ARGS__); } while (0)
+#define STACK_TRACE do {} while (0)
+#define TRACE\
+    do {                                        \
+        fprintf (stderr,                        \
+                 "%s:%d:%s\n",                  \
+                 __FILE__,                      \
+                 __LINE__,                      \
+                 __FUNCTION__);                 \
+        STACK_TRACE;                            \
+    } while (0)
+
+#if 0
+#include <execinfo.h>
+void
+dbg_stack_trace()
+{
+    void *trace[32];
+    size_t size;
+
+    size = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
+    backtrace_symbols_fd(trace, size, 1);
+}
+
+#undef STACK_TRACE
+#define STACK_TRACE do { dbg_stack_trace(); } while (0)
+#endif
+
+/*
+ *****************************************************************************
+ *                    BLAS/LAPACK calling macros                             *
+ *****************************************************************************
+ */
+
+#ifdef NO_APPEND_FORTRAN
+# define FNAME(x) x
+#else
+# define FNAME(x) x##_
+#endif
+
+typedef struct { float r, i; } f2c_complex;
+typedef struct { double r, i; } f2c_doublecomplex;
+/* typedef long int (*L_fp)(); */
+
+extern int
+FNAME(sgeev)(char *jobvl, char *jobvr, int *n,
+             float a[], int *lda, float wr[], float wi[],
+             float vl[], int *ldvl, float vr[], int *ldvr,
+             float work[], int lwork[],
+             int *info);
+extern int
+FNAME(dgeev)(char *jobvl, char *jobvr, int *n,
+             double a[], int *lda, double wr[], double wi[],
+             double vl[], int *ldvl, double vr[], int *ldvr,
+             double work[], int lwork[],
+             int *info);
+extern int
+FNAME(cgeev)(char *jobvl, char *jobvr, int *n,
+             f2c_doublecomplex a[], int *lda,
+             f2c_doublecomplex w[],
+             f2c_doublecomplex vl[], int *ldvl,
+             f2c_doublecomplex vr[], int *ldvr,
+             f2c_doublecomplex work[], int *lwork,
+             double rwork[],
+             int *info);
+extern int
+FNAME(zgeev)(char *jobvl, char *jobvr, int *n,
+             f2c_doublecomplex a[], int *lda,
+             f2c_doublecomplex w[],
+             f2c_doublecomplex vl[], int *ldvl,
+             f2c_doublecomplex vr[], int *ldvr,
+             f2c_doublecomplex work[], int *lwork,
+             double rwork[],
+             int *info);
+
+extern int
+FNAME(ssyevd)(char *jobz, char *uplo, int *n,
+              float a[], int *lda, float w[], float work[],
+              int *lwork, int iwork[], int *liwork,
+              int *info);
+extern int
+FNAME(dsyevd)(char *jobz, char *uplo, int *n,
+              double a[], int *lda, double w[], double work[],
+              int *lwork, int iwork[], int *liwork,
+              int *info);
+extern int
+FNAME(cheevd)(char *jobz, char *uplo, int *n,
+              f2c_complex a[], int *lda,
+              float w[], f2c_complex work[],
+              int *lwork, float rwork[], int *lrwork, int iwork[],
+              int *liwork,
+              int *info);
+extern int
+FNAME(zheevd)(char *jobz, char *uplo, int *n,
+              f2c_doublecomplex a[], int *lda,
+              double w[], f2c_doublecomplex work[],
+              int *lwork, double rwork[], int *lrwork, int iwork[],
+              int *liwork,
+              int *info);
+
+extern int
+FNAME(dgelsd)(int *m, int *n, int *nrhs,
+              double a[], int *lda, double b[], int *ldb,
+              double s[], double *rcond, int *rank,
+              double work[], int *lwork, int iwork[],
+              int *info);
+extern int
+FNAME(zgelsd)(int *m, int *n, int *nrhs,
+              f2c_doublecomplex a[], int *lda,
+              f2c_doublecomplex b[], int *ldb,
+              double s[], double *rcond, int *rank,
+              f2c_doublecomplex work[], int *lwork,
+              double rwork[], int iwork[],
+              int *info);
+
+extern int
+FNAME(sgesv)(int *n, int *nrhs,
+             float a[], int *lda,
+             int ipiv[],
+             float b[], int *ldb,
+             int *info);
+extern int
+FNAME(dgesv)(int *n, int *nrhs,
+             double a[], int *lda,
+             int ipiv[],
+             double b[], int *ldb,
+             int *info);
+extern int
+FNAME(cgesv)(int *n, int *nrhs,
+             f2c_complex a[], int *lda,
+             int ipiv[],
+             f2c_complex b[], int *ldb,
+             int *info);
+extern int
+FNAME(zgesv)(int *n, int *nrhs,
+             f2c_doublecomplex a[], int *lda,
+             int ipiv[],
+             f2c_doublecomplex b[], int *ldb,
+             int *info);
+
+extern int
+FNAME(sgetrf)(int *m, int *n,
+              float a[], int *lda,
+              int ipiv[],
+              int *info);
+extern int
+FNAME(dgetrf)(int *m, int *n,
+              double a[], int *lda,
+              int ipiv[],
+              int *info);
+extern int
+FNAME(cgetrf)(int *m, int *n,
+              f2c_complex a[], int *lda,
+              int ipiv[],
+              int *info);
+extern int
+FNAME(zgetrf)(int *m, int *n,
+              f2c_doublecomplex a[], int *lda,
+              int ipiv[],
+              int *info);
+
+extern int
+FNAME(spotrf)(char *uplo, int *n,
+              float a[], int *lda,
+              int *info);
+extern int
+FNAME(dpotrf)(char *uplo, int *n,
+              double a[], int *lda,
+              int *info);
+extern int
+FNAME(cpotrf)(char *uplo, int *n,
+              f2c_complex a[], int *lda,
+              int *info);
+extern int
+FNAME(zpotrf)(char *uplo, int *n,
+              f2c_doublecomplex a[], int *lda,
+              int *info);
+
+extern int
+FNAME(sgesdd)(char *jobz, int *m, int *n,
+              float a[], int *lda, float s[], float u[],
+              int *ldu, float vt[], int *ldvt, float work[],
+              int *lwork, int iwork[], int *info);
+extern int
+FNAME(dgesdd)(char *jobz, int *m, int *n,
+              double a[], int *lda, double s[], double u[],
+              int *ldu, double vt[], int *ldvt, double work[],
+              int *lwork, int iwork[], int *info);
+extern int
+FNAME(cgesdd)(char *jobz, int *m, int *n,
+              f2c_complex a[], int *lda,
+              float s[], f2c_complex u[], int *ldu,
+              f2c_complex vt[], int *ldvt,
+              f2c_complex work[], int *lwork,
+              float rwork[], int iwork[], int *info);
+extern int
+FNAME(zgesdd)(char *jobz, int *m, int *n,
+              f2c_doublecomplex a[], int *lda,
+              double s[], f2c_doublecomplex u[], int *ldu,
+              f2c_doublecomplex vt[], int *ldvt,
+              f2c_doublecomplex work[], int *lwork,
+              double rwork[], int iwork[], int *info);
+
+extern int
+FNAME(spotrs)(char *uplo, int *n, int *nrhs,
+              float a[], int *lda,
+              float b[], int *ldb,
+              int *info);
+extern int
+FNAME(dpotrs)(char *uplo, int *n, int *nrhs,
+              double a[], int *lda,
+              double b[], int *ldb,
+              int *info);
+extern int
+FNAME(cpotrs)(char *uplo, int *n, int *nrhs,
+              f2c_complex a[], int *lda,
+              f2c_complex b[], int *ldb,
+              int *info);
+extern int
+FNAME(zpotrs)(char *uplo, int *n, int *nrhs,
+              f2c_doublecomplex a[], int *lda,
+              f2c_doublecomplex b[], int *ldb,
+              int *info);
+
+extern int
+FNAME(spotri)(char *uplo, int *n,
+              float a[], int *lda,
+              int *info);
+extern int
+FNAME(dpotri)(char *uplo, int *n,
+              double a[], int *lda,
+              int *info);
+extern int
+FNAME(cpotri)(char *uplo, int *n,
+              f2c_complex a[], int *lda,
+              int *info);
+extern int
+FNAME(zpotri)(char *uplo, int *n,
+              f2c_doublecomplex a[], int *lda,
+              int *info);
+
+extern int
+FNAME(scopy)(int *n,
+             float *sx, int *incx,
+             float *sy, int *incy);
+extern int
+FNAME(dcopy)(int *n,
+             double *sx, int *incx,
+             double *sy, int *incy);
+extern int
+FNAME(ccopy)(int *n,
+             f2c_complex *sx, int *incx,
+             f2c_complex *sy, int *incy);
+extern int
+FNAME(zcopy)(int *n,
+             f2c_doublecomplex *sx, int *incx,
+             f2c_doublecomplex *sy, int *incy);
+
+extern float
+FNAME(sdot)(int *n,
+            float *sx, int *incx,
+            float *sy, int *incy);
+extern double
+FNAME(ddot)(int *n,
+            double *sx, int *incx,
+            double *sy, int *incy);
+extern f2c_complex
+FNAME(cdotu)(int *n,
+             f2c_complex *sx, int *incx,
+             f2c_complex *sy, int *incy);
+extern f2c_doublecomplex
+FNAME(zdotu)(int *n,
+             f2c_doublecomplex *sx, int *incx,
+             f2c_doublecomplex *sy, int *incy);
+extern f2c_complex
+FNAME(cdotc)(int *n,
+             f2c_complex *sx, int *incx,
+             f2c_complex *sy, int *incy);
+extern f2c_doublecomplex
+FNAME(zdotc)(int *n,
+             f2c_doublecomplex *sx, int *incx,
+             f2c_doublecomplex *sy, int *incy);
+
+extern int
+FNAME(sgemm)(char *transa, char *transb,
+             int *m, int *n, int *k,
+             float *alpha,
+             float *a, int *lda,
+             float *b, int *ldb,
+             float *beta,
+             float *c, int *ldc);
+extern int
+FNAME(dgemm)(char *transa, char *transb,
+             int *m, int *n, int *k,
+             double *alpha,
+             double *a, int *lda,
+             double *b, int *ldb,
+             double *beta,
+             double *c, int *ldc);
+extern int
+FNAME(cgemm)(char *transa, char *transb,
+             int *m, int *n, int *k,
+             f2c_complex *alpha,
+             f2c_complex *a, int *lda,
+             f2c_complex *b, int *ldb,
+             f2c_complex *beta,
+             f2c_complex *c, int *ldc);
+extern int
+FNAME(zgemm)(char *transa, char *transb,
+             int *m, int *n, int *k,
+             f2c_doublecomplex *alpha,
+             f2c_doublecomplex *a, int *lda,
+             f2c_doublecomplex *b, int *ldb,
+             f2c_doublecomplex *beta,
+             f2c_doublecomplex *c, int *ldc);
+
+
+#define LAPACK_T(FUNC)                                          \
+    TRACE_TXT("Calling LAPACK ( " # FUNC " )\n");               \
+    FNAME(FUNC)
+
+#define BLAS(FUNC)                              \
+    FNAME(FUNC)
+
+#define LAPACK(FUNC)                            \
+    FNAME(FUNC)
+
+typedef int               fortran_int;
+typedef float             fortran_real;
+typedef double            fortran_doublereal;
+typedef f2c_complex       fortran_complex;
+typedef f2c_doublecomplex fortran_doublecomplex;
+
+
+/*
+ *****************************************************************************
+ **                      Some handy functions                               **
+ *****************************************************************************
+ */
+
+static inline void *
+offset_ptr(void* ptr, ptrdiff_t offset)
+{
+    return (void*)((npy_uint8*)ptr + offset);
+}
+
+/*
+ *****************************************************************************
+ **                      Some handy constants                               **
+ *****************************************************************************
+ */
+
+#define UMATH_LINALG_MODULE_NAME "_umath_linalg"
+
+typedef union {
+    fortran_complex f;
+    npy_cfloat npy;
+    float array[2];
+} COMPLEX_t;
+
+typedef union {
+    fortran_doublecomplex f;
+    npy_cdouble npy;
+    double array[2];
+} DOUBLECOMPLEX_t;
+
+static float s_one;
+static float s_zero;
+static float s_minus_one;
+static float s_ninf;
+static float s_nan;
+static double d_one;
+static double d_zero;
+static double d_minus_one;
+static double d_ninf;
+static double d_nan;
+static COMPLEX_t c_one;
+static COMPLEX_t c_zero;
+static COMPLEX_t c_minus_one;
+static COMPLEX_t c_ninf;
+static COMPLEX_t c_nan;
+static DOUBLECOMPLEX_t z_one;
+static DOUBLECOMPLEX_t z_zero;
+static DOUBLECOMPLEX_t z_minus_one;
+static DOUBLECOMPLEX_t z_ninf;
+static DOUBLECOMPLEX_t z_nan;
+
+static void init_constants()
+{
+    /*
+       this is needed as NPY_INFINITY and NPY_NAN macros
+       can't be used as initializers. I prefer to just set
+       all the constants the same way.
+    */
+    s_one  = 1.0f;
+    s_zero = 0.0f;
+    s_minus_one = -1.0f;
+    s_ninf = -NPY_INFINITYF;
+    s_nan = NPY_NANF;
+
+    d_one  = 1.0;
+    d_zero = 0.0;
+    d_minus_one = -1.0;
+    d_ninf = -NPY_INFINITY;
+    d_nan = NPY_NAN;
+
+    c_one.array[0]  = 1.0f;
+    c_one.array[1]  = 0.0f;
+    c_zero.array[0] = 0.0f;
+    c_zero.array[1] = 0.0f;
+    c_minus_one.array[0] = -1.0f;
+    c_minus_one.array[1] = 0.0f;
+    c_ninf.array[0] = -NPY_INFINITYF;
+    c_ninf.array[1] = 0.0f;
+    c_nan.array[0] = NPY_NANF;
+    c_nan.array[1] = NPY_NANF;
+
+    z_one.array[0]  = 1.0;
+    z_one.array[1]  = 0.0;
+    z_zero.array[0] = 0.0;
+    z_zero.array[1] = 0.0;
+    z_minus_one.array[0] = -1.0;
+    z_minus_one.array[1] = 0.0;
+    z_ninf.array[0] = -NPY_INFINITY;
+    z_ninf.array[1] = 0.0;
+    z_nan.array[0] = NPY_NAN;
+    z_nan.array[1] = NPY_NAN;
+}
+
+/*
+ *****************************************************************************
+ **               Structs used for data rearrangement                       **
+ *****************************************************************************
+ */
+
+
+/* this struct contains information about how to linearize in a local buffer
+   a matrix so that it can be used by blas functions.
+   All strides are specified in number of elements (similar to what blas
+   expects)
+
+   dst_row_strides: number of elements between different row. Matrix is
+                    considered row-major
+   dst_column_strides: number of elements between differnt columns in the
+                    destination buffer
+   rows: number of rows of the matrix
+   columns: number of columns of the matrix
+   src_row_strides: strides needed to access the next row in the source matrix
+   src_column_strides: strides needed to access the next column in the source
+                       matrix
+ */
+typedef struct linearize_data_struct
+{
+  size_t     rows;
+  size_t     columns;
+  ptrdiff_t  row_strides;
+  ptrdiff_t  column_strides;
+} LINEARIZE_DATA_t;
+
+static inline void
+init_linearize_data(LINEARIZE_DATA_t *lin_data,
+                    int rows,
+                    int columns,
+                    ptrdiff_t row_strides,
+                    ptrdiff_t column_strides)
+{
+    lin_data->rows = rows;
+    lin_data->columns = columns;
+    lin_data->row_strides = row_strides;
+    lin_data->column_strides = column_strides;
+}
+
+static inline void
+dump_ufunc_object(PyUFuncObject* ufunc)
+{
+    TRACE_TXT("\n\n%s '%s' (%d input(s), %d output(s), %d specialization(s).\n",
+              ufunc->core_enabled? "generalized ufunc" : "scalar ufunc",
+              ufunc->name, ufunc->nin, ufunc->nout, ufunc->ntypes);
+    if (ufunc->core_enabled) {
+        int arg;
+        int dim;
+        TRACE_TXT("\t%s (%d dimension(s) detected).\n",
+                  ufunc->core_signature, ufunc->core_num_dim_ix);
+
+        for (arg = 0; arg < ufunc->nargs; arg++){
+            int * arg_dim_ix = ufunc->core_dim_ixs + ufunc->core_offsets[arg];
+            TRACE_TXT("\t\targ %d (%s) has %d dimension(s): (",
+                      arg, arg < ufunc->nin? "INPUT" : "OUTPUT",
+                      ufunc->core_num_dims[arg]);
+            for (dim = 0; dim < ufunc->core_num_dims[arg]; dim ++) {
+                TRACE_TXT(" %d", arg_dim_ix[dim]);
+            }
+            TRACE_TXT(" )\n");
+        }
+    }
+}
+
+static inline void
+dump_linearize_data(const char* name, const LINEARIZE_DATA_t* params)
+{
+    TRACE_TXT("\n\t%s rows: %zd columns: %zd"\
+              "\n\t\trow_strides: %td column_strides: %td"\
+              "\n", name, params->rows, params->columns,
+              params->row_strides, params->column_strides);
+}
+
+
+static inline float
+FLOAT_add(float op1, float op2)
+{
+    return op1 + op2;
+}
+
+static inline double
+DOUBLE_add(double op1, double op2)
+{
+    return op1 + op2;
+}
+
+static inline COMPLEX_t
+CFLOAT_add(COMPLEX_t op1, COMPLEX_t op2)
+{
+    COMPLEX_t result;
+    result.array[0] = op1.array[0] + op2.array[0];
+    result.array[1] = op1.array[1] + op2.array[1];
+
+    return result;
+}
+
+static inline DOUBLECOMPLEX_t
+CDOUBLE_add(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2)
+{
+    DOUBLECOMPLEX_t result;
+    result.array[0] = op1.array[0] + op2.array[0];
+    result.array[1] = op1.array[1] + op2.array[1];
+
+    return result;
+}
+
+static inline float
+FLOAT_mul(float op1, float op2)
+{
+    return op1*op2;
+}
+
+static inline double
+DOUBLE_mul(double op1, double op2)
+{
+    return op1*op2;
+}
+
+
+static inline COMPLEX_t
+CFLOAT_mul(COMPLEX_t op1, COMPLEX_t op2)
+{
+    COMPLEX_t result;
+    result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1];
+    result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1];
+
+    return result;
+}
+
+static inline DOUBLECOMPLEX_t
+CDOUBLE_mul(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2)
+{
+    DOUBLECOMPLEX_t result;
+    result.array[0] = op1.array[0]*op2.array[0] - op1.array[1]*op2.array[1];
+    result.array[1] = op1.array[1]*op2.array[0] + op1.array[0]*op2.array[1];
+
+    return result;
+}
+
+static inline float
+FLOAT_mulc(float op1, float op2)
+{
+    return op1*op2;
+}
+
+static inline double
+DOUBLE_mulc(float op1, float op2)
+{
+    return op1*op2;
+}
+
+static inline COMPLEX_t
+CFLOAT_mulc(COMPLEX_t op1, COMPLEX_t op2)
+{
+    COMPLEX_t result;
+    result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1];
+    result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0];
+
+    return result;
+}
+
+static inline DOUBLECOMPLEX_t
+CDOUBLE_mulc(DOUBLECOMPLEX_t op1, DOUBLECOMPLEX_t op2)
+{
+    DOUBLECOMPLEX_t result;
+    result.array[0] = op1.array[0]*op2.array[0] + op1.array[1]*op2.array[1];
+    result.array[1] = op1.array[0]*op2.array[1] - op1.array[1]*op2.array[0];
+
+    return result;
+}
+
+static inline void
+print_FLOAT(npy_float s)
+{
+    TRACE_TXT(" %8.4f", s);
+}
+static inline void
+print_DOUBLE(npy_double d)
+{
+    TRACE_TXT(" %10.6f", d);
+}
+static inline void
+print_CFLOAT(npy_cfloat c)
+{
+    float* c_parts = (float*)&c;
+    TRACE_TXT("(%8.4f, %8.4fj)", c_parts[0], c_parts[1]);
+}
+static inline void
+print_CDOUBLE(npy_cdouble z)
+{
+    double* z_parts = (double*)&z;
+    TRACE_TXT("(%8.4f, %8.4fj)", z_parts[0], z_parts[1]);
+}
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=npy_float,npy_double,npy_cfloat,npy_cdouble#
+ */
+static inline void
+dump_@TYPE@_matrix(const char* name,
+                   size_t rows, size_t columns,
+                   const @typ@* ptr)
+{
+    size_t i,j;
+
+    TRACE_TXT("\n%s %p (%zd, %zd)\n", name, ptr, rows, columns);
+    for (i=0; i<rows; i++)
+    {
+        TRACE_TXT("| ");
+        for (j=0; j<columns; j++)
+        {
+            print_@TYPE@(ptr[j*rows + i]);
+            TRACE_TXT(", ");
+        }
+        TRACE_TXT(" |\n");
+    }
+}
+/**end repeat**/
+
+
+/*
+ *****************************************************************************
+ **                            Basics                                       **
+ *****************************************************************************
+ */
+
+#define INIT_OUTER_LOOP_1 \
+    npy_intp dN = *dimensions++;\
+    npy_intp N_;\
+    npy_intp s0 = *steps++;
+
+#define INIT_OUTER_LOOP_2 \
+    INIT_OUTER_LOOP_1\
+    npy_intp s1 = *steps++;
+
+#define INIT_OUTER_LOOP_3 \
+    INIT_OUTER_LOOP_2\
+    npy_intp s2 = *steps++;
+
+#define INIT_OUTER_LOOP_4 \
+    INIT_OUTER_LOOP_3\
+    npy_intp s3 = *steps++;
+
+#define INIT_OUTER_LOOP_5 \
+    INIT_OUTER_LOOP_4\
+    npy_intp s4 = *steps++;
+
+#define INIT_OUTER_LOOP_6  \
+    INIT_OUTER_LOOP_5\
+    npy_intp s5 = *steps++;
+
+#define BEGIN_OUTER_LOOP_2 \
+    for (N_ = 0;\
+         N_ < dN;\
+         N_++, args[0] += s0,\
+             args[1] += s1) {
+
+#define BEGIN_OUTER_LOOP_3 \
+    for (N_ = 0;\
+         N_ < dN;\
+         N_++, args[0] += s0,\
+             args[1] += s1,\
+             args[2] += s2) {
+
+#define BEGIN_OUTER_LOOP_4 \
+    for (N_ = 0;\
+         N_ < dN;\
+         N_++, args[0] += s0,\
+             args[1] += s1,\
+             args[2] += s2,\
+             args[3] += s3) {
+
+#define BEGIN_OUTER_LOOP_5 \
+    for (N_ = 0;\
+         N_ < dN;\
+         N_++, args[0] += s0,\
+             args[1] += s1,\
+             args[2] += s2,\
+             args[3] += s3,\
+             args[4] += s4) {
+
+#define BEGIN_OUTER_LOOP_6 \
+    for (N_ = 0;\
+         N_ < dN;\
+         N_++, args[0] += s0,\
+             args[1] += s1,\
+             args[2] += s2,\
+             args[3] += s3,\
+             args[4] += s4,\
+             args[5] += s5) {
+
+#define END_OUTER_LOOP  }
+
+static inline void
+update_pointers(npy_uint8** bases, ptrdiff_t* offsets, size_t count)
+{
+    size_t i;
+    for (i=0; i < count; ++i) {
+        bases[i] += offsets[i];
+    }
+}
+
+
+/* disable -Wmaybe-uninitialized as there is some code that generate false
+   positives with this warning
+*/
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+
+/*
+ *****************************************************************************
+ **                             HELPER FUNCS                                **
+ *****************************************************************************
+ */
+
+             /* rearranging of 2D matrices using blas */
+
+/**begin repeat
+    #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+    #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+    #copy=scopy,dcopy,ccopy,zcopy#
+    #nan=s_nan, d_nan, c_nan, z_nan#
+ */
+static inline void *
+linearize_@TYPE@_matrix(void *dst_in,
+                        void *src_in,
+                        const LINEARIZE_DATA_t* data)
+{
+    @typ@ *src = (@typ@ *) src_in;
+    @typ@ *dst = (@typ@ *) dst_in;
+
+    if (dst) {
+        int i;
+        @typ@* rv = dst;
+        fortran_int columns = (fortran_int)data->columns;
+        fortran_int column_strides =
+            (fortran_int)(data->column_strides/sizeof(@typ@));
+        fortran_int one = 1;
+        for (i=0; i< data->rows; i++) {
+            FNAME(@copy@)(&columns,
+                          (void*)src, &column_strides,
+                          (void*)dst, &one);
+            src += data->row_strides/sizeof(@typ@);
+            dst += data->columns;
+        }
+        return rv;
+    } else {
+        return src;
+    }
+}
+
+static inline void *
+delinearize_@TYPE@_matrix(void *dst_in,
+                          void *src_in,
+                          const LINEARIZE_DATA_t* data)
+{
+    @typ@ *src = (@typ@ *) src_in;
+    @typ@ *dst = (@typ@ *) dst_in;
+
+    if (src) {
+        int i;
+        @typ@ *rv = src;
+        fortran_int columns = (fortran_int)data->columns;
+        fortran_int column_strides =
+            (fortran_int)(data->column_strides/sizeof(@typ@));
+        fortran_int one = 1;
+        for (i=0; i < data->rows; i++) {
+            FNAME(@copy@)(&columns,
+                          (void*)src, &one,
+                          (void*)dst, &column_strides);
+            src += data->columns;
+            dst += data->row_strides/sizeof(@typ@);
+        }
+
+        return rv;
+    } else {
+        return src;
+    }
+}
+
+static inline void
+nan_@TYPE@_matrix(void *dst_in, const LINEARIZE_DATA_t* data)
+{
+    @typ@ *dst = (@typ@ *) dst_in;
+
+    int i,j;
+    for (i=0; i < data->rows; i++) {
+        @typ@ *cp = dst;
+        ptrdiff_t cs = data->column_strides/sizeof(@typ@);
+        for (j=0; j< data->columns; ++j) {
+            *cp = @nan@;
+            cp += cs;
+        }
+        dst += data->row_strides/sizeof(@typ@);
+    }
+
+    /* if this function gets called, the module generates nans */
+    /* set the fpe status to invalid to notify that nans were generated */
+    npy_set_floatstatus_invalid();
+}
+
+static inline void
+make_symmetric_@TYPE@_matrix(char UPLO,
+                             fortran_int n,
+                             void *matrix)
+{
+    /*
+       note: optimized assuming that sequential write is better than
+       sequential read
+    */
+    fortran_int i;
+    fortran_int one = 1;
+
+    /* note: 'L' and 'U' are interpreted considering *fortran* order */
+    if ('L' == UPLO) {
+        @typ@ *dst = (@typ@ *) matrix;
+        @typ@ *src = (@typ@ *) matrix;
+        src += 1;
+        dst += n;
+        for (i = 1; i < n; ++i) {
+            FNAME(@copy@)(&i,
+                          (void *)src, &n,
+                          (void *)dst, &one);
+            src += 1;
+            dst += n;
+        }
+    } else { /* assume U */
+        @typ@ *dst = (@typ@ *) matrix;
+        @typ@ *src = (@typ@ *) matrix;
+        src += n;
+        dst += 1;
+        for (i = n - 1; i > 0; --i) {
+            FNAME(@copy@)(&i,
+                          (void *)src, &n,
+                          (void *)dst, &one);
+            src += n + 1;
+            dst += n + 1;
+        }
+    }
+}
+/**end repeat**/
+
+               /* identity square matrix generation */
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+   #cblas_type=s,d,c,z#
+ */
+static inline void
+identity_@TYPE@_matrix(void *ptr, size_t n)
+{
+    size_t i;
+    @typ@ *matrix = (@typ@*) ptr;
+    /* in IEEE floating point, zeroes are represented as bitwise 0 */
+    memset(matrix, 0, n*n*sizeof(@typ@));
+
+    for (i = 0; i < n; ++i)
+    {
+        *matrix = @cblas_type@_one;
+        matrix += n+1;
+    }
+}
+/**end repeat**/
+
+         /* lower/upper triangular matrix using blas (in place) */
+/**begin repeat
+
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+   #cblas_type=s,d,c,z#
+ */
+static inline void
+tril_@TYPE@_matrix(void *ptr, size_t n)
+{
+    size_t i,j;
+    @typ@ *matrix = (@typ@*)ptr;
+    matrix++;
+    for (i = n-1; i > 0; --i) {
+        for (j = 0; j < i; ++j) {
+            *matrix = @cblas_type@_zero;
+        }
+        matrix += n + 1;
+    }
+}
+
+static inline void
+triu_@TYPE@_matrix(void *ptr, size_t n)
+{
+    size_t i,j;
+    @typ@ *matrix = (@typ@*)ptr;
+    matrix += n;
+    for (i=1; i < n; ++i) {
+        for (j=0; j<i; ++j) {
+            matrix[j] = @cblas_type@_zero;
+        }
+        matrix += n;
+    }
+}
+/**end repeat**/
+
+/*
+ *****************************************************************************
+ **                             UFUNC LOOPS                                 **
+ *****************************************************************************
+ */
+
+/**begin repeat
+   #typ=npy_float,npy_double,COMPLEX_t,DOUBLECOMPLEX_t,COMPLEX_t,DOUBLECOMPLEX_t#
+   #ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex,fortran_complex,fortran_doublecomplex#
+   #add=FLOAT_add,DOUBLE_add,CFLOAT_add,CDOUBLE_add,CFLOAT_add,CDOUBLE_add#
+   #mul=FLOAT_mul,DOUBLE_mul,CFLOAT_mul,CDOUBLE_mul,CFLOAT_mulc,CDOUBLE_mulc#
+   #dot=sdot,ddot,cdotu,zdotu,cdotc,zdotc#
+   #zero=s_zero, d_zero,c_zero,z_zero,c_zero,z_zero#
+ */
+
+static inline void
+@dot@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps)
+{
+    const size_t sot = sizeof(@typ@);
+    int dim = (int)dimensions[0];
+    INIT_OUTER_LOOP_3
+    /*
+     * use blas if the stride is a multiple of datatype size in the inputs
+     * it should be the common case
+     */
+    if ((0 == (steps[3] % sot)) &&
+        (0 == (steps[4] % sot))) {
+        /* use blas */
+
+        int is1 = (int)(steps[0]/sot), is2 = (int)(steps[1]/sot);
+        BEGIN_OUTER_LOOP_3
+            @ftyp@ * ip1 = (@ftyp@*)args[0], *ip2 = (@ftyp@*)args[1];
+            *(@ftyp@*)(args[2]) = BLAS(@dot@)(&dim, ip1, &is1, ip2, &is2);
+        END_OUTER_LOOP
+    } else {
+        /* use standard version */
+        npy_intp is1=steps[0], is2=steps[1];
+        BEGIN_OUTER_LOOP_3
+            int i;
+            char *ip1=args[0], *ip2=args[1], *op=args[2];
+            @typ@ sum = @zero@;
+            for (i = 0; i < dim; i++) {
+                @typ@ prod = @mul@(*(@typ@ *)ip1, *(@typ@*)ip2);
+                sum = @add@(sum, prod);
+                ip1 += is1;
+                ip2 += is2;
+            }
+            *(@typ@ *)op = sum;
+        END_OUTER_LOOP
+    }
+}
+
+/**end repeat**/
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #dot=sdot,ddot,cdotu,zdotu#
+   #dotc=sdot,ddot,cdotc,zdotc#
+ */
+static void
+@TYPE@_inner1d(char **args, npy_intp *dimensions, npy_intp *steps,
+               void *NPY_UNUSED(func))
+{
+    @dot@_inner1d(args, dimensions, steps);
+}
+
+static void
+@TYPE@_dotc1d(char **args, npy_intp *dimensions, npy_intp *steps,
+              void *NPY_UNUSED(func))
+{
+    @dotc@_inner1d(args, dimensions, steps);
+}
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=npy_float,npy_double,COMPLEX_t,DOUBLECOMPLEX_t#
+   #add=FLOAT_add,DOUBLE_add,CFLOAT_add,CDOUBLE_add#
+   #mul=FLOAT_mul,DOUBLE_mul,CFLOAT_mul,CDOUBLE_mul# 
+   #zero=s_zero, d_zero,c_zero,z_zero#
+*/
+
+/*
+ *  This implements the function
+ *        out[n] = sum_i { in1[n, i] * in2[n, i] * in3[n, i] }.
+ */
+
+static void
+@TYPE@_innerwt(char **args,
+               npy_intp *dimensions,
+               npy_intp *steps,
+               void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_4
+    npy_intp di = dimensions[0];
+    npy_intp i;
+    npy_intp is1=steps[0], is2=steps[1], is3=steps[2];
+    BEGIN_OUTER_LOOP_4
+        char *ip1=args[0], *ip2=args[1], *ip3=args[2], *op=args[3];
+        @typ@ sum = @zero@;
+        for (i = 0; i < di; i++) {
+            @typ@ prod = @mul@(@mul@(*(@typ@*)ip1, *(@typ@*)ip2),*(@typ@*)ip3);
+            sum = @add@(sum, prod);
+            ip1 += is1;
+            ip2 += is2;
+            ip3 += is3;
+        }
+        *(@typ@ *)op = sum;
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+
+/* -------------------------------------------------------------------------- */
+                        /* Matrix Multiply */
+
+typedef struct gemm_params_struct
+{
+    void *A;
+    void *B;
+    void *C;
+    fortran_int LDA;
+    fortran_int LDB;
+    fortran_int LDC;
+    fortran_int M;
+    fortran_int K;
+    fortran_int N;
+    char TRANSA;
+    char TRANSB;
+
+    void *allocated_data;
+} GEMM_PARAMS_t;
+
+static inline void
+dump_gemm_params(const char* name, const GEMM_PARAMS_t* params)
+{
+    TRACE_TXT("\n%s\n"
+              "%14s: %18p\n" "%14s: %18p\n" "%14s: %18p\n"
+              "%14s: %18d\n" "%14s: %18d\n" "%14s: %18d\n"
+              "%14s: %18d\n" "%14s: %18d\n" "%14s: %18d\n"
+              "%14s: %15c'%c'\n" "%14s: %15c'%c'\n",
+              name,
+              "A", params->A, "B", params->B, "C", params->C,
+              "LDA", params->LDA, "LDB", params->LDB, "LDC", params->LDC,
+              "M", params->M, "K", params->K, "N", params->N,
+              "TRANSA", ' ', params->TRANSA, "TRANSB", ' ', params->TRANSB);
+}
+
+
+typedef struct _matrix_desc {
+    int need_buff;
+    fortran_int lead;
+    size_t size;
+    void *buff;
+} matrix_desc;
+
+static inline void
+matrix_desc_init(matrix_desc *mtxd,
+                 npy_intp* steps,
+                 size_t sot,
+                 fortran_int rows,
+                 fortran_int columns)
+/* initialized a matrix desc based on the information in steps
+   it will fill the matrix desc with the values needed.
+   steps[0] contains column step
+   steps[1] contains row step
+*/
+{
+    /* if column step is not element step, a copy will be needed
+       to arrange the array in a way compatible with blas
+    */
+    mtxd->need_buff = steps[0] != sot;
+
+    if (!mtxd->need_buff) {
+        if (steps[1]) {
+            mtxd->lead = (fortran_int) steps[1] / sot;
+        } else {
+            /* step is 0, this means either it is only 1 column or
+               there is a step trick around*/
+            if (columns > 1) {
+                /* step tricks not supported in gemm... make a copy */
+                mtxd->need_buff = 1;
+            } else {
+                /* lead must be at least 1 */
+                mtxd->lead = rows;
+            }
+        }
+    }
+
+    /* if a copy is performed, the lead is the number of columns */
+    if (mtxd->need_buff) {
+        mtxd->lead = rows;
+    }
+
+    mtxd->size = rows*columns*sot*mtxd->need_buff;
+    mtxd->buff = NULL;
+}
+
+static inline npy_int8 *
+matrix_desc_assign_buff(matrix_desc* mtxd, npy_int8 *p)
+{
+    if (mtxd->need_buff) {
+        mtxd->buff = p;
+        return p + mtxd->size;
+    } else {
+        return p;
+    }
+}
+
+static inline int
+init_gemm_params(GEMM_PARAMS_t *params,
+                 fortran_int m,
+                 fortran_int k,
+                 fortran_int n,
+                 npy_intp* steps,
+                 size_t sot)
+{
+    matrix_desc a, b, c;
+    matrix_desc_init(&a, steps + 0, sot, m, k);
+    matrix_desc_init(&b, steps + 2, sot, k, n);
+    matrix_desc_init(&c, steps + 4, sot, m, n);
+    npy_uint8 *mem_buff = NULL;
+
+    if (a.size + b.size + c.size)
+    {
+        npy_uint8 *current;
+        /* need to allocate some buffer */
+        mem_buff = malloc(a.size + b.size + c.size);
+        if (!mem_buff)
+            goto error;
+
+        current = mem_buff;
+        current = matrix_desc_assign_buff(&a, current);
+        current = matrix_desc_assign_buff(&b, current);
+        current = matrix_desc_assign_buff(&c, current);
+    }
+
+    params->A = a.buff;
+    params->B = b.buff;
+    params->C = c.buff;
+    params->LDA = a.lead;
+    params->LDB = b.lead;
+    params->LDC = c.lead;
+    params->M = m;
+    params->N = n;
+    params->K = k;
+    params->TRANSA='N';
+    params->TRANSB='N';
+
+    params->allocated_data = mem_buff;
+
+    return 1;
+ error:
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+    return 0;
+}
+
+static inline void
+release_gemm_params(GEMM_PARAMS_t * params)
+{
+    free(params->allocated_data);
+    params->allocated_data = NULL;
+}
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=npy_float,npy_double,npy_cfloat,npy_cdouble#
+   #one=s_one,d_one,c_one.array,z_one.array#
+   #zero=s_zero,d_zero,c_zero.array,z_zero.array#
+   #GEMM=sgemm,dgemm,cgemm,zgemm#
+*/
+
+static void
+@TYPE@_matrix_multiply(char **args,
+                       npy_intp *dimensions,
+                       npy_intp *steps,
+                       void *NPY_UNUSED(func))
+{
+    /*
+     * everything is setup in a way that makes things work. BLAS gemm can be
+     * be called without rearranging nor using weird stuff, as matrices are
+     * in the expected way in memory.
+     * This is just a loop calling blas.
+     */
+    GEMM_PARAMS_t params;
+    fortran_int m, k, n;
+
+    INIT_OUTER_LOOP_3
+
+    m = (fortran_int) dimensions[0];
+    k = (fortran_int) dimensions[1];
+    n = (fortran_int) dimensions[2];
+
+    if (init_gemm_params(&params, m, k, n, steps, sizeof(@typ@)))
+    {
+        LINEARIZE_DATA_t a_in, b_in, c_out;
+
+        if (params.A)
+            init_linearize_data(&a_in, k, m, steps[1], steps[0]);
+        if (params.B)
+            init_linearize_data(&b_in, n, k, steps[3], steps[2]);
+        if (params.C)
+            init_linearize_data(&c_out, n, m, steps[5], steps[4]);
+
+        BEGIN_OUTER_LOOP_3
+            /* just call the appropriate multiply and update pointers */
+            @typ@ *A = linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            @typ@ *B = linearize_@TYPE@_matrix(params.B, args[1], &b_in);
+            @typ@ *C = params.C ? (@typ@*)params.C : (@typ@ *) args[2];
+
+            /* linearize source operands if needed */
+            FNAME(@GEMM@)(&params.TRANSA, &params.TRANSB,
+                          &params.M, &params.N, &params.K,
+                          (void*)&@one@, // alpha
+                          (void*)A, &params.LDA,
+                          (void*)B, &params.LDB,
+                          (void*)&@zero@, // beta
+                          (void*)C, &params.LDC);
+            delinearize_@TYPE@_matrix(args[2], params.C, &c_out);
+        END_OUTER_LOOP
+
+        release_gemm_params(&params);
+    }
+}
+/**end repeat**/
+
+
+/* -------------------------------------------------------------------------- */
+                          /* Determinants */
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE#
+   #typ=npy_float, npy_double#
+   #log_func=npy_logf,npy_log#
+   #exp_func=npy_expf,npy_exp#
+   #zero=0.0f,0.0#
+*/
+
+static inline void
+@TYPE@_slogdet_from_factored_diagonal(@typ@* src,
+                                      fortran_int m,
+                                      @typ@ *sign,
+                                      @typ@ *logdet)
+{
+    @typ@ acc_sign = *sign;
+    @typ@ acc_logdet = @zero@;
+    int i;
+    for (i = 0; i < m; i++) {
+        @typ@ abs_element = *src;
+        if (abs_element < @zero@) {
+            acc_sign = -acc_sign;
+            abs_element = -abs_element;
+        }
+
+        acc_logdet += @log_func@(abs_element);
+        src += m+1;
+    }
+
+    *sign = acc_sign;
+    *logdet = acc_logdet;
+}
+
+static inline @typ@
+@TYPE@_det_from_slogdet(@typ@ sign, @typ@ logdet)
+{
+    @typ@ result = sign * @exp_func@(logdet);
+    return result;
+}
+
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=CFLOAT,CDOUBLE#
+   #typ=npy_cfloat, npy_cdouble#
+   #basetyp=npy_float, npy_double#
+   #abs_func=npy_cabsf, npy_cabs#
+   #log_func=npy_logf, npy_log#
+   #exp_func=npy_expf, npy_exp#
+   #zero=0.0f,0.0#
+*/
+#define RE(COMPLEX) (((@basetyp@*)(&COMPLEX))[0])
+#define IM(COMPLEX) (((@basetyp@*)(&COMPLEX))[1])
+
+static inline @typ@
+@TYPE@_mult(@typ@ op1, @typ@ op2)
+{
+    @typ@ rv;
+
+    RE(rv) = RE(op1)*RE(op2) - IM(op1)*IM(op2);
+    IM(rv) = RE(op1)*IM(op2) + IM(op1)*RE(op2);
+
+    return rv;
+}
+
+
+static inline void
+@TYPE@_slogdet_from_factored_diagonal(@typ@* src,
+                                      fortran_int m,
+                                      @typ@ *sign,
+                                      @basetyp@ *logdet)
+{
+    int i;
+    @typ@ sign_acc = *sign;
+    @basetyp@ logdet_acc = @zero@;
+
+    for (i = 0; i < m; i++)
+    {
+        @basetyp@ abs_element = @abs_func@(*src);
+        @typ@ sign_element;
+        RE(sign_element) = RE(*src) / abs_element;
+        IM(sign_element) = IM(*src) / abs_element;
+
+        sign_acc = @TYPE@_mult(sign_acc, sign_element);
+        logdet_acc += @log_func@(abs_element);
+        src += m + 1;
+    }
+
+    *sign = sign_acc;
+    *logdet = logdet_acc;
+}
+
+static inline @typ@
+@TYPE@_det_from_slogdet(@typ@ sign, @basetyp@ logdet)
+{
+    @typ@ tmp;
+    RE(tmp) = @exp_func@(logdet);
+    IM(tmp) = @zero@;
+    return @TYPE@_mult(sign, tmp);
+}
+#undef RE
+#undef IM
+/**end repeat**/
+
+
+/* As in the linalg package, the determinant is computed via LU factorization
+ * using LAPACK.
+ * slogdet computes sign + log(determinant).
+ * det computes sign * exp(slogdet).
+ */
+/**begin repeat
+
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=npy_float,npy_double,npy_cfloat,npy_cdouble#
+   #basetyp=npy_float,npy_double,npy_float,npy_double#
+   #cblas_type=s,d,c,z#
+*/
+
+static inline void
+@TYPE@_slogdet_single_element(fortran_int m,
+                              void* src,
+                              fortran_int* pivots,
+                              @typ@ *sign,
+                              @basetyp@ *logdet)
+{
+    fortran_int info = 0;
+    int i;
+    /* note: done in place */
+    LAPACK(@cblas_type@getrf)(&m, &m, (void *)src, &m, pivots, &info);
+
+    if (info == 0)
+    {
+        int change_sign = 0;
+        /* note: fortran uses 1 based indexing */
+        for (i=0; i < m; i++)
+        {
+            change_sign += (pivots[i] != (i+1));
+        }
+
+        memcpy(sign,
+               (change_sign % 2)?
+                   &@cblas_type@_minus_one :
+                   &@cblas_type@_one
+               , sizeof(*sign));
+        @TYPE@_slogdet_from_factored_diagonal(src, m, sign, logdet);
+    } else {
+        /*
+          if getrf fails, use 0 as sign and -inf as logdet
+        */
+        memcpy(sign, &@cblas_type@_zero, sizeof(*sign));
+        memcpy(logdet, &@cblas_type@_ninf, sizeof(*logdet));
+    }
+}
+
+static void
+@TYPE@_slogdet(char **args,
+               npy_intp *dimensions,
+               npy_intp *steps,
+               void *NPY_UNUSED(func))
+{
+    fortran_int m;
+    npy_uint8 *tmp_buff = NULL;
+    size_t matrix_size;
+    size_t pivot_size;
+    /* notes:
+     *   matrix will need to be copied always, as factorization in lapack is
+     *          made inplace
+     *   matrix will need to be in column-major order, as expected by lapack
+     *          code (fortran)
+     *   always a square matrix
+     *   need to allocate memory for both, matrix_buffer and pivot buffer
+     */
+    INIT_OUTER_LOOP_3
+    m = (fortran_int) dimensions[0];
+    matrix_size = m*m*sizeof(@typ@);
+    pivot_size = m*sizeof(fortran_int);
+    tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size);
+
+    if (tmp_buff)
+    {
+        LINEARIZE_DATA_t lin_data;
+        /* swapped steps to get matrix in FORTRAN order */
+        init_linearize_data(&lin_data, m, m,
+                            (ptrdiff_t)steps[1],
+                            (ptrdiff_t)steps[0]);
+        BEGIN_OUTER_LOOP_3
+            linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data);
+            @TYPE@_slogdet_single_element(m,
+                                          (void*)tmp_buff,
+                                          (fortran_int*)(tmp_buff+matrix_size),
+                                          (@typ@*)args[1],
+                                          (@basetyp@*)args[2]);
+        END_OUTER_LOOP
+
+        free(tmp_buff);
+    }
+}
+
+static void
+@TYPE@_det(char **args,
+           npy_intp *dimensions,
+           npy_intp *steps,
+           void *NPY_UNUSED(func))
+{
+    fortran_int m;
+    npy_uint8 *tmp_buff;
+    size_t matrix_size;
+    size_t pivot_size;
+    /* notes:
+     *   matrix will need to be copied always, as factorization in lapack is
+     *       made inplace
+     *   matrix will need to be in column-major order, as expected by lapack
+     *       code (fortran)
+     *   always a square matrix
+     *   need to allocate memory for both, matrix_buffer and pivot buffer
+     */
+    INIT_OUTER_LOOP_2
+    m = (fortran_int) dimensions[0];
+    matrix_size = m*m*sizeof(@typ@);
+    pivot_size = m*sizeof(fortran_int);
+    tmp_buff = (npy_uint8 *)malloc(matrix_size + pivot_size);
+
+    if (tmp_buff)
+    {
+        LINEARIZE_DATA_t lin_data;
+        @typ@ sign;
+        @basetyp@ logdet;
+        /* swapped steps to get matrix in FORTRAN order */
+        init_linearize_data(&lin_data, m, m,
+                            (ptrdiff_t)steps[1],
+                            (ptrdiff_t)steps[0]);
+
+        BEGIN_OUTER_LOOP_2
+            linearize_@TYPE@_matrix(tmp_buff, args[0], &lin_data);
+            @TYPE@_slogdet_single_element(m,
+                                          (void*)tmp_buff,
+                                          (fortran_int*)(tmp_buff+matrix_size),
+                                          &sign,
+                                          &logdet);
+            *(@typ@ *)args[1] = @TYPE@_det_from_slogdet(sign, logdet);
+        END_OUTER_LOOP
+
+        free(tmp_buff);
+    }
+}
+/**end repeat**/
+
+
+/* -------------------------------------------------------------------------- */
+                          /* Eigh family */
+
+typedef struct eigh_params_struct {
+    void *A;     /* matrix */
+    void *W;     /* eigenvalue vector */
+    void *WORK;  /* main work buffer */
+    void *RWORK; /* secondary work buffer (for complex versions) */
+    void *IWORK;
+    fortran_int N;
+    fortran_int LWORK;
+    fortran_int LRWORK;
+    fortran_int LIWORK;
+    char JOBZ;
+    char UPLO;
+} EIGH_PARAMS_t;
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE#
+   #typ=npy_float,npy_double#
+   #ftyp=fortran_real,fortran_doublereal#
+   #lapack_func=ssyevd,dsyevd#
+*/
+
+/*
+ * Initialize the parameters to use in for the lapack function _syevd
+ * Handles buffer allocation
+ */
+static inline int
+init_@lapack_func@(EIGH_PARAMS_t* params, char JOBZ, char UPLO,
+                   fortran_int N)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *mem_buff2 = NULL;
+    @typ@ query_work_size;
+    fortran_int query_iwork_size;
+    fortran_int lwork  = -1;
+    fortran_int liwork = -1;
+    fortran_int info;
+    npy_uint8 *a, *w, *work, *iwork;
+    size_t alloc_size = N*(N+1)*sizeof(@typ@);
+
+    mem_buff = malloc(alloc_size);
+
+    if (!mem_buff)
+        goto error;
+    a = mem_buff;
+    w = mem_buff + N*N*sizeof(@typ@);
+    LAPACK(@lapack_func@)(&JOBZ, &UPLO, &N,
+                          (@ftyp@*)a, &N, (@ftyp@*)w,
+                          &query_work_size, &lwork,
+                          &query_iwork_size, &liwork,
+                          &info);
+
+    if (info != 0)
+        goto error;
+
+    work = mem_buff;
+    lwork = (fortran_int)query_work_size;
+    liwork = query_iwork_size;
+    mem_buff2 = malloc(lwork*sizeof(@typ@) + liwork*sizeof(fortran_int));
+    if (!mem_buff2)
+        goto error;
+
+    work = mem_buff2;
+    iwork = mem_buff2 + lwork*sizeof(@typ@);
+
+    params->A = a;
+    params->W = w;
+    params->WORK = work;
+    params->RWORK = NULL; /* unused */
+    params->IWORK = iwork;
+    params->N = N;
+    params->LWORK = lwork;
+    params->LRWORK = 0; /* unused */
+    params->LIWORK = liwork;
+    params->JOBZ = JOBZ;
+    params->UPLO = UPLO;
+
+    return 1;
+
+ error:
+    /* something failed */
+    memset(params, 0, sizeof(*params));
+    free(mem_buff2);
+    free(mem_buff);
+
+    return 0;
+}
+
+static inline fortran_int
+call_@lapack_func@(EIGH_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->JOBZ, &params->UPLO, &params->N,
+                          params->A, &params->N, params->W,
+                          params->WORK, &params->LWORK,
+                          params->IWORK, &params->LIWORK,
+                          &rv);
+    return rv;
+}
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=CFLOAT,CDOUBLE#
+   #typ=npy_cfloat,npy_cdouble#
+   #basetyp=npy_float,npy_double#
+   #ftyp=fortran_complex,fortran_doublecomplex#
+   #fbasetyp=fortran_real,fortran_doublereal#
+   #lapack_func=cheevd,zheevd#
+*/
+/*
+ * Initialize the parameters to use in for the lapack function _heev
+ * Handles buffer allocation
+ */
+static inline int
+init_@lapack_func@(EIGH_PARAMS_t *params,
+                   char JOBZ,
+                   char UPLO,
+                   fortran_int N)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *mem_buff2 = NULL;
+    @ftyp@ query_work_size;
+    @fbasetyp@ query_rwork_size;
+    fortran_int query_iwork_size;
+    fortran_int lwork = -1;
+    fortran_int lrwork = -1;
+    fortran_int liwork = -1;
+    npy_uint8 *a, *w, *work, *rwork, *iwork;
+    fortran_int info;
+
+    mem_buff = malloc(N*N*sizeof(@typ@)+N*sizeof(@basetyp@));
+    if (!mem_buff)
+        goto error;
+    a = mem_buff;
+    w = mem_buff+N*N*sizeof(@typ@);
+
+    LAPACK(@lapack_func@)(&JOBZ, &UPLO, &N,
+                          (@ftyp@*)a, &N, (@fbasetyp@*)w,
+                          &query_work_size, &lwork,
+                          &query_rwork_size, &lrwork,
+                          &query_iwork_size, &liwork,
+                          &info);
+    if (info != 0)
+        goto error;
+
+    lwork = (fortran_int)*(@fbasetyp@*)&query_work_size;
+    lrwork = (fortran_int)query_rwork_size;
+    liwork = query_iwork_size;
+
+    mem_buff2 = malloc(lwork*sizeof(@typ@) +
+                       lrwork*sizeof(@basetyp@) +
+                       liwork*sizeof(fortran_int));
+    if (!mem_buff2)
+        goto error;
+    work = mem_buff2;
+    rwork = work + lwork*sizeof(@typ@);
+    iwork = rwork + lrwork*sizeof(@basetyp@);
+
+    params->A = a;
+    params->W = w;
+    params->WORK = work;
+    params->RWORK = rwork;
+    params->IWORK = iwork;
+    params->N = N;
+    params->LWORK = lwork;
+    params->LRWORK = lrwork;
+    params->LIWORK = liwork;
+    params->JOBZ = JOBZ;
+    params->UPLO = UPLO;
+
+    return 1;
+
+    /* something failed */
+error:
+    memset(params, 0, sizeof(*params));
+    free(mem_buff2);
+    free(mem_buff);
+
+    return 0;
+}
+
+static inline fortran_int
+call_@lapack_func@(EIGH_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->JOBZ, &params->UPLO, &params->N,
+                          params->A, &params->N, params->W,
+                          params->WORK, &params->LWORK,
+                          params->RWORK, &params->LRWORK,
+                          params->IWORK, &params->LIWORK,
+                          &rv);
+    return rv;
+}
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #BASETYPE=FLOAT,DOUBLE,FLOAT,DOUBLE#
+   #typ=npy_float,npy_double,npy_cfloat,npy_cdouble#
+   #basetyp=npy_float,npy_double,npy_float,npy_double#
+   #lapack_func=ssyevd,dsyevd,cheevd,zheevd#
+**/
+/*
+ * (M,M)->(M,)(M,M)
+ * dimensions[1] -> M
+ * args[0] -> A[in]
+ * args[1] -> W
+ * args[2] -> A[out]
+ */
+
+static inline void
+release_@lapack_func@(EIGH_PARAMS_t *params)
+{
+    /* allocated memory in A and WORK */
+    free(params->A);
+    free(params->WORK);
+    memset(params, 0, sizeof(*params));
+}
+
+
+static inline void
+@TYPE@_eigh_wrapper(char JOBZ,
+                    char UPLO,
+                    char**args,
+                    npy_intp* dimensions,
+                    npy_intp* steps)
+{
+    ptrdiff_t outer_steps[3];
+    size_t iter;
+    size_t outer_dim = *dimensions++;
+    size_t op_count = (JOBZ=='N')?2:3;
+    EIGH_PARAMS_t eigh_params;
+
+    for (iter=0; iter < op_count; ++iter) {
+        outer_steps[iter] = (ptrdiff_t) steps[iter];
+    }
+    steps += op_count;
+
+    if (init_@lapack_func@(&eigh_params,
+                           JOBZ,
+                           UPLO,
+                           (fortran_int)dimensions[0])) {
+        LINEARIZE_DATA_t matrix_in_ld;
+        LINEARIZE_DATA_t eigenvectors_out_ld;
+        LINEARIZE_DATA_t eigenvalues_out_ld;
+
+        init_linearize_data(&matrix_in_ld,
+                            eigh_params.N, eigh_params.N,
+                            steps[1], steps[0]);
+        init_linearize_data(&eigenvalues_out_ld,
+                            1, eigh_params.N,
+                            0, steps[2]);
+        if ('V' == eigh_params.JOBZ) {
+            init_linearize_data(&eigenvectors_out_ld,
+                                eigh_params.N, eigh_params.N,
+                                steps[4], steps[3]);
+        }
+
+        for (iter = 0; iter < outer_dim; ++iter) {
+            int not_ok;
+            /* copy the matrix in */
+            linearize_@TYPE@_matrix(eigh_params.A, args[0], &matrix_in_ld);
+            not_ok = call_@lapack_func@(&eigh_params);
+            if (!not_ok) {
+                /* lapack ok, copy result out */
+                delinearize_@BASETYPE@_matrix(args[1],
+                                              eigh_params.W,
+                                              &eigenvalues_out_ld);
+
+                if ('V' == eigh_params.JOBZ) {
+                    delinearize_@TYPE@_matrix(args[2],
+                                              eigh_params.A,
+                                              &eigenvectors_out_ld);
+                }
+            } else {
+                /* lapack fail, set result to nan */
+                nan_@BASETYPE@_matrix(args[1], &eigenvalues_out_ld);
+                if ('V' == eigh_params.JOBZ) {
+                    nan_@TYPE@_matrix(args[2], &eigenvectors_out_ld);
+                }
+            }
+            update_pointers((npy_uint8**)args, outer_steps, op_count);
+        }
+
+        release_@lapack_func@(&eigh_params);
+    }
+}
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+ */
+static void
+@TYPE@_eighlo(char **args,
+              npy_intp *dimensions,
+              npy_intp *steps,
+              void *NPY_UNUSED(func))
+{
+    @TYPE@_eigh_wrapper('V', 'L', args, dimensions, steps);
+}
+
+static void
+@TYPE@_eighup(char **args,
+              npy_intp *dimensions,
+              npy_intp *steps,
+              void* NPY_UNUSED(func))
+{
+    @TYPE@_eigh_wrapper('V', 'U', args, dimensions, steps);
+}
+
+static void
+@TYPE@_eigvalshlo(char **args,
+                  npy_intp *dimensions,
+                  npy_intp *steps,
+                  void* NPY_UNUSED(func))
+{
+    @TYPE@_eigh_wrapper('N', 'L', args, dimensions, steps);
+}
+
+static void
+@TYPE@_eigvalshup(char **args,
+                  npy_intp *dimensions,
+                  npy_intp *steps,
+                  void* NPY_UNUSED(func))
+{
+    @TYPE@_eigh_wrapper('N', 'U', args, dimensions, steps);
+}
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+                  /* Solve family (includes inv) */
+
+typedef struct gesv_params_struct
+{
+    void *A; /* A is (N,N) of base type */
+    void *B; /* B is (N,NRHS) of base type */
+    fortran_int * IPIV; /* IPIV is (N) */
+
+    fortran_int N;
+    fortran_int NRHS;
+    fortran_int LDA;
+    fortran_int LDB;
+} GESV_PARAMS_t;
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=npy_float,npy_double,npy_cfloat,npy_cdouble#
+   #ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex#
+   #lapack_func=sgesv,dgesv,cgesv,zgesv#
+*/
+
+/*
+ * Initialize the parameters to use in for the lapack function _heev
+ * Handles buffer allocation
+ */
+static inline int
+init_@lapack_func@(GESV_PARAMS_t *params, fortran_int N, fortran_int NRHS)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *a, *b, *ipiv;
+    mem_buff = malloc(N*N*sizeof(@ftyp@) +
+                      N*NRHS*sizeof(@ftyp@) +
+                      N*sizeof(fortran_int));
+    if (!mem_buff)
+        goto error;
+    a = mem_buff;
+    b = a + N*N*sizeof(@ftyp@);
+    ipiv = b + N*NRHS*sizeof(@ftyp@);
+
+    params->A = a;
+    params->B = b;
+    params->IPIV = (fortran_int*)ipiv;
+    params->N = N;
+    params->NRHS = NRHS;
+    params->LDA = N;
+    params->LDB = N;
+
+    return 1;
+ error:
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline void
+release_@lapack_func@(GESV_PARAMS_t *params)
+{
+    /* memory block base is in A */
+    free(params->A);
+    memset(params, 0, sizeof(*params));
+}
+
+static inline fortran_int
+call_@lapack_func@(GESV_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->N, &params->NRHS,
+                          params->A, &params->LDA,
+                          params->IPIV,
+                          params->B, &params->LDB,
+                          &rv);
+    return rv;
+}
+
+static void
+@TYPE@_solve(char **args, npy_intp *dimensions, npy_intp *steps,
+             void *NPY_UNUSED(func))
+{
+    GESV_PARAMS_t params;
+    fortran_int n, nrhs;
+    INIT_OUTER_LOOP_3
+
+    n = (fortran_int)dimensions[0];
+    nrhs = (fortran_int)dimensions[1];
+    if (init_@lapack_func@(&params, n, nrhs)) {
+        LINEARIZE_DATA_t a_in, b_in, r_out;
+
+        init_linearize_data(&a_in, n, n, steps[1], steps[0]);
+        init_linearize_data(&b_in, nrhs, n, steps[3], steps[2]);
+        init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]);
+
+        BEGIN_OUTER_LOOP_3
+            int not_ok;
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            linearize_@TYPE@_matrix(params.B, args[1], &b_in);
+            not_ok =call_@lapack_func@(&params);
+            if (!not_ok) {
+                delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
+            } else {
+                nan_@TYPE@_matrix(args[2], &r_out);
+            }
+        END_OUTER_LOOP
+
+        release_@lapack_func@(&params);
+    }
+}
+
+static void
+@TYPE@_solve1(char **args, npy_intp *dimensions, npy_intp *steps,
+              void *NPY_UNUSED(func))
+{
+    GESV_PARAMS_t params;
+    fortran_int n;
+    INIT_OUTER_LOOP_3
+
+    n = (fortran_int)dimensions[0];
+    if (init_@lapack_func@(&params, n, 1)) {
+        LINEARIZE_DATA_t a_in, b_in, r_out;
+        init_linearize_data(&a_in, n, n, steps[1], steps[0]);
+        init_linearize_data(&b_in, 1, n, 1, steps[2]);
+        init_linearize_data(&r_out, 1, n, 1, steps[3]);
+
+        BEGIN_OUTER_LOOP_3
+            int not_ok;
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            linearize_@TYPE@_matrix(params.B, args[1], &b_in);
+            not_ok = call_@lapack_func@(&params);
+            if (!not_ok) {
+                delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
+            } else {
+                nan_@TYPE@_matrix(args[2], &r_out);
+            }
+        END_OUTER_LOOP
+
+        release_@lapack_func@(&params);
+    }
+}
+
+static void
+@TYPE@_inv(char **args, npy_intp *dimensions, npy_intp *steps,
+           void *NPY_UNUSED(func))
+{
+    GESV_PARAMS_t params;
+    fortran_int n;
+    INIT_OUTER_LOOP_2
+
+    n = (fortran_int)dimensions[0];
+    if (init_@lapack_func@(&params, n, n)) {
+        LINEARIZE_DATA_t a_in, r_out;
+        init_linearize_data(&a_in, n, n, steps[1], steps[0]);
+        init_linearize_data(&r_out, n, n, steps[3], steps[2]);
+
+        BEGIN_OUTER_LOOP_2
+            int not_ok;
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            identity_@TYPE@_matrix(params.B, n);
+            not_ok = call_@lapack_func@(&params);
+            if (!not_ok) {
+                delinearize_@TYPE@_matrix(args[1], params.B, &r_out);
+            } else {
+                nan_@TYPE@_matrix(args[1], &r_out);
+            }
+        END_OUTER_LOOP
+
+        release_@lapack_func@(&params);
+    }
+}
+/**end repeat**/
+
+
+/* -------------------------------------------------------------------------- */
+                     /* Cholesky decomposition */
+
+typedef struct potr_params_struct
+{
+    void *A;
+    fortran_int N;
+    fortran_int LDA;
+    char UPLO;
+} POTR_PARAMS_t;
+
+/**begin repeat
+
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #ftyp=fortran_real, fortran_doublereal,
+         fortran_complex, fortran_doublecomplex#
+   #lapack_func=spotrf,dpotrf,cpotrf,zpotrf#
+ */
+
+static inline int
+init_@lapack_func@(POTR_PARAMS_t *params, char UPLO, fortran_int N)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *a;
+
+    mem_buff = malloc(N*N*sizeof(@ftyp@));
+    if (!mem_buff)
+        goto error;
+
+    a = mem_buff;
+
+    params->A = a;
+    params->N = N;
+    params->LDA = N;
+    params->UPLO = UPLO;
+
+    return 1;
+ error:
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline void
+release_@lapack_func@(POTR_PARAMS_t *params)
+{
+    /* memory block base in A */
+    free(params->A);
+    memset(params, 0, sizeof(*params));
+}
+
+static inline fortran_int
+call_@lapack_func@(POTR_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->UPLO,
+                          &params->N, params->A, &params->LDA,
+                          &rv);
+    return rv;
+}
+
+static void
+@TYPE@_cholesky(char uplo, char **args, npy_intp *dimensions, npy_intp *steps)
+{
+    POTR_PARAMS_t params;
+    fortran_int n;
+    INIT_OUTER_LOOP_2
+
+    n = (fortran_int)dimensions[0];
+    if (init_@lapack_func@(&params, uplo, n))
+    {
+        LINEARIZE_DATA_t a_in, r_out;
+        init_linearize_data(&a_in, n, n, steps[1], steps[0]);
+        init_linearize_data(&r_out, n, n, steps[3], steps[2]);
+        BEGIN_OUTER_LOOP_2
+            int not_ok;
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            not_ok = call_@lapack_func@(&params);
+            if (!not_ok) {
+                triu_@TYPE@_matrix(params.A, params.N);
+                delinearize_@TYPE@_matrix(args[1], params.A, &r_out);
+            } else {
+                nan_@TYPE@_matrix(args[1], &r_out);
+            }
+        END_OUTER_LOOP
+        release_@lapack_func@(&params);
+    }
+}
+
+static void
+@TYPE@_cholesky_up(char **args, npy_intp *dimensions, npy_intp *steps,
+                void *NPY_UNUSED(func))
+{
+    @TYPE@_cholesky('U', args, dimensions, steps);
+}
+
+static void
+@TYPE@_cholesky_lo(char **args, npy_intp *dimensions, npy_intp *steps,
+                void *NPY_UNUSED(func))
+{
+    @TYPE@_cholesky('L', args, dimensions, steps);
+}
+
+
+
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+                          /* eig family  */
+
+typedef struct geev_params_struct {
+    void *A;
+    void *WR; /* RWORK in complex versions, REAL W buffer for (sd)geev*/
+    void *WI;
+    void *VLR; /* REAL VL buffers for _geev where _ is s, d */
+    void *VRR; /* REAL VR buffers for _geev hwere _ is s, d */
+    void *WORK;
+    void *W;  /* final w */
+    void *VL; /* final vl */
+    void *VR; /* final vr */
+
+    fortran_int N;
+    fortran_int LDA;
+    fortran_int LDVL;
+    fortran_int LDVR;
+    fortran_int LWORK;
+
+    char JOBVL;
+    char JOBVR;
+} GEEV_PARAMS_t;
+
+static inline void
+dump_geev_params(const char *name, GEEV_PARAMS_t* params)
+{
+    TRACE_TXT("\n%s\n"
+
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+              "\t%10s: %p\n"\
+
+              "\t%10s: %d\n"\
+              "\t%10s: %d\n"\
+              "\t%10s: %d\n"\
+              "\t%10s: %d\n"\
+              "\t%10s: %d\n"\
+
+              "\t%10s: %c\n"\
+              "\t%10s: %c\n",
+
+              name,
+
+              "A", params->A,
+              "WR", params->WR,
+              "WI", params->WI,
+              "VLR", params->VLR,
+              "VRR", params->VRR,
+              "WORK", params->WORK,
+              "W", params->W,
+              "VL", params->VL,
+              "VR", params->VR,
+
+              "N", (int)params->N,
+              "LDA", (int)params->LDA,
+              "LDVL", (int)params->LDVL,
+              "LDVR", (int)params->LDVR,
+              "LWORK", (int)params->LWORK,
+
+              "JOBVL", params->JOBVL,
+              "JOBVR", params->JOBVR);
+}
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE#
+   #CTYPE=CFLOAT,CDOUBLE#
+   #typ=float,double#
+   #complextyp=COMPLEX_t,DOUBLECOMPLEX_t#
+   #lapack_func=sgeev,dgeev#
+   #zero=0.0f,0.0#
+*/
+static inline int
+init_@lapack_func@(GEEV_PARAMS_t *params, char jobvl, char jobvr, fortran_int n)
+{
+    npy_uint8 *mem_buff=NULL;
+    npy_uint8 *mem_buff2=NULL;
+    npy_uint8 *a, *wr, *wi, *vlr, *vrr, *work, *w, *vl, *vr;
+    size_t a_size = n*n*sizeof(@typ@);
+    size_t wr_size = n*sizeof(@typ@);
+    size_t wi_size = n*sizeof(@typ@);
+    size_t vlr_size = jobvl=='V' ? n*n*sizeof(@typ@) : 0;
+    size_t vrr_size = jobvr=='V' ? n*n*sizeof(@typ@) : 0;
+    size_t w_size = wr_size*2;
+    size_t vl_size = vlr_size*2;
+    size_t vr_size = vrr_size*2;
+    size_t work_count = 0;
+    @typ@ work_size_query;
+    fortran_int do_size_query = -1;
+    fortran_int rv;
+
+    /* allocate data for known sizes (all but work) */
+    mem_buff = malloc(a_size + wr_size + wi_size +
+                      vlr_size + vrr_size +
+                      w_size + vl_size + vr_size);
+    if (!mem_buff)
+        goto error;
+
+    a = mem_buff;
+    wr = a + a_size;
+    wi = wr + wr_size;
+    vlr = wi + wi_size;
+    vrr = vlr + vlr_size;
+    w = vrr + vrr_size;
+    vl = w + w_size;
+    vr = vl + vl_size;
+    LAPACK(@lapack_func@)(&jobvl, &jobvr, &n,
+                          (void *)a, &n, (void *)wr, (void *)wi,
+                          (void *)vl, &n, (void *)vr, &n,
+                          &work_size_query, &do_size_query,
+                          &rv);
+
+    if (0 != rv)
+        goto error;
+
+    work_count = (size_t)work_size_query;
+    mem_buff2 = malloc(work_count*sizeof(@typ@));
+    if (!mem_buff2)
+        goto error;
+    work = mem_buff2;
+
+    params->A = a;
+    params->WR = wr;
+    params->WI = wi;
+    params->VLR = vlr;
+    params->VRR = vrr;
+    params->WORK = work;
+    params->W = w;
+    params->VL = vl;
+    params->VR = vr;
+    params->N = n;
+    params->LDA = n;
+    params->LDVL = n;
+    params->LDVR = n;
+    params->LWORK = (fortran_int)work_count;
+    params->JOBVL = jobvl;
+    params->JOBVR = jobvr;
+
+    return 1;
+ error:
+    free(mem_buff2);
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline fortran_int
+call_@lapack_func@(GEEV_PARAMS_t* params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->JOBVL, &params->JOBVR,
+                          &params->N, params->A, &params->LDA,
+                          params->WR, params->WI,
+                          params->VLR, &params->LDVL,
+                          params->VRR, &params->LDVR,
+                          params->WORK, &params->LWORK,
+                          &rv);
+    return rv;
+}
+
+
+static inline void
+mk_@TYPE@_complex_array_from_real(@complextyp@ *c, const @typ@ *re, size_t n)
+{
+    size_t iter;
+    for (iter = 0; iter < n; ++iter) {
+        c[iter].array[0] = re[iter];
+        c[iter].array[1] = @zero@;
+    }
+}
+
+static inline void
+mk_@TYPE@_complex_array(@complextyp@ *c,
+                        const @typ@ *re,
+                        const @typ@ *im,
+                        size_t n)
+{
+    size_t iter;
+    for (iter = 0; iter < n; ++iter) {
+        c[iter].array[0] = re[iter];
+        c[iter].array[1] = im[iter];
+    }
+}
+
+static inline void
+mk_@TYPE@_complex_array_conjugate_pair(@complextyp@ *c,
+                                       const @typ@ *r,
+                                       size_t n)
+{
+    size_t iter;
+    for (iter = 0; iter < n; ++iter) {
+        @typ@ re = r[iter];
+        @typ@ im = r[iter+n];
+        c[iter].array[0] = re;
+        c[iter].array[1] = im;
+        c[iter+n].array[0] = re;
+        c[iter+n].array[1] = -im;
+    }
+}
+
+/*
+ * make the complex eigenvectors from the real array produced by sgeev/zgeev.
+ * c is the array where the results will be left.
+ * r is the source array of reals produced by sgeev/zgeev
+ * i is the eigenvalue imaginary part produced by sgeev/zgeev
+ * n is so that the order of the matrix is n by n
+ */
+static inline void
+mk_@lapack_func@_complex_eigenvectors(@complextyp@ *c,
+                                      const @typ@ *r,
+                                      const @typ@ *i,
+                                      size_t n)
+{
+    size_t iter = 0;
+    while (iter < n)
+    {
+        if (i[iter] ==  @zero@) {
+            /* eigenvalue was real, eigenvectors as well...  */
+            mk_@TYPE@_complex_array_from_real(c, r, n);
+            c += n;
+            r += n;
+            iter ++;
+        } else {
+            /* eigenvalue was complex, generate a pair of eigenvectors */
+            mk_@TYPE@_complex_array_conjugate_pair(c, r, n);
+            c += 2*n;
+            r += 2*n;
+            iter += 2;
+        }
+    }
+}
+
+
+static inline void
+process_@lapack_func@_results(GEEV_PARAMS_t *params)
+{
+    /* REAL versions of geev need the results to be translated
+     * into complex versions. This is the way to deal with imaginary
+     * results. In our gufuncs we will always return complex arrays!
+     */
+    mk_@TYPE@_complex_array(params->W, params->WR, params->WI, params->N);
+
+    /* handle the eigenvectors */
+    if ('V' == params->JOBVL) {
+        mk_@lapack_func@_complex_eigenvectors(params->VL, params->VLR,
+                                              params->WI, params->N);
+    }
+    if ('V' == params->JOBVR) {
+        mk_@lapack_func@_complex_eigenvectors(params->VR, params->VRR,
+                                              params->WI, params->N);
+    }
+}
+
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=CFLOAT,CDOUBLE#
+   #typ=COMPLEX_t,DOUBLECOMPLEX_t#
+   #ftyp=fortran_complex,fortran_doublecomplex#
+   #realtyp=float,double#
+   #lapack_func=cgeev,zgeev#
+ */
+
+static inline int
+init_@lapack_func@(GEEV_PARAMS_t* params,
+                   char jobvl,
+                   char jobvr,
+                   fortran_int n)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *mem_buff2 = NULL;
+    npy_uint8 *a, *w, *vl, *vr, *work, *rwork;
+    size_t a_size = n*n*sizeof(@ftyp@);
+    size_t w_size = n*sizeof(@ftyp@);
+    size_t vl_size = jobvl=='V'? n*n*sizeof(@ftyp@) : 0;
+    size_t vr_size = jobvr=='V'? n*n*sizeof(@ftyp@) : 0;
+    size_t rwork_size = 2*n*sizeof(@realtyp@);
+    size_t work_count = 0;
+    @typ@ work_size_query;
+    fortran_int do_size_query = -1;
+    fortran_int rv;
+    size_t total_size = a_size + w_size + vl_size + vr_size + rwork_size; 
+
+    mem_buff = malloc(total_size);
+    if (!mem_buff)
+        goto error;
+
+    a = mem_buff;
+    w = a + a_size;
+    vl = w + w_size;
+    vr = vl + vl_size;
+    rwork = vr + vr_size;
+    LAPACK(@lapack_func@)(&jobvl, &jobvr, &n,
+                          (void *)a, &n, (void *)w,
+                          (void *)vl, &n, (void *)vr, &n,
+                          (void *)&work_size_query, &do_size_query,
+                          (void *)rwork,
+                          &rv);
+    if (0 != rv)
+        goto error;
+
+    work_count = (size_t) work_size_query.array[0];
+    mem_buff2 = malloc(work_count*sizeof(@ftyp@));
+    if (!mem_buff2)
+        goto error;
+
+    work = mem_buff2;
+
+    params->A = a;
+    params->WR = rwork;
+    params->WI = NULL;
+    params->VLR = NULL;
+    params->VRR = NULL;
+    params->VL = vl;
+    params->VR = vr;
+    params->WORK = work;
+    params->W = w;
+    params->N = n;
+    params->LDA = n;
+    params->LDVL = n;
+    params->LDVR = n;
+    params->LWORK = (fortran_int)work_count;
+    params->JOBVL = jobvl;
+    params->JOBVR = jobvr;
+
+    return 1;
+ error:
+    free(mem_buff2);
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline fortran_int
+call_@lapack_func@(GEEV_PARAMS_t* params)
+{
+    fortran_int rv;
+
+    LAPACK(@lapack_func@)(&params->JOBVL, &params->JOBVR,
+                          &params->N, params->A, &params->LDA,
+                          params->W,
+                          params->VL, &params->LDVL,
+                          params->VR, &params->LDVR,
+                          params->WORK, &params->LWORK,
+                          params->WR, /* actually RWORK */
+                          &rv);
+    return rv;
+}
+
+
+static inline void
+process_@lapack_func@_results(GEEV_PARAMS_t *NPY_UNUSED(params))
+{
+    /* nothing to do here, complex versions are ready to copy out */
+}
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #COMPLEXTYPE=CFLOAT,CDOUBLE,CFLOAT,CDOUBLE#
+   #ftype=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex#
+   #lapack_func=sgeev,dgeev,cgeev,zgeev#
+ */
+
+static inline void
+release_@lapack_func@(GEEV_PARAMS_t *params)
+{
+    free(params->WORK);
+    free(params->A);
+    memset(params, 0, sizeof(*params));
+}
+
+static inline void
+@TYPE@_eig_wrapper(char JOBVL,
+                   char JOBVR,
+                   char**args,
+                   npy_intp* dimensions,
+                   npy_intp* steps)
+{
+    ptrdiff_t outer_steps[4];
+    size_t iter;
+    size_t outer_dim = *dimensions++;
+    size_t op_count = 2;
+    GEEV_PARAMS_t geev_params;
+
+    STACK_TRACE;
+    op_count += 'V'==JOBVL?1:0;
+    op_count += 'V'==JOBVR?1:0;
+
+    for (iter=0; iter < op_count; ++iter) {
+        outer_steps[iter] = (ptrdiff_t) steps[iter];
+    }
+    steps += op_count;
+
+    if (init_@lapack_func@(&geev_params,
+                           JOBVL, JOBVR,
+                           (fortran_int)dimensions[0])) {
+        LINEARIZE_DATA_t a_in;
+        LINEARIZE_DATA_t w_out;
+        LINEARIZE_DATA_t vl_out;
+        LINEARIZE_DATA_t vr_out;
+
+        init_linearize_data(&a_in,
+                            geev_params.N, geev_params.N,
+                            steps[1], steps[0]);
+        steps += 2;
+        init_linearize_data(&w_out,
+                            1, geev_params.N,
+                            0, steps[0]);
+        steps += 1;
+        if ('V' == geev_params.JOBVL) {
+            init_linearize_data(&vl_out,
+                                geev_params.N, geev_params.N,
+                                steps[1], steps[0]);
+            steps += 2;
+        }
+        if ('V' == geev_params.JOBVR) {
+            init_linearize_data(&vr_out,
+                                geev_params.N, geev_params.N,
+                                steps[1], steps[0]);
+        }
+
+        for (iter = 0; iter < outer_dim; ++iter) {
+            int not_ok;
+            char **arg_iter = args;
+            /* copy the matrix in */
+            linearize_@TYPE@_matrix(geev_params.A, *arg_iter++, &a_in);
+            not_ok = call_@lapack_func@(&geev_params);
+
+            if (!not_ok) {
+                process_@lapack_func@_results(&geev_params);
+                delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
+                                                 geev_params.W,
+                                                 &w_out);
+
+                if ('V' == geev_params.JOBVL)
+                    delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
+                                                     geev_params.VL,
+                                                     &vl_out);
+                if ('V' == geev_params.JOBVR)
+                    delinearize_@COMPLEXTYPE@_matrix(*arg_iter++,
+                                                     geev_params.VR,
+                                                     &vr_out);
+            } else {
+                /* geev failed */
+                nan_@COMPLEXTYPE@_matrix(*arg_iter++, &w_out);
+                if ('V' == geev_params.JOBVL)
+                    nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vl_out);
+                if ('V' == geev_params.JOBVR)
+                    nan_@COMPLEXTYPE@_matrix(*arg_iter++, &vr_out);
+            }
+            update_pointers((npy_uint8**)args, outer_steps, op_count);
+        }
+
+        release_@lapack_func@(&geev_params);
+    }
+}
+
+static void
+@TYPE@_eig(char **args,
+           npy_intp *dimensions,
+           npy_intp *steps,
+           void *NPY_UNUSED(func))
+{
+    @TYPE@_eig_wrapper('N', 'V', args, dimensions, steps);
+}
+
+static void
+@TYPE@_eigvals(char **args,
+               npy_intp *dimensions,
+               npy_intp *steps,
+               void *NPY_UNUSED(func))
+{
+    @TYPE@_eig_wrapper('N', 'N', args, dimensions, steps);
+}
+
+/**end repeat**/
+
+
+/* -------------------------------------------------------------------------- */
+                 /* singular value decomposition  */
+
+typedef struct gessd_params_struct
+{
+    void *A;
+    void *S;
+    void *U;
+    void *VT;
+    void *WORK;
+    void *RWORK;
+    void *IWORK;
+
+    fortran_int M;
+    fortran_int N;
+    fortran_int LDA;
+    fortran_int LDU;
+    fortran_int LDVT;
+    fortran_int LWORK;
+    char JOBZ;
+} GESDD_PARAMS_t;
+
+
+static inline void
+dump_gesdd_params(const char *name,
+                  GESDD_PARAMS_t *params)
+{
+    TRACE_TXT("\n%s:\n"\
+
+              "%14s: %18p\n"\
+              "%14s: %18p\n"\
+              "%14s: %18p\n"\
+              "%14s: %18p\n"\
+              "%14s: %18p\n"\
+              "%14s: %18p\n"\
+              "%14s: %18p\n"\
+
+              "%14s: %18d\n"\
+              "%14s: %18d\n"\
+              "%14s: %18d\n"\
+              "%14s: %18d\n"\
+              "%14s: %18d\n"\
+              "%14s: %18d\n"\
+
+              "%14s: %15c'%c'\n",
+
+              name,
+
+              "A", params->A,
+              "S", params->S,
+              "U", params->U,
+              "VT", params->VT,
+              "WORK", params->WORK,
+              "RWORK", params->RWORK,
+              "IWORK", params->IWORK,
+
+              "M", (int)params->M,
+              "N", (int)params->N,
+              "LDA", (int)params->LDA,
+              "LDU", (int)params->LDU,
+              "LDVT", (int)params->LDVT,
+              "LWORK", (int)params->LWORK,
+
+              "JOBZ", ' ',params->JOBZ);
+}
+
+static inline int
+compute_urows_vtcolumns(char jobz,
+                        fortran_int m, fortran_int n,
+                        fortran_int *urows, fortran_int *vtcolumns)
+{
+    fortran_int min_m_n = m<n?m:n;
+    switch(jobz)
+    {
+    case 'N':
+        *urows = 0;
+        *vtcolumns = 0;
+        break;
+    case 'A':
+        *urows = m;
+        *vtcolumns = n;
+        break;
+    case 'S':
+        {
+            *urows = min_m_n;
+            *vtcolumns = min_m_n;
+        }
+        break;
+    default:
+        return 0;
+    }
+
+    return 1;
+}
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE#
+   #lapack_func=sgesdd,dgesdd#
+   #ftyp=fortran_real,fortran_doublereal#
+ */
+
+static inline int
+init_@lapack_func@(GESDD_PARAMS_t *params,
+                   char jobz,
+                   fortran_int m,
+                   fortran_int n)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *mem_buff2 = NULL;
+    npy_uint8 *a, *s, *u, *vt, *work, *iwork;
+    size_t a_size = (size_t)m*(size_t)n*sizeof(@ftyp@);
+    fortran_int min_m_n = m<n?m:n;
+    size_t s_size = ((size_t)min_m_n)*sizeof(@ftyp@);
+    fortran_int  u_row_count, vt_column_count;
+    size_t u_size, vt_size;
+    fortran_int work_count;
+    size_t work_size;
+    size_t iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int);
+
+    if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count))
+        goto error;
+
+    u_size = ((size_t)u_row_count)*m*sizeof(@ftyp@);
+    vt_size = n*((size_t)vt_column_count)*sizeof(@ftyp@);
+
+    mem_buff = malloc(a_size + s_size + u_size + vt_size + iwork_size);
+
+    if (!mem_buff)
+        goto error;
+
+    a = mem_buff;
+    s = a + a_size;
+    u = s + s_size;
+    vt = u + u_size;
+    iwork = vt + vt_size;
+
+    /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */
+    vt_column_count = vt_column_count < 1? 1 : vt_column_count;
+    {
+        /* compute optimal work size */
+        @ftyp@ work_size_query;
+        fortran_int do_query = -1;
+        fortran_int rv;
+        LAPACK(@lapack_func@)(&jobz, &m, &n,
+                              (void*)a, &m, (void*)s, (void*)u, &m,
+                              (void*)vt, &vt_column_count,
+                              &work_size_query, &do_query,
+                              (void*)iwork, &rv);
+        if (0!=rv)
+            goto error;
+        work_count = (fortran_int)work_size_query;
+        work_size = (size_t)work_count * sizeof(@ftyp@);
+    }
+
+    mem_buff2 = malloc(work_size);
+    if (!mem_buff2)
+        goto error;
+
+    work = mem_buff2;
+
+    params->M = m;
+    params->N = n;
+    params->A = a;
+    params->S = s;
+    params->U = u;
+    params->VT = vt;
+    params->WORK = work;
+    params->RWORK = NULL;
+    params->IWORK = iwork;
+    params->M = m;
+    params->N = n;
+    params->LDA = m;
+    params->LDU = m;
+    params->LDVT = vt_column_count;
+    params->LWORK = work_count;
+    params->JOBZ = jobz;
+
+    return 1;
+ error:
+    TRACE_TXT("%s failed init\n", __FUNCTION__);
+    free(mem_buff2);
+    free(mem_buff2);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline fortran_int
+call_@lapack_func@(GESDD_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->JOBZ, &params->M, &params->N,
+                          params->A, &params->LDA,
+                          params->S,
+                          params->U, &params->LDU,
+                          params->VT, &params->LDVT,
+                          params->WORK, &params->LWORK,
+                          params->IWORK,
+                          &rv);
+    return rv;
+}
+
+/**end repeat**/
+
+/**begin repeat
+   #TYPE=CFLOAT,CDOUBLE#
+   #ftyp=fortran_complex,fortran_doublecomplex#
+   #frealtyp=fortran_real,fortran_doublereal#
+   #typ=COMPLEX_t,DOUBLECOMPLEX_t#
+   #lapack_func=cgesdd,zgesdd#
+ */
+
+static inline int
+init_@lapack_func@(GESDD_PARAMS_t *params,
+                   char jobz,
+                   fortran_int m,
+                   fortran_int n)
+{
+    npy_uint8 *mem_buff = NULL, *mem_buff2 = NULL;
+    npy_uint8 *a,*s, *u, *vt, *work, *rwork, *iwork;
+    size_t a_size, s_size, u_size, vt_size, work_size, rwork_size, iwork_size;
+    fortran_int u_row_count, vt_column_count, work_count;
+    fortran_int min_m_n = m<n?m:n;
+
+    if (!compute_urows_vtcolumns(jobz, m, n, &u_row_count, &vt_column_count))
+        goto error;
+
+    a_size = ((size_t)m)*((size_t)n)*sizeof(@ftyp@);
+    s_size = ((size_t)min_m_n)*sizeof(@frealtyp@);
+    u_size = ((size_t)u_row_count)*m*sizeof(@ftyp@);
+    vt_size = n*((size_t)vt_column_count)*sizeof(@ftyp@);
+    rwork_size = 'N'==jobz?
+        7*((size_t)min_m_n) :
+        (5*(size_t)min_m_n*(size_t)min_m_n + 5*(size_t)min_m_n);
+    rwork_size *= sizeof(@ftyp@);
+    iwork_size = 8*((size_t)min_m_n)*sizeof(fortran_int);
+
+    mem_buff = malloc(a_size +
+                      s_size +
+                      u_size +
+                      vt_size +
+                      rwork_size +
+                      iwork_size);
+    if (!mem_buff)
+        goto error;
+
+    a = mem_buff;
+    s = a + a_size;
+    u = s + s_size;
+    vt = u + u_size;
+    rwork = vt + vt_size;
+    iwork = rwork + rwork_size;
+
+    /* fix vt_column_count so that it is a valid lapack parameter (0 is not) */
+    vt_column_count = vt_column_count < 1? 1 : vt_column_count;
+    {
+        /* compute optimal work size */
+        @ftyp@ work_size_query;
+        fortran_int do_query = -1;
+        fortran_int rv;
+        LAPACK(@lapack_func@)(&jobz, &m, &n,
+                              (void*)a, &m, (void*)s, (void*)u, &m,
+                              (void*)vt, &vt_column_count,
+                              &work_size_query, &do_query,
+                              (void*)rwork,
+                              (void*)iwork, &rv);
+        if (0!=rv)
+            goto error;
+        work_count = (fortran_int)((@typ@*)&work_size_query)->array[0];
+        work_size = (size_t)work_count * sizeof(@ftyp@);
+    }
+
+    mem_buff2 = malloc(work_size);
+    if (!mem_buff2)
+        goto error;
+
+    work = mem_buff2;
+
+    params->A = a;
+    params->S = s;
+    params->U = u;
+    params->VT = vt;
+    params->WORK = work;
+    params->RWORK = rwork;
+    params->IWORK = iwork;
+    params->M = m;
+    params->N = n;
+    params->LDA = m;
+    params->LDU = m;
+    params->LDVT = vt_column_count;
+    params->LWORK = work_count;
+    params->JOBZ = jobz;
+
+    return 1;
+ error:
+    TRACE_TXT("%s failed init\n", __FUNCTION__);
+    free(mem_buff2);
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline fortran_int
+call_@lapack_func@(GESDD_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func@)(&params->JOBZ, &params->M, &params->N,
+                          params->A, &params->LDA,
+                          params->S,
+                          params->U, &params->LDU,
+                          params->VT, &params->LDVT,
+                          params->WORK, &params->LWORK,
+                          params->RWORK,
+                          params->IWORK,
+                          &rv);
+    return rv;
+}
+/**end repeat**/
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #REALTYPE=FLOAT,DOUBLE,FLOAT,DOUBLE#
+   #lapack_func=sgesdd,dgesdd,cgesdd,zgesdd#
+ */
+static inline void
+release_@lapack_func@(GESDD_PARAMS_t* params)
+{
+    /* A and WORK contain allocated blocks */
+    free(params->A);
+    free(params->WORK);
+    memset(params, 0, sizeof(*params));
+}
+
+static inline void
+@TYPE@_svd_wrapper(char JOBZ,
+                   char **args,
+                   npy_intp* dimensions,
+                   npy_intp* steps)
+{
+    ptrdiff_t outer_steps[3];
+    size_t iter;
+    size_t outer_dim = *dimensions++;
+    size_t op_count = (JOBZ=='N')?2:4;
+    GESDD_PARAMS_t params;
+
+    for (iter=0; iter < op_count; ++iter) {
+        outer_steps[iter] = (ptrdiff_t) steps[iter];
+    }
+    steps += op_count;
+
+    if (init_@lapack_func@(&params,
+                           JOBZ,
+                           (fortran_int)dimensions[0],
+                           (fortran_int)dimensions[1])) {
+        LINEARIZE_DATA_t a_in, u_out, s_out, v_out;
+
+        init_linearize_data(&a_in, params.N, params.M, steps[1], steps[0]);
+        if ('N' == params.JOBZ) {
+            /* only the singular values are wanted */
+            fortran_int min_m_n = params.M < params.N? params.M : params.N;
+            init_linearize_data(&s_out, 1, min_m_n, 0, steps[2]);
+        } else {
+            fortran_int u_columns, v_rows;
+            fortran_int min_m_n = params.M < params.N? params.M : params.N;
+            if ('S' == params.JOBZ) {
+                u_columns = min_m_n;
+                v_rows = min_m_n;
+            } else {
+                u_columns = params.M;
+                v_rows = params.N;
+            }
+            init_linearize_data(&u_out,
+                                u_columns, params.M,
+                                steps[3], steps[2]);
+            init_linearize_data(&s_out,
+                                1, min_m_n,
+                                0, steps[4]);
+            init_linearize_data(&v_out,
+                                params.N, v_rows,
+                                steps[6], steps[5]);
+        }
+
+        for (iter = 0; iter < outer_dim; ++iter) {
+            int not_ok;
+            /* copy the matrix in */
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            not_ok = call_@lapack_func@(&params);
+            if (!not_ok) {
+                if ('N' == params.JOBZ) {
+                    delinearize_@REALTYPE@_matrix(args[1], params.S, &s_out);
+                } else {
+                    delinearize_@TYPE@_matrix(args[1], params.U, &u_out);
+                    delinearize_@REALTYPE@_matrix(args[2], params.S, &s_out);
+                    delinearize_@TYPE@_matrix(args[3], params.VT, &v_out);
+                }
+            } else {
+                if ('N' == params.JOBZ) {
+                    nan_@REALTYPE@_matrix(args[1], &s_out);
+                } else {
+                    nan_@TYPE@_matrix(args[1], &u_out);
+                    nan_@REALTYPE@_matrix(args[2], &s_out);
+                    nan_@TYPE@_matrix(args[3], &v_out);
+                }
+            }
+            update_pointers((npy_uint8**)args, outer_steps, op_count);
+        }
+
+        release_@lapack_func@(&params);
+    }
+}
+/**end repeat*/
+
+
+/* svd gufunc entry points */
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+ */
+static void
+@TYPE@_svd_N(char **args,
+             npy_intp *dimensions,
+             npy_intp *steps,
+             void *NPY_UNUSED(func))
+{
+    @TYPE@_svd_wrapper('N', args, dimensions, steps);
+}
+
+static void
+@TYPE@_svd_S(char **args,
+             npy_intp *dimensions,
+             npy_intp *steps,
+             void *NPY_UNUSED(func))
+{
+    @TYPE@_svd_wrapper('S', args, dimensions, steps);
+}
+
+static void
+@TYPE@_svd_A(char **args,
+             npy_intp *dimensions,
+             npy_intp *steps,
+             void *NPY_UNUSED(func))
+{
+    @TYPE@_svd_wrapper('A', args, dimensions, steps);
+}
+
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+               /* some basic ufuncs */
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+ */
+
+static void
+@TYPE@_add3(char **args,
+            npy_intp *dimensions,
+            npy_intp* steps,
+            void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_4
+    BEGIN_OUTER_LOOP_4
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ tmp;
+        tmp   = @TYPE@_add(*op1p, *op2p);
+        *op4p = @TYPE@_add(tmp, *op3p);
+    END_OUTER_LOOP
+}
+
+static void
+@TYPE@_multiply3(char **args,
+                 npy_intp *dimensions,
+                 npy_intp* steps,
+                 void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_4
+    BEGIN_OUTER_LOOP_4
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ tmp;
+        tmp   = @TYPE@_mul(*op1p, *op2p);
+        *op4p = @TYPE@_mul(tmp, *op3p);
+    END_OUTER_LOOP
+}
+
+static void
+@TYPE@_multiply3_add(char **args,
+                     npy_intp *dimensions,
+                     npy_intp* steps,
+                     void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_5
+    BEGIN_OUTER_LOOP_5
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ *op5p = (@typ@ *) args[4];
+        @typ@ tmp, tmp2;
+
+        tmp   = @TYPE@_mul(*op1p, *op2p);
+        tmp2  = @TYPE@_mul(tmp, *op3p);
+        *op5p = @TYPE@_add(tmp2, *op4p);
+    END_OUTER_LOOP
+}
+
+static void
+@TYPE@_multiply_add(char **args,
+                    npy_intp *dimensions,
+                    npy_intp* steps,
+                    void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_4
+    BEGIN_OUTER_LOOP_4
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ tmp;
+        tmp   = @TYPE@_mul(*op1p, *op2p);
+        *op4p = @TYPE@_add(tmp, *op3p);
+    END_OUTER_LOOP
+}
+
+static void
+@TYPE@_multiply_add2(char **args,
+                     npy_intp *dimensions,
+                     npy_intp* steps,
+                     void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_5
+    BEGIN_OUTER_LOOP_5
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ *op5p = (@typ@ *) args[4];
+        @typ@ tmp, tmp2;
+        tmp   = @TYPE@_mul(*op1p, *op2p);
+        tmp2  = @TYPE@_add(*op3p, *op4p);
+        *op5p = @TYPE@_add(tmp, tmp2);
+    END_OUTER_LOOP
+}
+
+static void
+@TYPE@_multiply4(char **args,
+                 npy_intp *dimensions,
+                 npy_intp* steps,
+                 void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_5
+    BEGIN_OUTER_LOOP_5
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ *op5p = (@typ@ *) args[4];
+        @typ@ tmp, tmp2;
+        tmp   = @TYPE@_mul(*op1p, *op2p);
+        tmp2  = @TYPE@_mul(*op3p, *op4p);
+        *op5p = @TYPE@_mul(tmp, tmp2);
+    END_OUTER_LOOP
+}
+
+static void
+@TYPE@_multiply4_add(char **args,
+                     npy_intp *dimensions,
+                     npy_intp* steps,
+                     void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_6
+    BEGIN_OUTER_LOOP_6
+        @typ@ *op1p = (@typ@ *) args[0];
+        @typ@ *op2p = (@typ@ *) args[1];
+        @typ@ *op3p = (@typ@ *) args[2];
+        @typ@ *op4p = (@typ@ *) args[3];
+        @typ@ *op5p = (@typ@ *) args[4];
+        @typ@ *op6p = (@typ@ *) args[5];
+        @typ@ tmp, tmp2, tmp3;
+        tmp   = @TYPE@_mul(*op1p, *op2p);
+        tmp2  = @TYPE@_mul(*op3p, *op4p);
+        tmp3  = @TYPE@_mul(tmp, tmp2);
+        *op6p = @TYPE@_add(tmp3, *op5p);
+    END_OUTER_LOOP
+}
+
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+             /* quadratic form */
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #typ=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+   #ftyp=fortran_real,fortran_doublereal,fortran_complex,fortran_doublecomplex#
+   #zero=s_zero,d_zero,c_zero,z_zero#
+   #blas_dot=FNAME(sdot),FNAME(ddot),FNAME(cdotu),FNAME(zdotu)#
+ */
+
+static inline @typ@
+@TYPE@_dot_blas(size_t count,
+                void *X, intptr_t X_byte_step,
+                void *Y, intptr_t Y_byte_step)
+{
+    @ftyp@ result;
+    fortran_int N = (fortran_int) count;
+    fortran_int INCX = X_byte_step/sizeof(@ftyp@);
+    fortran_int INCY = Y_byte_step/sizeof(@ftyp@);
+
+    result = @blas_dot@(&N, X, &INCX, Y, &INCY);
+
+    return *(@typ@ *)&result;
+}
+
+static inline @typ@
+@TYPE@_dot_std(size_t count,
+               void *X, intptr_t X_byte_step,
+               void *Y, intptr_t Y_byte_step)
+{
+    size_t i;
+    @typ@ acc, *x_ptr, *y_ptr;
+    x_ptr = (@typ@ *)X;
+    y_ptr = (@typ@ *)Y;
+
+    acc = @TYPE@_mul(*x_ptr, *y_ptr);
+    x_ptr = offset_ptr(x_ptr, X_byte_step);
+    y_ptr = offset_ptr(y_ptr, Y_byte_step);
+
+    for (i = 1; i < count; i++) {
+        @typ@ tmp;
+
+        tmp = @TYPE@_mul(*x_ptr, *y_ptr);
+        acc = @TYPE@_add(acc, tmp);
+
+        x_ptr = offset_ptr(x_ptr, X_byte_step);
+        y_ptr = offset_ptr(y_ptr, Y_byte_step);
+    }
+
+    return acc;
+}
+
+/* uQv, where u and v are vectors and Q is a matrix */
+static void
+@TYPE@_quadratic_form(char **args,
+                      npy_intp *dimensions,
+                      npy_intp* steps,
+                      void *NPY_UNUSED(func))
+{
+    INIT_OUTER_LOOP_4
+    size_t m = (size_t)dimensions[0];
+    size_t n = (size_t)dimensions[1];
+    ptrdiff_t u_step = (ptrdiff_t)steps[0];
+    ptrdiff_t Q_row_step = (ptrdiff_t)steps[1];
+    ptrdiff_t Q_column_step = (ptrdiff_t)steps[2];
+    ptrdiff_t v_step = (ptrdiff_t)steps[3];
+
+    if ((0 == (Q_row_step % sizeof(@ftyp@))) &&
+        (0 == (u_step % sizeof(@ftyp@)))) {
+        /* use blas loops for dot */
+        BEGIN_OUTER_LOOP_4
+            size_t column;
+            npy_uint8 *u, *Q, *v;
+            @typ@ *r;
+            @typ@ accum = @zero@;
+
+            u = (npy_uint8 *)args[0]; /* u (m)   [in] */
+            Q = (npy_uint8 *)args[1]; /* Q (m,n) [in] */
+            v = (npy_uint8 *)args[2]; /* v (n)   [in] */
+            r = (@typ@ *)args[3];   /* result  [out] */
+            /* sum (compute u * Q[i] * v[i]) for all i.
+               Q[i] are the different columns on Q */
+
+            for (column = 0; column < n; ++column) {
+                @typ@ tmp, tmp2;
+                tmp = @TYPE@_dot_blas(m,
+                                      Q, Q_row_step,
+                                      u, u_step);
+                tmp2  = @TYPE@_mul(tmp, *(@typ@*)v);
+                accum = @TYPE@_add(accum, tmp2);
+                Q += Q_column_step;
+                v += v_step;
+            }
+
+            *r = accum;
+        END_OUTER_LOOP
+    } else {
+        BEGIN_OUTER_LOOP_4
+            size_t column;
+            npy_uint8 *u, *Q, *v;
+            @typ@ *r;
+            @typ@ accum = @zero@;
+            u = (npy_uint8 *)args[0]; /* u (m)   [in] */
+            Q = (npy_uint8 *)args[1]; /* Q (m,n) [in] */
+            v = (npy_uint8 *)args[2]; /* v (n)   [in] */
+            r = (@typ@ *)args[3];   /* result  [out] */
+            /* sum (compute u * Q[i] * v[i]) for all i.
+               Q[i] are the different columns on Q */
+
+            for (column = 0; column < n; ++column) {
+                @typ@ tmp, tmp2;
+                tmp = @TYPE@_dot_std(m, Q, Q_row_step, u, u_step);
+                tmp2 = @TYPE@_mul(tmp, *(@typ@*)v);
+                accum = @TYPE@_add(accum, tmp2);
+                Q += Q_column_step;
+                v += v_step;
+            }
+
+            *r = accum;
+        END_OUTER_LOOP
+    }
+}
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+             /* chosolve (using potrs) */
+
+typedef struct potrs_params_struct {
+    void *A;
+    void *B;
+    fortran_int N;
+    fortran_int NRHS;
+    fortran_int LDA;
+    fortran_int LDB;
+    char UPLO;
+} POTRS_PARAMS_t;
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #ftyp=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+   #lapack_func_2=spotrf,dpotrf,cpotrf,zpotrf#
+   #lapack_func=spotrs,dpotrs,cpotrs,zpotrs#
+ */
+
+
+static inline int
+init_@lapack_func@(POTRS_PARAMS_t *params,
+                   char UPLO,
+                   fortran_int N,
+                   fortran_int NRHS)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *a, *b;
+    size_t a_size, b_size;
+
+    a_size = N*N*sizeof(@ftyp@);
+    b_size = N*NRHS*sizeof(@ftyp@);
+    mem_buff = malloc(a_size + b_size);
+    if (!mem_buff)
+      goto error;
+
+    a = mem_buff;
+    b = a + a_size;
+
+    params->A = (void*)a;
+    params->B = (void*)b;
+    params->N = N;
+    params->NRHS = NRHS;
+    params->LDA = N;
+    params->LDB = N;
+    params->UPLO = UPLO;
+
+    return 1;
+
+ error:
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline void
+release_@lapack_func@(POTRS_PARAMS_t *params)
+{
+    free(params->A);
+    memset(params,0,sizeof(*params));
+    return;
+}
+
+static inline int
+call_@lapack_func@(POTRS_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@lapack_func_2@)(&params->UPLO,
+                            &params->N,
+                            params->A, &params->LDA,
+                            &rv);
+    if (0 != rv)
+        return rv;
+
+    LAPACK(@lapack_func@)(&params->UPLO,
+                          &params->N, &params->NRHS,
+                          params->A, &params->LDA,
+                          params->B, &params->LDB,
+                          &rv);
+    return rv;
+}
+
+
+/* uplo: either 'U' or 'L'
+   ndim: use 1 to get nrhs from dimensions (matrix), 0 to use 1 (vector)
+ */
+static void
+@TYPE@_chosolve(char uplo, char ndim,
+                char **args,
+                npy_intp *dimensions,
+                npy_intp* steps
+                )
+{
+    POTRS_PARAMS_t params;
+    fortran_int n, nrhs;
+    INIT_OUTER_LOOP_3
+
+    n = (fortran_int)dimensions[0];
+    nrhs = ndim?(fortran_int)dimensions[1]:1;
+    if (init_@lapack_func@(&params, uplo, n, nrhs)) {
+        LINEARIZE_DATA_t a_in, b_in, r_out;
+
+        init_linearize_data(&a_in, n, n, steps[1], steps[0]);
+        if (ndim) {
+            init_linearize_data(&b_in,  nrhs, n, steps[3], steps[2]);
+            init_linearize_data(&r_out, nrhs, n, steps[5], steps[4]);
+        } else {
+            init_linearize_data(&b_in,  1, n, 0, steps[2]);
+            init_linearize_data(&r_out, 1, n, 0, steps[3]);
+        }
+
+        BEGIN_OUTER_LOOP_3
+            int not_ok;
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            linearize_@TYPE@_matrix(params.B, args[1], &b_in);
+            not_ok = call_@lapack_func@(&params);
+            if (!not_ok) {
+                delinearize_@TYPE@_matrix(args[2], params.B, &r_out);
+            } else {
+                nan_@TYPE@_matrix(args[2], &r_out);
+            }
+        END_OUTER_LOOP
+
+        release_@lapack_func@(&params);
+    }
+}
+
+static void
+@TYPE@_chosolve_up(char **args,
+                   npy_intp *dimensions,
+                   npy_intp* steps,
+                   void *NPY_UNUSED(func))
+{
+    @TYPE@_chosolve('U', 1, args, dimensions, steps);
+}
+
+static void
+@TYPE@_chosolve_lo(char **args,
+                   npy_intp *dimensions,
+                   npy_intp* steps,
+                   void *NPY_UNUSED(func))
+{
+    @TYPE@_chosolve('L', 1, args, dimensions, steps);
+}
+
+static void
+@TYPE@_chosolve1_up(char **args,
+                    npy_intp *dimensions,
+                    npy_intp* steps,
+                    void *NPY_UNUSED(func))
+{
+    @TYPE@_chosolve('U', 0, args, dimensions, steps);
+}
+
+static void
+@TYPE@_chosolve1_lo(char **args,
+                    npy_intp *dimensions,
+                    npy_intp* steps,
+                    void *NPY_UNUSED(func))
+{
+    @TYPE@_chosolve('L', 0, args, dimensions, steps);
+}
+
+/**end repeat**/
+
+/* -------------------------------------------------------------------------- */
+                 /* poinv  */
+
+typedef struct potri_params_struct {
+    void *A;
+    fortran_int N;
+    fortran_int LDA;
+    char UPLO;
+} POTRI_PARAMS_t;
+
+
+/**begin repeat
+   #TYPE=FLOAT,DOUBLE,CFLOAT,CDOUBLE#
+   #ftyp=float,double,COMPLEX_t,DOUBLECOMPLEX_t#
+   #potrf=spotrf,dpotrf,cpotrf,zpotrf#
+   #potri=spotri,dpotri,cpotri,zpotri#
+ */
+static inline int
+init_@potri@(POTRI_PARAMS_t *params,
+             char UPLO,
+             fortran_int N)
+{
+    npy_uint8 *mem_buff = NULL;
+    npy_uint8 *a;
+    size_t a_size;
+
+    a_size = N*N*sizeof(@ftyp@);
+    mem_buff = malloc(a_size);
+    if (!mem_buff)
+      goto error;
+
+    a = mem_buff;
+
+    params->A = (void*)a;
+    params->N = N;
+    params->LDA = N;
+    params->UPLO = UPLO;
+
+    return 1;
+
+ error:
+    free(mem_buff);
+    memset(params, 0, sizeof(*params));
+
+    return 0;
+}
+
+static inline void
+release_@potri@(POTRI_PARAMS_t *params)
+{
+    free(params->A);
+    memset(params,0,sizeof(*params));
+    return;
+}
+
+static inline int
+call_@potri@(POTRI_PARAMS_t *params)
+{
+    fortran_int rv;
+    LAPACK(@potrf@)(&params->UPLO,
+                    &params->N,
+                    params->A, &params->LDA,
+                    &rv);
+    if (0!= rv) {
+      return rv;
+    }
+
+    LAPACK(@potri@)(&params->UPLO,
+                    &params->N,
+                    params->A, &params->LDA,
+                    &rv);
+    return rv;
+}
+
+
+static inline void
+@TYPE@_poinv(char uplo,
+             char **args,
+             npy_intp *dimensions,
+             npy_intp *steps)
+{
+    POTRI_PARAMS_t params;
+    fortran_int n;
+    INIT_OUTER_LOOP_2
+
+    n = (fortran_int)dimensions[0];
+    if (init_@potri@(&params, uplo, n)) {
+        LINEARIZE_DATA_t a_in, r_out;
+
+        init_linearize_data(&a_in, n, n, steps[1], steps[0]);
+        init_linearize_data(&r_out, n, n, steps[3], steps[2]);
+
+        BEGIN_OUTER_LOOP_2
+            int not_ok;
+            linearize_@TYPE@_matrix(params.A, args[0], &a_in);
+            not_ok = call_@potri@(&params);
+            if (!not_ok) {
+                make_symmetric_@TYPE@_matrix(params.UPLO, params.N, params.A);
+                delinearize_@TYPE@_matrix(args[1], params.A, &r_out);
+            } else {
+                nan_@TYPE@_matrix(args[1], &r_out);
+            }
+        END_OUTER_LOOP
+
+        release_@potri@(&params);
+    }
+}
+
+static void
+@TYPE@_poinv_up(char **args,
+                npy_intp *dimensions,
+                npy_intp *steps,
+                void *NPY_UNUSED(func))
+{
+    @TYPE@_poinv('U', args, dimensions, steps);
+}
+
+static void
+@TYPE@_poinv_lo(char **args,
+                npy_intp *dimensions,
+                npy_intp *steps,
+                void *NPY_UNUSED(func))
+{
+    @TYPE@_poinv('L', args, dimensions, steps);
+}
+
+/**end repeat**/
+
+#pragma GCC diagnostic pop
+
+/* -------------------------------------------------------------------------- */
+              /* gufunc registration  */
+
+static void *array_of_nulls[] = {
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL,
+    (void *)NULL
+};
+
+#define FUNC_ARRAY_NAME(NAME) NAME ## _funcs
+
+#define GUFUNC_FUNC_ARRAY_REAL(NAME)                    \
+    static PyUFuncGenericFunction                       \
+    FUNC_ARRAY_NAME(NAME)[] = {                         \
+        FLOAT_ ## NAME,                                 \
+        DOUBLE_ ## NAME                                 \
+    }
+
+#define GUFUNC_FUNC_ARRAY_REAL_COMPLEX(NAME)            \
+    static PyUFuncGenericFunction                       \
+    FUNC_ARRAY_NAME(NAME)[] = {                         \
+        FLOAT_ ## NAME,                                 \
+        DOUBLE_ ## NAME,                                \
+        CFLOAT_ ## NAME,                                \
+        CDOUBLE_ ## NAME                                \
+    }
+
+/* There are problems with eig in complex single precision.
+ * That kernel is disabled
+ */
+#define GUFUNC_FUNC_ARRAY_EIG(NAME)                     \
+    static PyUFuncGenericFunction                       \
+    FUNC_ARRAY_NAME(NAME)[] = {                         \
+        FLOAT_ ## NAME,                                 \
+        DOUBLE_ ## NAME,                                \
+        CDOUBLE_ ## NAME                                \
+    }
+
+
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inner1d);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(dotc1d);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(innerwt);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(matrix_multiply);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(slogdet);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(det);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighlo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eighup);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshlo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(eigvalshup);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(solve1);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(inv);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_up);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(cholesky_lo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_N);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_S);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(svd_A);
+GUFUNC_FUNC_ARRAY_EIG(eig);
+GUFUNC_FUNC_ARRAY_EIG(eigvals);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(quadratic_form);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(add3);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply3);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply3_add);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply_add);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply_add2);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply4);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(multiply4_add);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve_up);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve_lo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve1_up);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(chosolve1_lo);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(poinv_up);
+GUFUNC_FUNC_ARRAY_REAL_COMPLEX(poinv_lo);
+
+static char equal_2_types[] = {
+    NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+static char equal_3_types[] = {
+    NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+static char equal_4_types[] = {
+    NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+static char equal_5_types[] = {
+    NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+static char equal_6_types[] = {
+    NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT, NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+/* second result is logdet, that will always be a REAL */
+static char slogdet_types[] = {
+    NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_CFLOAT, NPY_FLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE
+};
+
+static char eigh_types[] = {
+    NPY_FLOAT, NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_FLOAT, NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE
+};
+
+static char eighvals_types[] = {
+    NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_FLOAT, 
+    NPY_CDOUBLE, NPY_DOUBLE
+};
+
+static char eig_types[] = {
+    NPY_FLOAT, NPY_CFLOAT, NPY_CFLOAT,
+    NPY_DOUBLE, NPY_CDOUBLE, NPY_CDOUBLE,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+static char eigvals_types[] = {
+    NPY_FLOAT, NPY_CFLOAT,
+    NPY_DOUBLE, NPY_CDOUBLE,
+    NPY_CDOUBLE, NPY_CDOUBLE
+};
+
+static char svd_1_1_types[] = {
+    NPY_FLOAT, NPY_FLOAT,
+    NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT, NPY_FLOAT,
+    NPY_CDOUBLE, NPY_DOUBLE
+};
+
+static char svd_1_3_types[] = {
+    NPY_FLOAT,   NPY_FLOAT,   NPY_FLOAT,  NPY_FLOAT,
+    NPY_DOUBLE,  NPY_DOUBLE,  NPY_DOUBLE, NPY_DOUBLE,
+    NPY_CFLOAT,  NPY_CFLOAT,  NPY_FLOAT,  NPY_CFLOAT,
+    NPY_CDOUBLE, NPY_CDOUBLE, NPY_DOUBLE, NPY_CDOUBLE
+};
+
+typedef struct gufunc_descriptor_struct {
+    char *name;
+    char *signature;
+    char *doc;
+    int ntypes;
+    int nin;
+    int nout;
+    PyUFuncGenericFunction *funcs;
+    char *types;
+} GUFUNC_DESCRIPTOR_t;
+
+GUFUNC_DESCRIPTOR_t gufunc_descriptors [] = {
+    {
+        "inner1d",
+        "(i),(i)->()",
+        "inner on the last dimension and broadcast on the rest \n"\
+        "    \"(i),(i)->()\" \n",
+        4, 2, 1,
+        FUNC_ARRAY_NAME(inner1d),
+        equal_3_types
+    },
+    {
+        "dotc1d",
+        "(i),(i)->()",
+        "inner on the last dimension and broadcast on the rest \n"\
+        "    \"(i),(i)->()\" \n",
+        4, 2, 1,
+        FUNC_ARRAY_NAME(dotc1d),
+        equal_3_types
+    },
+    {
+        "innerwt",
+        "(i),(i),(i)->()",
+        "inner on the last dimension using 3 operands and broadcast on the"\
+        " rest \n"\
+        "    \"(i),(i),(i)->()\" \n",
+        2, 3, 1,
+        FUNC_ARRAY_NAME(innerwt),
+        equal_4_types
+    },
+    {
+        "matrix_multiply",
+        "(m,k),(k,n)->(m,n)",
+        "dot on the last two dimensions and broadcast on the rest \n"\
+        "    \"(m,k),(k,n)->(m,n)\" \n",
+        4, 2, 1,
+        FUNC_ARRAY_NAME(matrix_multiply),
+        equal_3_types
+    },
+    {
+        "slogdet",
+        "(m,m)->(),()", 
+        "slogdet on the last two dimensions and broadcast on the rest. \n"\
+        "Results in two arrays, one with sign and the other with log of the"\
+        " determinants. \n"\
+        "    \"(m,m)->(),()\" \n",
+        4, 1, 2,
+        FUNC_ARRAY_NAME(slogdet),
+        slogdet_types
+    },
+    {
+        "det",
+        "(m,m)->()",
+        "det of the last two dimensions and broadcast on the rest. \n"\
+        "    \"(m,m)->()\" \n",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(det),
+        equal_2_types
+    },
+    {
+        "eigh_lo",
+        "(m,m)->(m),(m,m)",
+        "eigh on the last two dimension and broadcast to the rest, using"\
+        " lower triangle \n"\
+        "Results in a vector of eigenvalues and a matrix with the"\
+        "eigenvectors. \n"\
+        "    \"(m,m)->(m),(m,m)\" \n",
+        4, 1, 2,
+        FUNC_ARRAY_NAME(eighlo),
+        eigh_types
+    },
+    {
+        "eigh_up",
+        "(m,m)->(m),(m,m)",
+        "eigh on the last two dimension and broadcast to the rest, using"\
+        " upper triangle. \n"\
+        "Results in a vector of eigenvalues and a matrix with the"\
+        " eigenvectors. \n"\
+        "    \"(m,m)->(m),(m,m)\" \n",
+        4, 1, 2,
+        FUNC_ARRAY_NAME(eighup),
+        eigh_types
+    },
+    {
+        "eigvalsh_lo",
+        "(m,m)->(m)",
+        "eigh on the last two dimension and broadcast to the rest, using"\
+        " lower triangle. \n"\
+        "Results in a vector of eigenvalues and a matrix with the"\
+        "eigenvectors. \n"\
+        "    \"(m,m)->(m)\" \n",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(eigvalshlo),
+        eighvals_types
+    },
+    {
+        "eigvalsh_up",
+        "(m,m)->(m)",
+        "eigvalsh on the last two dimension and broadcast to the rest,"\
+        " using upper triangle. \n"\
+        "Results in a vector of eigenvalues and a matrix with the"\
+        "eigenvectors.\n"\
+        "    \"(m,m)->(m)\" \n",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(eigvalshup),
+        eighvals_types
+    },
+    {
+        "solve",
+        "(m,m),(m,n)->(m,n)",
+        "solve the system a x = b, on the last two dimensions, broadcast"\
+        " to the rest. \n"\
+        "Results in a matrices with the solutions. \n"\
+        "    \"(m,m),(m,n)->(m,n)\" \n",
+        4, 2, 1,
+        FUNC_ARRAY_NAME(solve),
+        equal_3_types
+    },
+    {
+        "solve1",
+        "(m,m),(m)->(m)",
+        "solve the system a x = b, for b being a vector, broadcast in"\
+        " the outer dimensions. \n"\
+        "Results in vectors with the solutions. \n"\
+        "    \"(m,m),(m)->(m)\" \n",
+        4,2,1,
+        FUNC_ARRAY_NAME(solve1),
+        equal_3_types
+    },
+    {
+        "inv",
+        "(m,m)->(m,m)",
+        "compute the inverse of the last two dimensions and broadcast"\
+        " to the rest. \n"\
+        "Results in the inverse matrices. \n"\
+        "    \"(m,m)->(m,m)\" \n",
+        4,1,1,
+        FUNC_ARRAY_NAME(inv),
+        equal_2_types
+    },
+    {
+        "cholesky_up",
+        "(m,m)->(m,m)",
+        "cholesky decomposition of hermitian positive-definite matrices. \n"\
+        "Broadcast to all outer dimensions. \n"\
+        "    \"(m,m)->(m,m)\" \n",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(cholesky_up),
+        equal_2_types
+    },
+    {
+        "cholesky_lo",
+        "(m,m)->(m,m)",
+        "cholesky decomposition of hermitian positive-definite matrices. \n"\
+        "Broadcast to all outer dimensions. \n"\
+        "    \"(m,m)->(m,m)\" \n",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(cholesky_lo),
+        equal_2_types
+    },
+    {
+        "svd_m",
+        "(m,n)->(m)",
+        "svd when n>=m. ",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(svd_N),
+        equal_2_types
+    },
+    {
+        "svd_n",
+        "(m,n)->(n)",
+        "svd when n<=m",
+        4, 1, 1,
+        FUNC_ARRAY_NAME(svd_N),
+        svd_1_1_types
+    },
+    {
+        "svd_m_s",
+        "(m,n)->(m,m),(m),(m,n)",
+        "svd when m>=n",
+        4, 1, 3,
+        FUNC_ARRAY_NAME(svd_S),
+        svd_1_3_types
+    },
+    {
+        "svd_n_s",
+        "(m,n)->(m,n),(n),(n,n)",
+        "svd when m>=n",
+        4, 1, 3,
+        FUNC_ARRAY_NAME(svd_S),
+        svd_1_3_types
+    },
+    {
+        "svd_m_f",
+        "(m,n)->(m,m),(m),(n,n)",
+        "svd when m>=n",
+        4, 1, 3,
+        FUNC_ARRAY_NAME(svd_A),
+        svd_1_3_types
+    },
+    {
+        "svd_n_f",
+        "(m,n)->(m,m),(n),(n,n)",
+        "svd when m>=n",
+        4, 1, 3,
+        FUNC_ARRAY_NAME(svd_A),
+        svd_1_3_types
+    },
+    {
+        "eig",
+        "(m,m)->(m),(m,m)",
+        "eig on the last two dimension and broadcast to the rest. \n"\
+        "Results in a vector with the  eigenvalues and a matrix with the"\
+        " eigenvectors. \n"\
+        "    \"(m,m)->(m),(m,m)\" \n",
+        3, 1, 2,
+        FUNC_ARRAY_NAME(eig),
+        eig_types
+    },
+    {
+        "eigvals",
+        "(m,m)->(m)",
+        "eigvals on the last two dimension and broadcast to the rest. \n"\
+        "Results in a vector of eigenvalues. \n"\
+        "    \"(m,m)->(m),(m,m)\" \n",
+        3, 1, 1,
+        FUNC_ARRAY_NAME(eigvals),
+        eigvals_types
+    },
+    {
+        "quadratic_form",
+        "(m),(m,n),(n)->()",
+        "computes the quadratic form uQv in the inner dimensions, broadcast"\
+        "to the rest \n"\
+        "Results in the result of uQv for each element of the broadcasted"\
+        "dimensions. \n"\
+        "    \"(m),(m,n),(n)->()",
+        4,3,1,
+        FUNC_ARRAY_NAME(quadratic_form),
+        equal_4_types
+    },
+    {
+        "add3",
+        "(),(),()->()",
+        "3-way element-wise addition. a,b,c -> a+b+c for all elements. \n",
+        4,3,1,
+        FUNC_ARRAY_NAME(add3),
+        equal_4_types
+    },
+    {
+        "multiply3",
+        "(),(),()->()",
+        "3-way element-wise product. a,b,c -> a*b*c for all elements. \n",
+        4,3,1,
+        FUNC_ARRAY_NAME(multiply3),
+        equal_4_types
+    },
+    {
+        "multiply3_add",
+        "(),(),(),()->()",
+        "3-way element-wise product plus addition. a,b,c,d -> a*b*c+d"\
+        " for all elements. \n",
+        4,4,1,
+        FUNC_ARRAY_NAME(multiply3_add),
+        equal_5_types
+    },
+    {
+        "multiply_add",
+        "(),(),()->()",
+        "element-wise multiply-add. a,b,c -> a*b+c for all elements. \n",
+        4,3,1,
+        FUNC_ARRAY_NAME(multiply_add),
+        equal_4_types
+    },
+    {
+        "multiply_add2",
+        "(),(),(),()->()",
+        "element-wise product with 2 additions. a,b,c,d -> a*b+c+d for"\
+        " all elements. \n",
+        4,4,1,
+        FUNC_ARRAY_NAME(multiply_add2),
+        equal_5_types
+    },
+    {
+        "multiply4",
+        "(),(),(),()->()",
+        "4-way element-wise product. a,b,c,d -> a*b*c*d for all elements. \n",
+        4,4,1,
+        FUNC_ARRAY_NAME(multiply4),
+        equal_5_types
+    },
+    {
+        "multiply4_add",
+        "(),(),(),(),()->()",
+        "4-way element-wise product and addition. a,b,c,d,e -> a*b*c*d+e. \n",
+        4,5,1,
+        FUNC_ARRAY_NAME(multiply4_add),
+        equal_6_types
+    },
+    {  /* solve using cholesky decomposition (lapack potrs) */
+        "chosolve_up",
+        "(m,m),(m,n)->(m,n)",
+        "solve for symmetric/hermitian matrices using cholesky"\
+        " factorization. \n",
+        4,2,1,
+        FUNC_ARRAY_NAME(chosolve_up),
+        equal_3_types
+    },
+    {  /* solve using choleske decomposition (lapack potrs) */
+        "chosolve_lo",
+        "(m,m),(m,n)->(m,n)",
+        "solve for symmetric/hermitian matrices using cholesky"\
+        " factorization. \n",
+        4,2,1,
+        FUNC_ARRAY_NAME(chosolve_lo),
+        equal_3_types
+    },
+    {   /* solve using cholesky decomposition (lapack potrs) */
+        "chosolve1_up",
+        "(m,m),(m)->(m)",
+        "solve a system using cholesky decomposition. \n",
+        4,2,1,
+        FUNC_ARRAY_NAME(chosolve1_up),
+        equal_3_types
+    },
+    {   /* solve using cholesky decomposition (lapack potrs) */
+        "chosolve1_lo",
+        "(m,m),(m)->(m)",
+        "solve a system using cholesky decomposition. \n",
+        4,2,1,
+        FUNC_ARRAY_NAME(chosolve1_lo),
+        equal_3_types
+    },
+    {   /* inverse using cholesky decomposition (lapack potri) */
+        "poinv_up",
+        "(m,m)->(m,m)",
+        "inverse using cholesky decomposition for symmetric/hermitian"\
+        " matrices. \n",
+        4,1,1,
+        FUNC_ARRAY_NAME(poinv_up),
+        equal_2_types
+    },
+    {   /* inverse using cholesky decomposition (lapack potri) */
+        "poinv_lo",
+        "(m,m)->(m,m)",
+        "inverse using cholesky decomposition for symmetric/hermitian"\
+        " matrices. \n",
+        4,1,1,
+        FUNC_ARRAY_NAME(poinv_lo),
+        equal_2_types
+    },
+};
+
+static void
+addUfuncs(PyObject *dictionary) {
+    PyObject *f;
+    int i;
+    const int gufunc_count = sizeof(gufunc_descriptors)/
+        sizeof(gufunc_descriptors[0]);
+    for (i=0; i < gufunc_count; i++) {
+        GUFUNC_DESCRIPTOR_t* d = &gufunc_descriptors[i];
+        f = PyUFunc_FromFuncAndDataAndSignature(d->funcs,
+                                                array_of_nulls,
+                                                d->types,
+                                                d->ntypes,
+                                                d->nin,
+                                                d->nout,
+                                                PyUFunc_None,
+                                                d->name,
+                                                d->doc,
+                                                0,
+                                                d->signature);
+        PyDict_SetItemString(dictionary, d->name, f);
+#if 0
+        dump_ufunc_object((PyUFuncObject*) f);
+#endif
+        Py_DECREF(f);
+    }
+}
+
+
+
+/* -------------------------------------------------------------------------- */
+                  /* Module initialization stuff  */
+
+static PyMethodDef UMath_LinAlgMethods[] = {
+    {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+#if defined(NPY_PY3K)
+static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT,
+        UMATH_LINALG_MODULE_NAME,
+        NULL,
+        -1,
+        UMath_LinAlgMethods,
+        NULL,
+        NULL,
+        NULL,
+        NULL
+};
+#endif
+
+#if defined(NPY_PY3K)
+#define RETVAL m
+PyObject *PyInit__umath_linalg(void)
+#else
+#define RETVAL
+PyMODINIT_FUNC
+init_umath_linalg(void)
+#endif
+{
+    PyObject *m;
+    PyObject *d;
+    PyObject *version;
+
+    init_constants();
+#if defined(NPY_PY3K)
+    m = PyModule_Create(&moduledef);
+#else
+    m = Py_InitModule(UMATH_LINALG_MODULE_NAME, UMath_LinAlgMethods);
+#endif
+    if (m == NULL)
+        return RETVAL;
+
+    import_array();
+    import_ufunc();
+
+    d = PyModule_GetDict(m);
+
+    version = PyString_FromString(umath_linalg_version_string);
+    PyDict_SetItemString(d, "__version__", version);
+    Py_DECREF(version);
+
+    /* Load the ufunc operators into the module's namespace */
+    addUfuncs(d);
+
+    if (PyErr_Occurred()) {
+        PyErr_SetString(PyExc_RuntimeError,
+                        "cannot load _umath_linalg module.");
+    }
+
+    return RETVAL;
+}
diff --git a/numpy/core/tests/test_gufuncs_linalg.py b/numpy/core/tests/test_gufuncs_linalg.py
new file mode 100644
index 000000000000..34af58a7c83b
--- /dev/null
+++ b/numpy/core/tests/test_gufuncs_linalg.py
@@ -0,0 +1,500 @@
+"""
+Test functions for gufuncs_linalg module
+Heavily inspired (ripped in part) test_linalg
+"""
+
+
+################################################################################
+# The following functions are implemented in the module "gufuncs_linalg"
+#
+# category "linalg"
+# - inv (TestInv)
+# - poinv (TestPoinv)
+# - det (TestDet)
+# - slogdet (TestDet)
+# - eig (TestEig)
+# - eigh (TestEigh)
+# - eigvals (TestEigvals)
+# - eigvalsh (TestEigvalsh)
+# - cholesky
+# - solve (TestSolve)
+# - chosolve (TestChosolve)
+# - svd (TestSVD)
+
+# ** unimplemented **
+# - qr
+# - matrix_power
+# - matrix_rank
+# - pinv
+# - lstsq
+# - tensorinv
+# - tensorsolve
+# - norm
+# - cond
+#
+# category "inspired by pdl"
+# - quadratic_form
+# - matrix_multiply3
+# - add3 (TestAdd3)
+# - multiply3 (TestMultiply3)
+# - multiply3_add (TestMultiply3Add)
+# - multiply_add (TestMultiplyAdd)
+# - multiply_add2 (TestMultiplyAdd2)
+# - multiply4 (TestMultiply4)
+# - multiply4_add (TestMultiply4Add)
+#
+# category "others"
+# - convolve
+# - inner1d
+# - innerwt
+# - matrix_multiply
+
+from nose.plugins.skip import Skip, SkipTest
+import numpy as np
+
+from numpy.testing import (TestCase, assert_, assert_equal, assert_raises,
+                           assert_array_equal, assert_almost_equal,
+                           run_module_suite)
+
+from numpy import array, single, double, csingle, cdouble, dot, identity
+from numpy import multiply, inf
+import numpy.core.gufuncs_linalg as gula
+
+old_assert_almost_equal = assert_almost_equal
+
+def assert_almost_equal(a, b, **kw):
+    if a.dtype.type in (single, csingle):
+        decimal = 5
+    else:
+        decimal = 10
+    old_assert_almost_equal(a, b, decimal = decimal, **kw)
+
+
+def assert_valid_eigen_no_broadcast(M, w, v, **kw):
+    lhs = gula.matrix_multiply(M,v)
+    rhs = w*v
+    assert_almost_equal(lhs, rhs, **kw)
+
+
+def assert_valid_eigen_recurse(M, w, v, **kw):
+    """check that w and v are valid eigenvalues/eigenvectors for matrix M
+    broadcast"""
+    if len(M.shape) > 2:
+        for i in range(M.shape[0]):
+            assert_valid_eigen_recurse(M[i], w[i], v[i], **kw)
+    else:
+        if len(M.shape) == 2:
+            assert_valid_eigen_no_broadcast(M, w, v, **kw)
+        else:
+            raise AssertionError('Not enough dimensions')
+
+
+def assert_valid_eigen(M, w, v, **kw):
+    if np.any(np.isnan(M)):
+        raise AssertionError('nan found in matrix')
+    if np.any(np.isnan(w)):
+        raise AssertionError('nan found in eigenvalues')
+    if np.any(np.isnan(v)):
+        raise AssertionError('nan found in eigenvectors')
+
+    assert_valid_eigen_recurse(M, w, v, **kw)
+
+
+def assert_valid_eigenvals_no_broadcast(M, w, **kw):
+    ident = np.eye(M.shape[0], dtype=M.dtype)
+    for i in range(w.shape[0]):
+        assert_almost_equal(gula.det(M - w[i]*ident), 0.0, **kw)
+
+
+def assert_valid_eigenvals_recurse(M, w, **kw):
+    if len(M.shape) > 2:
+        for i in range(M.shape[0]):
+            assert_valid_eigenvals_recurse(M[i], w[i], **kw)
+    else:
+        if len(M.shape) == 2:
+            assert_valid_eigenvals_no_broadcast(M, w, **kw)
+        else:
+            raise AssertionError('Not enough dimensions')
+
+
+def assert_valid_eigenvals(M, w, **kw):
+    if np.any(np.isnan(M)):
+        raise AssertionError('nan found in matrix')
+    if np.any(np.isnan(w)):
+        raise AssertionError('nan found in eigenvalues')
+    assert_valid_eigenvals_recurse(M, w, **kw)
+
+
+class MatrixGenerator(object):
+    def real_matrices(self):
+        a = [[1,2],
+             [3,4]]
+
+        b = [[4,3],
+             [2,1]]
+
+        return a, b
+    
+    def real_symmetric_matrices(self):
+        a = [[ 2 ,-1],
+             [-1 , 2]]
+
+        b = [[4,3],
+             [2,1]]
+
+        return a, b
+
+    def complex_matrices(self):
+        a = [[1+2j,2+3j],
+             [3+4j,4+5j]]
+
+        b = [[4+3j,3+2j],
+             [2+1j,1+0j]]
+
+        return a, b
+
+    def complex_hermitian_matrices(self):
+        a = [[2,-1],
+             [-1, 2]]
+
+        b = [[4+3j,3+2j],
+             [2-1j,1+0j]]
+        
+        return a, b
+
+    def real_matrices_vector(self):
+        a, b = self.real_matrices()
+        return [a], [b]
+
+    def real_symmetric_matrices_vector(self):
+        a, b = self.real_symmetric_matrices()
+        return [a], [b]
+
+    def complex_matrices_vector(self):
+        a, b = self.complex_matrices()
+        return [a], [b]
+
+    def complex_hermitian_matrices_vector(self):
+        a, b = self.complex_hermitian_matrices()
+        return [a], [b]
+
+
+class GeneralTestCase(MatrixGenerator):
+    def test_single(self):
+        a,b = self.real_matrices()
+        self.do(array(a, dtype=single),
+                array(b, dtype=single))
+
+    def test_double(self):
+        a, b = self.real_matrices()
+        self.do(array(a, dtype=double),
+                array(b, dtype=double))
+
+    def test_csingle(self):
+        a, b = self.complex_matrices()
+        self.do(array(a, dtype=csingle),
+                array(b, dtype=csingle))
+
+    def test_cdouble(self):
+        a, b = self.complex_matrices()
+        self.do(array(a, dtype=cdouble),
+                array(b, dtype=cdouble))
+
+    def test_vector_single(self):
+        a,b = self.real_matrices_vector()
+        self.do(array(a, dtype=single),
+                array(b, dtype=single))
+
+    def test_vector_double(self):
+        a, b = self.real_matrices_vector()
+        self.do(array(a, dtype=double),
+                array(b, dtype=double))
+
+    def test_vector_csingle(self):
+        a, b = self.complex_matrices_vector()
+        self.do(array(a, dtype=csingle),
+                array(b, dtype=csingle))
+
+    def test_vector_cdouble(self):
+        a, b = self.complex_matrices_vector()
+        self.do(array(a, dtype=cdouble),
+                array(b, dtype=cdouble))
+
+
+class HermitianTestCase(MatrixGenerator):
+    def test_single(self):
+        a,b = self.real_symmetric_matrices()
+        self.do(array(a, dtype=single),
+                array(b, dtype=single))
+
+    def test_double(self):
+        a, b = self.real_symmetric_matrices()
+        self.do(array(a, dtype=double),
+                array(b, dtype=double))
+
+    def test_csingle(self):
+        a, b = self.complex_hermitian_matrices()
+        self.do(array(a, dtype=csingle),
+                array(b, dtype=csingle))
+
+    def test_cdouble(self):
+        a, b = self.complex_hermitian_matrices()
+        self.do(array(a, dtype=cdouble),
+                array(b, dtype=cdouble))
+
+    def test_vector_single(self):
+        a,b = self.real_symmetric_matrices_vector()
+        self.do(array(a, dtype=single),
+                array(b, dtype=single))
+
+    def test_vector_double(self):
+        a, b = self.real_symmetric_matrices_vector()
+        self.do(array(a, dtype=double),
+                array(b, dtype=double))
+
+    def test_vector_csingle(self):
+        a, b = self.complex_hermitian_matrices_vector()
+        self.do(array(a, dtype=csingle),
+                array(b, dtype=csingle))
+
+    def test_vector_cdouble(self):
+        a, b = self.complex_hermitian_matrices_vector()
+        self.do(array(a, dtype=cdouble),
+                array(b, dtype=cdouble))
+
+
+class TestMatrixMultiply(GeneralTestCase):
+    def do(self, a, b):
+        res = gula.matrix_multiply(a,b)
+        if a.ndim == 2:
+            assert_almost_equal(res, np.dot(a,b))
+        else:
+            assert_almost_equal(res[0], np.dot(a[0],b[0]))
+
+    def test_column_matrix(self):
+        A = np.arange(2*2).reshape((2,2))
+        B = np.arange(2*1).reshape((2,1))
+        res = gula.matrix_multiply(A,B)
+        assert_almost_equal(res, np.dot(A,B))
+
+class TestInv(GeneralTestCase, TestCase):
+    def do(self, a, b):
+        a_inv = gula.inv(a)
+        ident = identity(a.shape[-1])
+        if 3 == len(a.shape):
+            ident = ident.reshape((1, ident.shape[0], ident.shape[1]))
+        assert_almost_equal(gula.matrix_multiply(a, a_inv), ident)
+
+
+class TestPoinv(HermitianTestCase, TestCase):
+    def do(self, a, b):
+        a_inv = gula.poinv(a)
+        ident = identity(a.shape[-1])
+        if 3 == len(a.shape):
+            ident = ident.reshape((1,ident.shape[0], ident.shape[1]))
+
+        assert_almost_equal(a_inv, gula.inv(a))
+        assert_almost_equal(gula.matrix_multiply(a, a_inv), ident)
+
+
+class TestDet(GeneralTestCase, TestCase):
+    def do(self, a, b):
+        d = gula.det(a)
+        s, ld = gula.slogdet(a)
+        assert_almost_equal(s * np.exp(ld), d)
+
+        if np.csingle == a.dtype.type or np.single == a.dtype.type:
+            cmp_type=np.csingle
+        else:
+            cmp_type=np.cdouble
+
+        ev = gula.eigvals(a.astype(cmp_type))
+        assert_almost_equal(d.astype(cmp_type),
+                            multiply.reduce(ev.astype(cmp_type),
+                                            axis=(ev.ndim-1)))
+        if s != 0:
+            assert_almost_equal(np.abs(s), 1)
+        else:
+            assert_equal(ld, -inf)
+
+    def test_zero(self):
+        assert_equal(gula.det(array([[0.0]], dtype=single)), 0.0)
+        assert_equal(gula.det(array([[0.0]], dtype=double)), 0.0)
+        assert_equal(gula.det(array([[0.0]], dtype=csingle)), 0.0)
+        assert_equal(gula.det(array([[0.0]], dtype=cdouble)), 0.0)
+
+        assert_equal(gula.slogdet(array([[0.0]], dtype=single)), (0.0, -inf))
+        assert_equal(gula.slogdet(array([[0.0]], dtype=double)), (0.0, -inf))
+        assert_equal(gula.slogdet(array([[0.0]], dtype=csingle)), (0.0, -inf))
+        assert_equal(gula.slogdet(array([[0.0]], dtype=cdouble)), (0.0, -inf))
+
+    def test_types(self):
+        for typ in [(single, single), 
+                    (double, double), 
+                    (csingle, single),
+                    (cdouble, double)]:
+            for x in [ [0], [[0]], [[[0]]] ]:
+                assert_equal(gula.det(array(x, dtype=typ[0])).dtype, typ[0])
+                assert_equal(gula.slogdet(array(x, dtype=typ[0]))[0].dtype, typ[0])
+                assert_equal(gula.slogdet(array(x, dtype=typ[0]))[1].dtype, typ[1])
+        
+
+class TestEig(GeneralTestCase, TestCase):
+    def do(self, a, b):
+        evalues, evectors = gula.eig(a)
+        assert_valid_eigenvals(a, evalues)
+        assert_valid_eigen(a, evalues, evectors)
+        ev = gula.eigvals(a)
+        assert_valid_eigenvals(a, evalues)
+        assert_almost_equal(ev, evalues)
+
+
+class TestEigh(HermitianTestCase, TestCase):
+    def do(self, a, b):
+        evalues_lo, evectors_lo = gula.eigh(a, UPLO='L')
+        evalues_up, evectors_up = gula.eigh(a, UPLO='U')
+
+        assert_valid_eigenvals(a, evalues_lo)
+        assert_valid_eigenvals(a, evalues_up)
+        assert_valid_eigen(a, evalues_lo, evectors_lo)
+        assert_valid_eigen(a, evalues_up, evectors_up)
+        assert_almost_equal(evalues_lo, evalues_up)
+        assert_almost_equal(evectors_lo, evectors_up)
+
+        ev_lo = gula.eigvalsh(a, UPLO='L')
+        ev_up = gula.eigvalsh(a, UPLO='U')
+        assert_valid_eigenvals(a, ev_lo)
+        assert_valid_eigenvals(a, ev_up)
+        assert_almost_equal(ev_lo, evalues_lo)
+        assert_almost_equal(ev_up, evalues_up)
+
+
+class TestSolve(GeneralTestCase,TestCase):
+    def do(self, a, b):
+        x = gula.solve(a,b)
+        assert_almost_equal(b, gula.matrix_multiply(a,x))
+
+
+class TestChosolve(HermitianTestCase, TestCase):
+    def do(self, a, b):
+        """ 
+        inner1d not defined for complex types.
+        todo: implement alternative test
+        """
+        if csingle == a.dtype or cdouble == a.dtype:
+            raise SkipTest
+
+        x_lo = gula.chosolve(a, b, UPLO='L')
+        x_up = gula.chosolve(a, b, UPLO='U')
+        assert_almost_equal(x_lo, x_up)
+        # inner1d not defined for complex types
+        # todo: implement alternative test
+        assert_almost_equal(b, gula.matrix_multiply(a,x_lo))
+        assert_almost_equal(b, gula.matrix_multiply(a,x_up))
+
+
+class TestSVD(GeneralTestCase, TestCase):
+    def do(self, a, b):
+        """ still work in progress """
+        raise SkipTest
+        u, s, vt = gula.svd(a, 0)
+        assert_almost_equal(a, dot(multiply(u, s), vt))
+
+"""
+class TestCholesky(HermitianTestCase, TestCase):
+    def do(self, a, b):
+        pass
+"""
+
+################################################################################
+# ufuncs inspired by pdl
+# - add3
+# - multiply3
+# - multiply3_add
+# - multiply_add
+# - multiply_add2
+# - multiply4
+# - multiply4_add
+
+class UfuncTestCase(object):
+    parameter = range(0,10)
+
+    def _check_for_type(self, typ):
+        a = np.array(self.__class__.parameter, dtype=typ)
+        self.do(a)
+
+    def _check_for_type_vector(self, typ):
+        parameter = self.__class__.parameter
+        a = np.array([parameter, parameter], dtype=typ)
+        self.do(a)
+ 
+    def test_single(self):
+        self._check_for_type(single)
+
+    def test_double(self):
+        self._check_for_type(double)
+
+    def test_csingle(self):
+        self._check_for_type(csingle)
+
+    def test_cdouble(self):
+        self._check_for_type(cdouble)
+
+    def test_single_vector(self):
+        self._check_for_type_vector(single)
+
+    def test_double_vector(self):
+        self._check_for_type_vector(double)
+
+    def test_csingle_vector(self):
+        self._check_for_type_vector(csingle)
+
+    def test_cdouble_vector(self):
+        self._check_for_type_vector(cdouble)
+
+
+class TestAdd3(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.add3(a,a,a)
+        assert_almost_equal(r, a+a+a)
+
+
+class TestMultiply3(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.multiply3(a,a,a)
+        assert_almost_equal(r, a*a*a)
+
+
+class TestMultiply3Add(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.multiply3_add(a,a,a,a)
+        assert_almost_equal(r, a*a*a+a)
+
+
+class TestMultiplyAdd(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.multiply_add(a,a,a)
+        assert_almost_equal(r, a*a+a)
+
+
+class TestMultiplyAdd2(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.multiply_add2(a,a,a,a)
+        assert_almost_equal(r, a*a+a+a)
+
+
+class TestMultiply4(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.multiply4(a,a,a,a)
+        assert_almost_equal(r, a*a*a*a)
+
+
+class TestMultiply4_add(UfuncTestCase, TestCase):
+    def do(self, a):
+        r = gula.multiply4_add(a,a,a,a,a)
+        assert_almost_equal(r, a*a*a*a+a)
+
+
+if __name__ == "__main__":
+    print 'testing gufuncs_linalg; gufuncs version: %s' % gula._impl.__version__
+    run_module_suite()
diff --git a/numpy/core/tests/test_ufunc.py b/numpy/core/tests/test_ufunc.py
index 57fd66892938..d59688b21ba6 100644
--- a/numpy/core/tests/test_ufunc.py
+++ b/numpy/core/tests/test_ufunc.py
@@ -314,6 +314,8 @@ def test_forced_sig(self):
     def test_inner1d(self):
         a = np.arange(6).reshape((2,3))
         assert_array_equal(umt.inner1d(a,a), np.sum(a*a,axis=-1))
+        a = np.arange(6)
+        assert_array_equal(umt.inner1d(a,a), np.sum(a*a))
 
     def test_broadcast(self):
         msg = "broadcast"
@@ -423,6 +425,13 @@ def test_innerwt(self):
         w = np.arange(300,324).reshape((2,3,4))
         assert_array_equal(umt.innerwt(a,b,w), np.sum(a*b*w,axis=-1))
 
+    def test_innerwt_empty(self):
+        """Test generalized ufunc with zero-sized operands"""
+        a = np.array([], dtype='f8')
+        b = np.array([], dtype='f8')
+        w = np.array([], dtype='f8')
+        assert_array_equal(umt.innerwt(a,b,w), np.sum(a*b*w,axis=-1))
+
     def test_matrix_multiply(self):
         self.compare_matrix_multiply_results(np.long)
         self.compare_matrix_multiply_results(np.double)