subroutine Wigner3j(w3j, jmin, jmax, j2, j3, m1, m2, m3, exitstatus)
!------------------------------------------------------------------------------
!
!   This subroutine will calculate the Wigner 3j symbols
!
!       j  j2 j3
!       m1 m2 m3
!
!   for all allowable values of j. The returned values in the array j are
!   calculated only for the limits
!
!       jmin = max(|j2-j3|, |m1|)
!       jmax = j2 + j3
!
!   To be non-zero, m1 + m2 + m3 = 0. In addition, it is assumed that all j and
!   m are integers. Returned values have a relative error less than ~1.d-8 when
!   j2 and j3 are less than 103 (see below). In practice, this routine is
!   probably usable up to 165.
!
!   This routine is based upon the stable non-linear recurence relations of
!   Luscombe and Luban (1998) for the "non classical" regions near jmin and
!   jmax. For the classical region, the standard three term recursion
!   relationship is used (Schulten and Gordon 1975). Note that this three term
!   recursion can be unstable and can also lead to overflows. Thus the values
!   are rescaled by a factor "scalef" whenever the absolute value of the 3j
!   coefficient becomes greater than unity. Also, the direction of the iteration
!   starts from low values of j to high values, but when abs(w3j(j+2)/w3j(j))
!   is less than one, the iteration will restart from high to low values. More
!   efficient algorithms might be found for specific cases (for instance, when
!   all m's are zero).
!
!   Verification:
!
!   The results have been verified against this routine run in quadruple
!   precision. For 1.e7 acceptable random values of j2, j3, m2, and m3 between
!   -200 and 200, the relative error was calculated only for those 3j
!   coefficients that had an absolute value greater than 1.d-17 (values smaller
!   than this are for all practical purposed zero, and can be heavily
!   affected by machine roundoff errors or underflow). 853 combinations of
!   parameters were found to have relative errors greater than 1.d-8. Here I
!   list the minimum value of max(j2,j3) for different ranges of error, as well
!   as the number of times this occured
!
!   1.d-7 < error  <=1.d-8 = 103    # = 483
!   1.d-6 < error <= 1.d-7 = 116   # = 240
!   1.d-5 < error <= 1.d-6 = 165   # = 93
!   1.d-4 < error <= 1.d-5 = 167    # = 36
!
!   Many times (maybe always), the large relative errors occur when the 3j
!   coefficient changes sign and is close to zero. (I.e., adjacent values are
!   about 10.e7 times greater in magnitude.) Thus, if one does not need to know
!   highly accurate values of the 3j coefficients when they are almost zero
!   (i.e., ~1.d-10) then this routine is probably usable up to about 160.
!
!   These results have also been verified for parameter values less than 100
!   using a code based on the algorith of de Blanc (1987), which was originally
!   coded by Olav van Genabeek, and modified by M. Fang (note that this code was
!   run in quadruple precision, and only calculates one coefficient for each
!   call. I also have no idea if this code was verified.) Maximum relative
!   errors in this case were less than 1.d-8 for a large number of values
!   (again, only 3j coefficients greater than 1.d-17 were considered here).
!   
!   The biggest improvement that could be made in this routine is to determine
!   when one should stop iterating in the forward direction, and start
!   iterating from high to low values.
!
!   Calling parameters
!
!       IN
!           j2, j3, m1, m2, m3      Integer values.
!
!       OUT 
!           w3j                     Array of length jmax - jmin + 1.
!           jmin, jmax              Minimum and maximum values
!                                   out output array.
!
!       OPTIONAL (OUT)
!           exitstatus  If present, instead of executing a STOP when an error
!                       is encountered, the variable exitstatus will be
!                       returned describing the error.
!                       0 = No errors;
!                       1 = Improper dimensions of input array;
!                       2 = Improper bounds for input variable;
!                       3 = Error allocating memory;
!                       4 = File IO error.
!
!   Copyright (c) 2005-2019, SHTOOLS
!   All rights reserved.
!
!------------------------------------------------------------------------------
    use ftypes

    implicit none

    integer(int32), intent(in) :: j2, j3, m1, m2, m3
    integer(int32), intent(out) :: jmin, jmax
    real(dp), intent(out) :: w3j(:)
    integer(int32), intent(out), optional :: exitstatus
    real(dp) :: wnmid, wpmid, scalef, denom, rs(j2+j3+1), &
                wl(j2+j3+1), wu(j2+j3+1), xjmin, yjmin, yjmax, zjmax, xj, zj
    integer(int32) :: j, jnum, jp, jn, k, flag1, flag2, jmid

    if (present(exitstatus)) exitstatus = 0

    if (size(w3j) < j2+j3+1) then
        print*, "Error --- Wigner3j"
        print*, "W3J must be dimensioned (J2+J3+1) where J2 and J3 are ", j2, j3
        print*, "Input array is dimensioned ", size(w3j)
        if (present(exitstatus)) then
            exitstatus = 1
            return
        else
            stop
        end if
    end if

    w3j = 0.0_dp

    flag1 = 0
    flag2 = 0

    scalef = 1.0e3_dp

    jmin = max(abs(j2-j3), abs(m1))
    jmax = j2 + j3
    jnum = jmax - jmin + 1

    if (abs(m2) > j2 .or. abs(m3) > j3) then
        return
    else if (m1 + m2 + m3 /= 0) then
        return
    else if (jmax < jmin) then
        return
    end if

    !--------------------------------------------------------------------------
    !
    !   Only one term is present
    !
    !--------------------------------------------------------------------------

    if (jnum == 1) then
        w3j(1) = 1.0_dp / sqrt(2.0_dp * jmin + 1.0_dp)
        
        if ( (w3j(1) < 0.0_dp .and. (-1)**(j2-j3+m2+m3) > 0) .or. &
            (w3j(1) > 0.0_dp .and. (-1)**(j2-j3+m2+m3) < 0) ) &
            w3j(1) = -w3j(1)

        return

    end if

    !--------------------------------------------------------------------------
    !
    !   Calculate lower non-classical values for [jmin, jn]. If the second term
    !   can not be calculated because the recursion relationsips give rise to a
    !   1/0, then set flag1 to 1.  If all m's are zero, then this is not a
    !   problem as all odd terms must be zero.
    !
    !--------------------------------------------------------------------------
    rs = 0.0_dp
    wl = 0.0_dp

    xjmin = x(jmin)
    yjmin = y(jmin)

    if (m1 == 0 .and. m2 == 0 .and. m3 == 0) then       ! All m's are zero
        wl(jindex(jmin)) = 1.0_dp
        wl(jindex(jmin+1)) = 0.0_dp
        jn = jmin + 1

    else if (yjmin == 0.0_dp) then           ! The second terms is either zero
        if (xjmin == 0.0_dp) then            ! or undefined
            flag1 = 1
            jn = jmin

        else
            wl(jindex(jmin)) = 1.0_dp
            wl(jindex(jmin+1)) = 0.0_dp
            jn = jmin + 1

        end if

    else if (xjmin * yjmin >= 0.0_dp) then
        ! The second term is outside of the non-classical region
        wl(jindex(jmin)) = 1.0_dp
        wl(jindex(jmin+1)) = -yjmin / xjmin
        jn = jmin + 1

    else
        ! Calculate terms in the non-classical region
        rs(jindex(jmin)) = -xjmin / yjmin

        jn = jmax

        do j = jmin + 1, jmax-1, 1
            denom = y(j) + z(j)*rs(jindex(j-1))
            xj = x(j)

            if (abs(xj) > abs(denom) .or. xj * denom >= 0.0_dp .or. &
                    denom == 0.0_dp) then
                jn = j - 1
                exit

            else
                rs(jindex(j)) = -xj / denom

            end if

        end do

        wl(jindex(jn)) = 1.0_dp

        do k = 1, jn - jmin, 1
            wl(jindex(jn-k)) = wl(jindex(jn-k+1)) * rs(jindex(jn-k))
        end do

        if (jn == jmin) then
            ! Calculate at least two terms so that these can be used
            ! in three term recursion
            wl(jindex(jmin+1)) = -yjmin / xjmin
            jn = jmin + 1

        end if

    end if

    if (jn == jmax) then
        ! All terms are calculated
        w3j(1:jnum) = wl(1:jnum)
        call normw3j
        call fixsign

        return

    end if

    !--------------------------------------------------------------------------
    !
    !   Calculate upper non-classical values for [jp, jmax].
    !   If the second last term can not be calculated because the
    !   recursion relations give a 1/0, then set flag2 to 1.
    !   (Note, I don't think that this ever happens).
    !
    !--------------------------------------------------------------------------
    wu = 0.0_dp

    yjmax = y(jmax)
    zjmax = z(jmax)

    if (m1 == 0 .and. m2 == 0 .and. m3 == 0) then
        wu(jindex(jmax)) = 1.0_dp
        wu(jindex(jmax-1)) = 0.0_dp
        jp = jmax - 1

    else if (yjmax == 0.0_dp) then
        if (zjmax == 0.0_dp) then
            flag2 = 1
            jp = jmax

        else
            wu(jindex(jmax)) = 1.0_dp
            wu(jindex(jmax-1)) = - yjmax / zjmax
            jp = jmax-1

        end if

    else if (yjmax * zjmax >= 0.0_dp) then
        wu(jindex(jmax)) = 1.0_dp
        wu(jindex(jmax-1)) = - yjmax / zjmax
        jp = jmax - 1

    else
        rs(jindex(jmax)) = -zjmax / yjmax

        jp = jmin

        do j=jmax-1, jn, -1
            denom = y(j) + x(j)*rs(jindex(j+1))
            zj = z(j)

            if (abs(zj) > abs(denom) .or. zj * denom >= 0.0_dp .or. &
                    denom == 0.0_dp) then
                jp = j + 1
                exit

            else
                rs(jindex(j)) = -zj / denom

            end if

        end do

        wu(jindex(jp)) = 1.0_dp

        do k = 1, jmax - jp, 1
            wu(jindex(jp+k)) = wu(jindex(jp+k-1))*rs(jindex(jp+k))
        end do

        if (jp == jmax) then
            wu(jindex(jmax-1)) = - yjmax / zjmax
            jp = jmax - 1
        end if

    end if

    !--------------------------------------------------------------------------
    !
    !   Calculate classical terms for [jn+1, jp-1] using standard three
    !   term rercusion relationship. Start from both jn and jp and stop at the
    !   midpoint. If flag1 is set, then perform the recursion solely from high
    !   to low values. If flag2 is set, then perform the recursion solely from
    !   low to high.
    !
    !--------------------------------------------------------------------------
    if (flag1 == 0) then
        jmid = (jn + jp) / 2

        do j = jn, jmid - 1, 1
            wl(jindex(j+1)) = - (z(j)*wl(jindex(j-1)) +y(j)*wl(jindex(j))) &
                                / x(j)

            if (abs(wl(jindex(j+1))) > 1.0_dp) then
                ! watch out for overflows.
                wl(jindex(jmin):jindex(j+1)) = wl(jindex(jmin):jindex(j+1)) &
                                               / scalef
            end if

            if (abs(wl(jindex(j+1)) / wl(jindex(j-1))) < 1.0_dp .and. &
                wl(jindex(j+1)) /= 0.0_dp) then
                ! If values are decreasing then stop upward iteration
                ! and start with the downward iteration.
                jmid = j + 1
                exit
            end if

        end do

        wnmid = wl(jindex(jmid))

        if (wl(jindex(jmid-1)) /= 0.0_dp .and. &
                abs(wnmid / wl(jindex(jmid-1))) < 1.d-6) then
            ! Make sure that the stopping midpoint value is not a zero,
            ! or close to it!
            wnmid = wl(jindex(jmid-1))
            jmid = jmid - 1
        end if

        do j = jp, jmid + 1, -1
            wu(jindex(j-1)) = - (x(j)*wu(jindex(j+1)) + y(j)*wu(jindex(j)) ) &
                                / z(j)
            if (abs(wu(jindex(j-1))) > 1.0_dp) then
                wu(jindex(j-1):jindex(jmax)) = wu(jindex(j-1):jindex(jmax)) &
                                               / scalef
            end if

        end do

        wpmid = wu(jindex(jmid))

        ! rescale two sequences to common midpoint
        if (jmid == jmax) then
            w3j(1:jnum) = wl(1:jnum)

        else if (jmid == jmin) then
            w3j(1:jnum) = wu(1:jnum)

        else
            w3j(1:jindex(jmid)) = wl(1:jindex(jmid)) * wpmid / wnmid
            w3j(jindex(jmid+1):jindex(jmax)) = wu(jindex(jmid+1):jindex(jmax))

        end if

    else if (flag1 == 1 .and. flag2 == 0) then
        ! iterature in downward direction only

        do j = jp, jmin + 1, -1
            wu(jindex(j-1)) = - (x(j)*wu(jindex(j+1)) + y(j)*wu(jindex(j)) ) &
                                / z(j)

            if (abs(wu(jindex(j-1))) > 1) then
                wu(jindex(j-1):jindex(jmax)) = wu(jindex(j-1):jindex(jmax)) &
                                               / scalef
            end if
        end do

        w3j(1:jnum) = wu(1:jnum)

    else if (flag2 == 1 .and. flag1 == 0) then
        ! iterature in upward direction only

        do j = jn, jp - 1, 1
            wl(jindex(j+1)) = - (z(j)*wl(jindex(j-1)) +y(j)*wl(jindex(j))) &
                                / x(j)

            if (abs(wl(jindex(j+1))) > 1) then
                wl(jindex(jmin):jindex(j+1)) = wl(jindex(jmin):jindex(j+1)) &
                                               / scalef
            end if

        end do

        w3j(1:jnum) = wl(1:jnum)

    else if (flag1 == 1 .and. flag2 == 1) then
        print*, "Error --- Wigner3j"
        print*, "Can not calculate function for input values, " // &
                "both flag1 and flag 2 are set."
        if (present(exitstatus)) then
            exitstatus = 5
            return
        else
            stop
        end if

    end if

    call normw3j
    call fixsign


    CONTAINS

        function jindex(j)
            integer(int32) :: jindex
            integer(int32) :: j
            jindex = j - jmin + 1
        end function jindex

        function a(j)
            real(dp) :: a
            integer(int32) :: j
            a = (dble(j)**2 - dble(j2 - j3)**2) * (dble(j2 + j3 + 1)**2 &
                            - dble(j)**2) * (dble(j)**2 - dble(m1)**2)
            a = sqrt(a)
        end function a

        function y(j)
            real(dp) :: y
            integer(int32) :: j
            y = -dble(2 * j + 1) * &
                (dble(m1) * (dble(j2) * dble(j2 + 1) &
                - dble(j3) * dble(j3 + 1)) &
                - dble(m3 - m2) * dble(j) * dble(j + 1))
        end function y

        function x(j)
            real(dp) :: x
            integer(int32) :: j
            x = dble(j) * a(j+1)
        end function x

        function z(j)
            real(dp) :: z
            integer(int32) :: j
            z = dble(j+1) * a(j)
        end function z

        subroutine normw3j
            real(dp) :: norm
            integer(int32) :: j

            norm = 0.0_dp

            do j = jmin, jmax
                norm = norm + dble(2 * j + 1) * w3j(jindex(j))**2
            end do

            w3j(1:jnum) = w3j(1:jnum) / sqrt(norm)

        end subroutine normw3j

        subroutine fixsign

            if ( (w3j(jindex(jmax)) < 0.0_dp .and. (-1)**(j2-j3+m2+m3) > 0) &
                    .or. (w3j(jindex(jmax)) > 0.0_dp .and. &
                    (-1)**(j2-j3+m2+m3) < 0) ) then
                w3j(1:jnum) = -w3j(1:jnum)
            end if

        end subroutine fixsign

end subroutine Wigner3j
