fpm_source_parsing.f90 Source File


Source Code

!># Parsing of package source files
!>
!> This module exposes two functions, `[[parse_f_source]]` and `[[parse_c_source]]`,
!> which perform a rudimentary parsing of fortran and c source files
!> in order to extract information required for module dependency tracking.
!>
!> Both functions additionally calculate and store a file digest (hash) which
!> is used by the backend ([[fpm_backend]]) to skip compilation of unmodified sources.
!>
!> Both functions return an instance of the [[srcfile_t]] type.
!>
!> For more information, please read the documentation for each function:
!>
!> - `[[parse_f_source]]`
!> - `[[parse_c_source]]`
!>
module fpm_source_parsing
use fpm_error, only: error_t, file_parse_error, fatal_error, file_not_found_error
use fpm_strings, only: string_t, string_cat, len_trim, split, lower, str_ends_with, fnv_1a, is_fortran_name
use fpm_model, only: srcfile_t, &
                    FPM_UNIT_UNKNOWN, FPM_UNIT_PROGRAM, FPM_UNIT_MODULE, &
                    FPM_UNIT_SUBMODULE, FPM_UNIT_SUBPROGRAM, &
                    FPM_UNIT_CSOURCE, FPM_UNIT_CHEADER, FPM_SCOPE_UNKNOWN, &
                    FPM_SCOPE_LIB, FPM_SCOPE_DEP, FPM_SCOPE_APP, FPM_SCOPE_TEST, &
                    FPM_UNIT_CPPSOURCE
use fpm_filesystem, only: read_lines, read_lines_expanded, exists
implicit none

private
public :: parse_f_source, parse_c_source, parse_use_statement

contains

!> Parsing of free-form fortran source files
!>
!> The following statements are recognised and parsed:
!>
!> - `Module`/`submodule`/`program` declaration
!> - Module `use` statement
!> - `include` statement
!>
!> @note Intrinsic modules used by sources are not listed in
!> the `modules_used` field of source objects.
!>
!> @note Submodules are treated as normal modules which `use` their
!> corresponding parent modules.
!>
!>### Parsing limitations
!>
!> __Statements must not continued onto another line
!>  except for an `only:` list in the `use` statement.__
!>
!> This is supported:
!>
!>```fortran
!> use my_module, only: &
!>      my_var, my_function, my_subroutine
!>```
!>
!> This is __NOT supported:__
!>
!>```fortran
!> use &
!>    my_module
!>```
!>
function parse_f_source(f_filename,error) result(f_source)
    character(*), intent(in) :: f_filename
    type(srcfile_t) :: f_source
    type(error_t), allocatable, intent(out) :: error

    logical :: inside_module, inside_interface, using, intrinsic_module
    integer :: stat
    integer :: fh, n_use, n_include, n_mod, n_parent, i, j, ic, pass
    type(string_t), allocatable :: file_lines(:), file_lines_lower(:)
    character(:), allocatable :: temp_string, mod_name, string_parts(:)

    if (.not. exists(f_filename)) then
        call file_not_found_error(error, f_filename)
        return
    end if

    f_source%file_name = f_filename

    file_lines = read_lines_expanded(f_filename)

    ! for efficiency in parsing make a lowercase left-adjusted copy of the file
    ! Need a copy because INCLUDE (and #include) file arguments are case-sensitive
    file_lines_lower=file_lines
    do i=1,size(file_lines_lower)
       file_lines_lower(i)%s=adjustl(lower(file_lines_lower(i)%s))
    enddo

    ! fnv_1a can only be applied to non-zero-length arrays
    if (len_trim(file_lines_lower) > 0) f_source%digest = fnv_1a(file_lines)

    do pass = 1,2
        n_use = 0
        n_include = 0
        n_mod = 0
        n_parent = 0
        inside_module = .false.
        inside_interface = .false.
        file_loop: do i=1,size(file_lines_lower)

            ! Skip comment lines and preprocessor directives
            if (index(file_lines_lower(i)%s,'!') == 1 .or. &
                index(file_lines_lower(i)%s,'#') == 1 .or. &
                len_trim(file_lines_lower(i)%s) < 1) then
                cycle
            end if

            ! Detect exported C-API via bind(C)
            if (.not.inside_interface .and. &
                parse_subsequence(file_lines_lower(i)%s,'bind','(','c')) then

                do j=i,1,-1

                    if (index(file_lines_lower(j)%s,'function') > 0 .or. &
                        index(file_lines_lower(j)%s,'subroutine') > 0) then
                        f_source%unit_type = FPM_UNIT_SUBPROGRAM
                        exit
                    end if

                    if (j>1) then

                        ic = index(file_lines_lower(j-1)%s,'!')
                        if (ic < 1) then
                            ic = len(file_lines_lower(j-1)%s)
                        end if

                        temp_string = trim(file_lines_lower(j-1)%s(1:ic))
                        if (index(temp_string,'&') /= len(temp_string)) then
                            exit
                        end if

                    end if

                end do

            end if

            ! Skip lines that are continued: not statements
            if (i > 1) then
                ic = index(file_lines_lower(i-1)%s,'!')
                if (ic < 1) then
                    ic = len(file_lines_lower(i-1)%s)
                end if
                temp_string = trim(file_lines_lower(i-1)%s(1:ic))
                if (len(temp_string) > 0 .and. index(temp_string,'&') == len(temp_string)) then
                    cycle
                end if
            end if

            ! Detect beginning of interface block
            if (index(file_lines_lower(i)%s,'interface') == 1 &
                .or. parse_sequence(file_lines_lower(i)%s,'abstract','interface')) then

                inside_interface = .true.
                cycle

            end if

            ! Detect end of interface block
            if (parse_sequence(file_lines_lower(i)%s,'end','interface')) then

                inside_interface = .false.
                cycle

            end if

            ! Process 'USE' statements
            call parse_use_statement(f_filename,i,file_lines_lower(i)%s,using,intrinsic_module,mod_name,error)
            if (allocated(error)) return

            if (using) then

                ! Not a valid module name?
                if (.not.is_fortran_name(mod_name)) cycle

                ! Valid intrinsic module: not a dependency
                if (intrinsic_module) cycle

                n_use = n_use + 1

                if (pass == 2) f_source%modules_used(n_use)%s = mod_name

                cycle

            endif

            ! Process 'INCLUDE' statements
            ic = index(file_lines_lower(i)%s,'include')
            if ( ic == 1 ) then
                ic = index(lower(file_lines(i)%s),'include')
                if (index(adjustl(file_lines(i)%s(ic+7:)),'"') == 1 .or. &
                    index(adjustl(file_lines(i)%s(ic+7:)),"'") == 1 ) then

                    n_include = n_include + 1

                    if (pass == 2) then
                        f_source%include_dependencies(n_include)%s = &
                         & split_n(file_lines(i)%s,n=2,delims="'"//'"',stat=stat)
                        if (stat /= 0) then
                            call file_parse_error(error,f_filename, &
                                  'unable to find include file name',i, &
                                  file_lines(i)%s)
                            return
                        end if
                    end if

                    cycle

                end if
            end if

            ! Extract name of module if is module
            if (index(file_lines_lower(i)%s,'module ') == 1) then

                ! Remove any trailing comments
                ic = index(file_lines_lower(i)%s,'!')-1
                if (ic < 1) then
                    ic = len(file_lines_lower(i)%s)
                end if
                temp_string = trim(file_lines_lower(i)%s(1:ic))

                ! R1405 module-stmt := "MODULE" module-name
                ! module-stmt has two space-delimited parts only
                ! (no line continuations)
                call split(temp_string,string_parts,' ')
                if (size(string_parts) /= 2) then
                    cycle
                end if

                mod_name = trim(adjustl(string_parts(2)))
                if (scan(mod_name,'=(&')>0 ) then
                    ! Ignore these cases:
                    ! module <something>&
                    ! module =*
                    ! module (i)
                    cycle
                end if

                if (.not.is_fortran_name(mod_name)) then
                    call file_parse_error(error,f_filename, &
                          'empty or invalid name for module',i, &
                          file_lines_lower(i)%s, index(file_lines_lower(i)%s,mod_name))
                    return
                end if

                n_mod = n_mod + 1

                if (pass == 2) then
                    f_source%modules_provided(n_mod) = string_t(mod_name)
                end if

                if (f_source%unit_type == FPM_UNIT_UNKNOWN) then
                    f_source%unit_type = FPM_UNIT_MODULE
                end if

                if (.not.inside_module) then
                    inside_module = .true.
                else
                    ! Must have missed an end module statement (can't assume a pure module)
                    if (f_source%unit_type /= FPM_UNIT_PROGRAM) then
                        f_source%unit_type = FPM_UNIT_SUBPROGRAM
                    end if
                end if

                cycle

            end if

            ! Extract name of submodule if is submodule
            if (index(file_lines_lower(i)%s,'submodule') == 1) then

                mod_name = split_n(file_lines_lower(i)%s,n=3,delims='()',stat=stat)
                if (stat /= 0) then
                    call file_parse_error(error,f_filename, &
                          'unable to get submodule name',i, &
                          file_lines_lower(i)%s)
                    return
                end if
                if (.not.is_fortran_name(mod_name)) then
                    call file_parse_error(error,f_filename, &
                          'empty or invalid name for submodule',i, &
                          file_lines_lower(i)%s, index(file_lines_lower(i)%s,mod_name))
                    return
                end if

                n_mod = n_mod + 1

                temp_string = split_n(file_lines_lower(i)%s,n=2,delims='()',stat=stat)
                if (stat /= 0) then
                    call file_parse_error(error,f_filename, &
                          'unable to get submodule ancestry',i, &
                          file_lines_lower(i)%s)
                    return
                end if

                if (f_source%unit_type /= FPM_UNIT_PROGRAM) then
                    f_source%unit_type = FPM_UNIT_SUBMODULE
                end if

                n_use = n_use + 1

                inside_module = .true.

                n_parent = n_parent + 1

                if (pass == 2) then

                    if (index(temp_string,':') > 0) then

                        temp_string = temp_string(index(temp_string,':')+1:)

                    end if

                    if (.not.is_fortran_name(temp_string)) then
                        call file_parse_error(error,f_filename, &
                          'empty or invalid name for submodule parent',i, &
                          file_lines_lower(i)%s, index(file_lines_lower(i)%s,temp_string))
                        return
                    end if

                    f_source%modules_used(n_use)%s = temp_string
                    f_source%parent_modules(n_parent)%s = temp_string
                    f_source%modules_provided(n_mod)%s = mod_name

                end if

                cycle

            end if

            ! Detect if contains a program
            ! - no modules allowed after program def
            ! - program header may be missing (only "end program" statement present)
            if (index(file_lines_lower(i)%s,'program ')==1 .or. &
                parse_sequence(file_lines_lower(i)%s,'end','program')) then

                temp_string = split_n(file_lines_lower(i)%s,n=2,delims=' ',stat=stat)
                if (stat == 0) then

                    if (scan(temp_string,'=(')>0 ) then
                        ! Ignore:
                        ! program =*
                        ! program (i) =*
                        cycle
                    end if

                end if

                f_source%unit_type = FPM_UNIT_PROGRAM

                cycle
                

            end if

            ! Parse end module statement
            !  (to check for code outside of modules)
            if (parse_sequence(file_lines_lower(i)%s,'end','module') .or. &
                parse_sequence(file_lines_lower(i)%s,'end','submodule')) then

                inside_module = .false.
                cycle

            end if

            ! Any statements not yet parsed are assumed to be other code statements
            if (.not.inside_module .and. f_source%unit_type /= FPM_UNIT_PROGRAM) then

                f_source%unit_type = FPM_UNIT_SUBPROGRAM

            end if

        end do file_loop

        ! If unable to parse end of module statement, then can't assume pure module
        !  (there could be non-module subprograms present)
        if (inside_module .and. f_source%unit_type == FPM_UNIT_MODULE) then
            f_source%unit_type = FPM_UNIT_SUBPROGRAM
        end if

        if (pass == 1) then
            allocate(f_source%modules_used(n_use))
            allocate(f_source%include_dependencies(n_include))
            allocate(f_source%modules_provided(n_mod))
            allocate(f_source%parent_modules(n_parent))
        end if

    end do

end function parse_f_source


!> Parsing of c, cpp source files
!>
!> The following statements are recognised and parsed:
!>
!> - `#include` preprocessor statement
!>
function parse_c_source(c_filename,error) result(c_source)
    character(*), intent(in) :: c_filename
    type(srcfile_t) :: c_source
    type(error_t), allocatable, intent(out) :: error

    integer :: fh, n_include, i, pass, stat
    type(string_t), allocatable :: file_lines(:)

    c_source%file_name = c_filename

    if (str_ends_with(lower(c_filename), ".c")) then

        c_source%unit_type = FPM_UNIT_CSOURCE

    else if (str_ends_with(lower(c_filename), ".h")) then

        c_source%unit_type = FPM_UNIT_CHEADER

    else if (str_ends_with(lower(c_filename), ".cpp")) then

        c_source%unit_type = FPM_UNIT_CPPSOURCE

    end if

    allocate(c_source%modules_used(0))
    allocate(c_source%modules_provided(0))
    allocate(c_source%parent_modules(0))

    file_lines = read_lines(c_filename)

    ! Ignore empty files, returned as FPM_UNIT_UNKNOWN
    if (len_trim(file_lines) < 1) then
        c_source%unit_type = FPM_UNIT_UNKNOWN
        return
    end if

    c_source%digest = fnv_1a(file_lines)

    do pass = 1,2
        n_include = 0
        file_loop: do i=1,size(file_lines)

            ! Process 'INCLUDE' statements
            if (index(adjustl(lower(file_lines(i)%s)),'#include') == 1 .and. &
                index(file_lines(i)%s,'"') > 0) then

                n_include = n_include + 1

                if (pass == 2) then

                    c_source%include_dependencies(n_include)%s = &
                     &   split_n(file_lines(i)%s,n=2,delims='"',stat=stat)
                    if (stat /= 0) then
                        call file_parse_error(error,c_filename, &
                            'unable to get c include file',i, &
                            file_lines(i)%s,index(file_lines(i)%s,'"'))
                        return
                    end if

                end if

            end if

        end do file_loop

        if (pass == 1) then
            allocate(c_source%include_dependencies(n_include))
        end if

    end do

end function parse_c_source

!> Split a string on one or more delimeters
!>  and return the nth substring if it exists
!>
!> n=0  will return the last item
!> n=-1 will return the penultimate item etc.
!>
!> stat = 1 on return if the index
!>  is not found
!>
function split_n(string,delims,n,stat) result(substring)

    character(*), intent(in) :: string
    character(*), intent(in) :: delims
    integer, intent(in) :: n
    integer, intent(out) :: stat
    character(:), allocatable :: substring

    integer :: i
    character(:), allocatable :: string_parts(:)

    call split(string,string_parts,delims)

    if (n<1) then
        i = size(string_parts) + n
        if (i < 1) then
            allocate(character(len=0) :: substring) ! ifort bus error otherwise
            stat = 1
            return
        end if
    else
        i = n
    end if

    if (i>size(string_parts)) then
        allocate(character(len=0) :: substring) ! ifort bus error otherwise
        stat = 1
        return
    end if

    substring = trim(adjustl(string_parts(i)))
    stat = 0

end function split_n


!> Parse a subsequence of blank-separated tokens within a string
!>  (see parse_sequence)
function parse_subsequence(string,t1,t2,t3,t4) result(found)
    character(*), intent(in) :: string
    character(*), intent(in) :: t1
    character(*), intent(in), optional :: t2, t3, t4
    logical :: found

    integer :: offset, i

    found = .false.
    offset = 1

    do

        i = index(string(offset:),t1)

        if (i == 0) return

        offset = offset + i - 1

        found = parse_sequence(string(offset:),t1,t2,t3,t4)

        if (found) return

        offset = offset + len(t1)

        if (offset > len(string)) return

    end do

end function parse_subsequence

!> Helper utility to parse sequences of tokens
!> that may be optionally separated by zero or more spaces
function parse_sequence(string,t1,t2,t3,t4) result(found)
    character(*), intent(in) :: string
    character(*), intent(in) :: t1
    character(*), intent(in), optional :: t2, t3, t4
    logical :: found

    integer :: post, n, incr, pos, token_n
    logical :: match

    n = len(string)
    found = .false.
    pos = 1

    do token_n=1,4

        do while (pos <= n)
            if (string(pos:pos) /= ' ') then
                exit
            end if
            pos = pos + 1
        end do

        select case(token_n)
        case(1)
            incr = len(t1)
            if (pos+incr-1>n) return
            match = string(pos:pos+incr-1) == t1
        case(2)
            if (.not.present(t2)) exit
            incr = len(t2)
            if (pos+incr-1>n) return
            match = string(pos:pos+incr-1) == t2
        case(3)
            if (.not.present(t3)) exit
            incr = len(t3)
            if (pos+incr-1>n) return
            match = string(pos:pos+incr-1) == t3
        case(4)
            if (.not.present(t4)) exit
            incr = len(t4)
            if (pos+incr-1>n) return
            match = string(pos:pos+incr-1) == t4
        case default
            exit
        end select

        if (.not.match) then
            return
        end if

        pos = pos + incr

    end do

    found = .true.

end function parse_sequence

! USE [, intrinsic] :: module_name [, only: only_list]
! USE [, non_intrinsic] :: module_name [, only: only_list]
subroutine parse_use_statement(f_filename,i,line,use_stmt,is_intrinsic,module_name,error)

    !> Current file name and line number (for error messaging)
    character(*), intent(in) :: f_filename
    integer, intent(in) :: i

    !> The line being parsed. MUST BE preprocessed with trim(adjustl()
    character(*), intent(in) :: line

    !> Does this line contain a `use` statement?
    logical, intent(out) :: use_stmt

    !> Is the module in this statement intrinsic?
    logical, intent(out) :: is_intrinsic

    !> used module name
    character(:), allocatable, intent(out) :: module_name

    !> Error handling
    type(error_t), allocatable, intent(out) :: error

    character(15), parameter :: INTRINSIC_NAMES(*) =  &
                                 ['iso_c_binding  ', &
                                  'iso_fortran_env', &
                                  'ieee_arithmetic', &
                                  'ieee_exceptions', &
                                  'ieee_features  ', &
                                  'omp_lib        ']

    character(len=:), allocatable :: temp_string
    integer :: colons,intr,nonintr,j,stat
    logical :: has_intrinsic_name

    use_stmt      = .false.
    is_intrinsic  = .false.
    if (len_trim(line)<=0) return

    ! Quick check that the line is preprocessed
    if (line(1:1)==' ') then
        call fatal_error(error,'internal_error: source file line is not trim(adjustl()) on input to parse_use_statement')
        return
    end if

    ! 'use' should be the first string in the adjustl line
    use_stmt = index(line,'use ')==1 .or. index(line,'use::')==1 .or. index(line,'use,')==1
    if (.not.use_stmt) return
    colons   = index(line,'::')
    nonintr  = 0
    intr     = 0

    have_colons: if (colons>3) then

        ! there may be an intrinsic/non-intrinsic spec
        nonintr = index(line(1:colons-1),'non_intrinsic')
        if (nonintr==0) intr = index(line(1:colons-1),'intrinsic')


        temp_string = split_n(line,delims=':',n=2,stat=stat)
        if (stat /= 0) then
            call file_parse_error(error,f_filename, &
                    'unable to find used module name',i, &
                    line,colons)
            return
        end if

        module_name = split_n(temp_string,delims=' ,',n=1,stat=stat)
        if (stat /= 0) then
            call file_parse_error(error,f_filename, &
                     'unable to find used module name',i, &
                     line)
            return
        end if

    else

        module_name = split_n(line,n=2,delims=' ,',stat=stat)
        if (stat /= 0) then
            call file_parse_error(error,f_filename, &
                    'unable to find used module name',i, &
                    line)
            return
        end if

    end if have_colons

    ! If declared intrinsic, check that it is true
    has_intrinsic_name = any([(index(module_name,trim(INTRINSIC_NAMES(j)))>0, &
                             j=1,size(INTRINSIC_NAMES))])
    if (intr>0 .and. .not.has_intrinsic_name) then

        ! An intrinsic module was not found. Its name could be in the next line,
        ! in which case, we just skip this check. The compiler will do the job if the name is invalid.

        ! Module name was not read: it's in the next line
        if (index(module_name,'&')<=0) then
            call file_parse_error(error,f_filename, &
                                  'module '//module_name//' is declared intrinsic but it is not ',i, &
                                  line)
            return
        endif
    endif

    ! Should we treat this as an intrinsic module
    is_intrinsic = nonintr==0 .and. & ! not declared non-intrinsic
                   (intr>0 .or. has_intrinsic_name)

end subroutine parse_use_statement



end module fpm_source_parsing