# Source code for sphinx_fortran_domain.utils

from __future__ import annotations

import glob
import os
import re
from pathlib import Path
from typing import Iterable, Optional, Sequence


# Characters that mark a path string as a glob pattern rather than a literal path.
_WILDCARDS = "*?["


def _has_wildcards(s: str) -> bool:
	"""Return True when `s` contains at least one glob wildcard character."""
	for wildcard in _WILDCARDS:
		if wildcard in s:
			return True
	return False




[docs]
def as_list(value) -> list[str]:
	"""Coerce a config value into a list of strings.

	`None` yields an empty list, a lone string yields a one-element list, and
	any other iterable has each of its items converted with `str()`.
	"""
	if isinstance(value, str):
		# A bare string is one entry, not a sequence of characters.
		return [value]
	return [] if value is None else list(map(str, value))




[docs]
def as_chars(value) -> list[str]:
	"""Coerce a config value into a list of marker characters.

	`None` yields an empty list. A string such as ``">!@"`` is split into its
	individual characters with whitespace dropped. Any other iterable has each
	item converted with `str()`; item length is not checked here.
	"""
	if value is None:
		return []
	if not isinstance(value, str):
		return [str(item) for item in value]

	picked: list[str] = []
	for ch in value:
		# `strip()` leaves an empty (falsy) string for whitespace characters.
		if ch.strip():
			picked.append(ch)
	return picked



[docs]
def doc_markers_from_doc_chars(doc_chars) -> list[str]:
	"""Turn the `fortran_doc_chars` setting into concrete doc markers.

	Each configured character (for example `>`) is prefixed with `!` to form the
	two-character marker that opens a doc comment (for example `!>`). When no
	characters are configured, the default marker list `["!>"]` is returned.

	Raises ValueError if any entry is not exactly one character long.
	"""
	entries = as_chars(doc_chars)
	if not entries:
		# Default convention: !> doc lines
		return ["!>"]

	# Entries are always strings here, so None is a safe "nothing invalid" sentinel.
	invalid = next((entry for entry in entries if len(entry) != 1), None)
	if invalid is not None:
		raise ValueError(f"fortran_doc_chars entries must be single characters, got: {invalid!r}")
	return [f"!{entry}" for entry in entries]



def _norm_path(path: str) -> str:
	"""Return a case-normalized, resolved form of `path` for comparisons.

	Resolution is best-effort: if resolving fails for any reason, the path is
	used as given and only case-normalized.
	"""
	target = Path(path)
	try:
		resolved = target.resolve()
	except Exception:
		# Deliberate fallback: an unresolvable path is still comparable as written.
		resolved = target
	return os.path.normcase(str(resolved))



[docs]
def read_text_utf8(path: str | Path) -> str:
	"""Read a text file as UTF-8, replacing invalid sequences."""
	return Path(path).read_text(encoding="utf-8", errors="replace")




[docs]
def read_lines_utf8(path: str | Path) -> list[str]:
	"""Return the lines of `path` (without line endings), decoded as UTF-8.

	Decoding is delegated to `read_text_utf8`, so invalid sequences are replaced.
	"""
	text = read_text_utf8(path)
	return text.splitlines()




[docs]
def strip_inline_comment(line: str) -> str:
	"""Remove a trailing Fortran comment introduced by '!' (best-effort).

	A `!` that sits inside a character literal (delimited by `'` or `"`) is part
	of the string, not a comment, and is kept. A doubled delimiter inside a
	literal (`'it''s'`) is handled naturally: the literal closes and reopens.

	If the line ends with a literal still open (for example a string continued
	from or onto another line), quote tracking is unreliable, so the function
	falls back to cutting at the first `!`.

	Returns the text before the comment, or `line` unchanged when it contains
	no comment.
	"""
	if "!" not in line:
		return line

	open_quote: Optional[str] = None
	for pos, ch in enumerate(line):
		if open_quote is not None:
			# Inside a literal: only the matching delimiter is significant.
			if ch == open_quote:
				open_quote = None
		elif ch in "'\"":
			open_quote = ch
		elif ch == "!":
			return line[:pos]

	if open_quote is not None:
		# Unbalanced quote: we cannot tell literal from code, so keep the simple rule.
		return line.split("!", 1)[0]
	# Every '!' was inside a balanced literal.
	return line




[docs]
def is_doc_line(line: str, doc_markers: Sequence[str]) -> Optional[str]:
	"""Return the doc text if `line` is a doc line, else None.

	A doc line is one whose first non-blank characters are one of `doc_markers`.
	The returned text is what follows the marker, with leading spaces and tabs
	removed (it may be an empty string). Markers are tried in the given order and
	the first match wins.

	Empty markers are ignored, and `doc_markers` may be None or empty, in which
	case no line is a doc line.
	"""
	stripped = line.lstrip()
	for marker in doc_markers or ():
		# An empty marker is a prefix of every string and would turn every line
		# into a doc line, so skip it.
		if marker and stripped.startswith(marker):
			return stripped[len(marker) :].lstrip(" \t")
	return None




[docs]
def find_inline_doc(line: str, doc_markers: Sequence[str]) -> Optional[tuple[int, str]]:
	"""Locate the earliest inline doc marker in `line`.

	Returns `(pos, marker)` for the marker whose first occurrence is left-most,
	or None when no marker qualifies. When two markers share a position, the one
	listed first in `doc_markers` is returned.
	"""
	hits: list[tuple[int, str]] = []
	for marker in doc_markers:
		# Inline docs live in Fortran comments, so a usable marker must contain
		# '!'; this also keeps operators such as `=>` from being mistaken for docs.
		if not marker or "!" not in marker:
			continue
		where = line.find(marker)
		if where < 0:
			continue
		if not line[:where].strip():
			# Nothing but whitespace before it: that is a leading doc line,
			# which is_doc_line handles.
			continue
		hits.append((where, marker))
	# min() returns the first of several equal minima, preserving list order on ties.
	return min(hits, key=lambda hit: hit[0], default=None)




[docs]
def extract_predoc_before_line(lines: Sequence[str], idx: int, *, doc_markers: Sequence[str]) -> str | None:
	"""Extract contiguous doc lines immediately preceding `idx` (0-based)."""
	if idx <= 0:
		return None
	markers = [m for m in (doc_markers or []) if m and str(m).strip()]
	buf: list[str] = []
	i = idx - 1
	while i >= 0:
		line = lines[i]
		stripped = line.lstrip()
		marker = next((m for m in markers if stripped.startswith(m)), None)
		if marker is None:
			break
		buf.append(stripped[len(marker) :].lstrip(" \t").rstrip())
		i -= 1
	if not buf:
		return None
	buf.reverse()
	text = "\n".join(buf).strip()
	return text or None



# Matches an `end program` statement at the start of a line (the whitespace
# between the two words is optional, so `endprogram` matches too).
_RE_END_PROGRAM = re.compile(r"^\s*end\s*program\b", re.IGNORECASE)
# Matches a line whose first token is `contains`.
_RE_CONTAINS = re.compile(r"^\s*contains\b", re.IGNORECASE)
# Matches a USE statement at the start of a line and captures the module name in
# group 1. The optional `, intrinsic` / `, non_intrinsic` nature and the optional
# `::` separator are accepted, e.g. `use m`, `use :: m`, `use, intrinsic :: m`.
_RE_USE = re.compile(
	r"^\s*use\b\s*(?:,\s*(?:non_intrinsic|intrinsic)\s*)?(?:\s*::\s*)?([A-Za-z_]\w*)\b",
	re.IGNORECASE,
)



[docs]
def extract_use_dependencies(source: str) -> list[str]:
	"""Return the module names referenced by USE statements (best-effort).

	Lines are scanned in order until a `contains` or `end program` line is
	reached. Names are de-duplicated case-insensitively; the first spelling seen
	is kept and first-seen order is preserved. An empty or None `source` yields
	an empty list.
	"""
	# Maps lower-cased name -> first spelling seen; dict order gives first-seen order.
	first_spelling: dict[str, str] = {}
	for line in (source or "").splitlines():
		if _RE_CONTAINS.match(line) or _RE_END_PROGRAM.match(line):
			break
		# Drop everything from the first '!' so a trailing comment is not parsed.
		statement = line.split("!", 1)[0]
		hit = _RE_USE.match(statement)
		if hit is None:
			continue
		module = (hit.group(1) or "").strip()
		if module:
			first_spelling.setdefault(module.lower(), module)
	return list(first_spelling.values())




[docs]
def collect_fortran_source_files_from_config(*, confdir: Path, config) -> list[str]:
	"""Collect source files using the standard Sphinx config option names.

	`config` does not have to be a Sphinx object: any object works, and the
	attributes `fortran_sources`, `fortran_sources_exclude` and
	`fortran_file_extensions` are each read with an empty-list default.
	Extensions are lower-cased before being handed to
	`collect_fortran_source_files`.
	"""

	def _setting(name: str) -> list[str]:
		# Missing attributes behave like an empty list.
		return as_list(getattr(config, name, []))

	suffixes = {ext.lower() for ext in _setting("fortran_file_extensions")}
	source_roots = _setting("fortran_sources")
	ignored = _setting("fortran_sources_exclude")
	return collect_fortran_source_files(
		confdir=Path(confdir),
		roots=source_roots,
		extensions=suffixes,
		excludes=ignored,
	)




[docs]
def collect_fortran_source_files(
	*,
	confdir: Path,
	roots: Sequence[str],
	extensions: set[str],
	excludes: Sequence[str] = (),
) -> list[str]:
	"""Gather Fortran source files from `roots`, minus anything in `excludes`.

	- roots may be files, directories, or glob patterns (relative to confdir).
	  Directories are searched recursively; for a glob pattern only the matches
	  that are files are taken.
	- excludes may be files, directories, or glob patterns (relative to confdir).
	  A directory, whether named directly or matched by a pattern, excludes the
	  files beneath it.
	- extensions is a set of allowed suffixes (lower-cased). Empty means "allow any".

	Returns a sorted, de-duplicated list of file paths as strings. An empty
	`roots` returns an empty list.
	"""
	if not roots:
		return []

	base = Path(confdir)

	def _wanted(candidate: Path) -> bool:
		# Only existing regular files with an allowed suffix qualify.
		if not candidate.is_file():
			return False
		return not extensions or candidate.suffix.lower() in extensions

	def _anchored(entry: str) -> Path:
		# Relative entries are interpreted against confdir.
		location = Path(entry)
		return location if location.is_absolute() else base / location

	def _expand(location: Path) -> list[Path]:
		# A directory stands for every wanted file beneath it; anything else
		# stands for itself if it is wanted.
		if location.is_dir():
			return [child for child in location.rglob("*") if _wanted(child)]
		return [location] if _wanted(location) else []

	collected: list[str] = []
	for raw_root in roots:
		entry = str(raw_root)
		if _has_wildcards(entry):
			# Pattern matches are taken as-is: directories among them are not descended.
			for hit in glob.glob(str(base / entry), recursive=True):
				matched = Path(hit)
				if _wanted(matched):
					collected.append(str(matched))
		else:
			collected.extend(str(found) for found in _expand(_anchored(entry)))

	if excludes:
		banned: set[str] = set()
		for raw_ex in excludes:
			entry = str(raw_ex)
			if _has_wildcards(entry):
				targets = [Path(hit) for hit in glob.glob(str(base / entry), recursive=True)]
			else:
				targets = [_anchored(entry)]
			for target in targets:
				banned.update(_norm_path(str(found)) for found in _expand(target))

		if banned:
			# Compare normalized forms so differently spelled paths still match.
			collected = [name for name in collected if _norm_path(name) not in banned]

	# Deterministic order
	return sorted(set(collected))