Source code for simple503

#!/usr/bin/env python3
#
#  __init__.py
"""
:pep:`503` Python package repository generator.
"""
#
#  Copyright © 2021 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  Permission is hereby granted, free of charge, to any person obtaining a copy
#  of this software and associated documentation files (the "Software"), to deal
#  in the Software without restriction, including without limitation the rights
#  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#  copies of the Software, and to permit persons to whom the Software is
#  furnished to do so, subject to the following conditions:
#
#  The above copyright notice and this permission notice shall be included in all
#  copies or substantial portions of the Software.
#
#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
#  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
#  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
#  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
#  DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
#  OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
#  OR OTHER DEALINGS IN THE SOFTWARE.
#

# stdlib
import posixpath
import re
import shutil
from collections import defaultdict
from html import escape
from operator import attrgetter
from typing import TYPE_CHECKING, Callable, Dict, Iterable, List, NamedTuple, Optional, Union

# 3rd party
from airium import Airium  # type: ignore
from apeye.url import URL
from dist_meta import distributions, metadata
from domdf_python_tools.paths import PathPlus
from domdf_python_tools.typing import PathLike
from natsort import natsorted
from shippinglabel import normalize
from shippinglabel.checksum import get_sha256_hash
from typing_extensions import Literal

if TYPE_CHECKING:
	# stdlib
	from hashlib import _Hash
else:
	try:
		# 3rd party
		from _hashlib import HASH as _Hash
	except ImportError:  # pragma: no cover
		try:
			# 3rd party
			from _hashlib import Hash as _Hash
		except ImportError:
			pass

__author__: str = "Dominic Davis-Foster"
__copyright__: str = "2021 Dominic Davis-Foster"
__license__: str = "MIT License"
__version__: str = "0.4.0"
__email__: str = "dominic@davis-foster.co.uk"

__all__ = [
		"WheelFile",
		"generate_index",
		"generate_project_page",
		"make_simple",
		]


def make_simple(
		origin: PathLike,
		target: Optional[PathLike] = None,
		base_url: Union[str, URL] = '/',
		*,
		sort: bool = False,
		copy: bool = False,
		extract_metadata: bool = True,
		) -> Dict[str, List["WheelFile"]]:
	"""
	Generate a simple repository of Python wheels.

	:param origin: A directory containing wheels. The wheels may be arranged in subdirectories.
	:param target: The directory to create the repository in.
		The directory structure of ``origin`` will be recreated there.
		Defaults to ``origin``.
	:no-default target:
	:param base_url: The base URL of the simple repository.
	:param sort: Sort the wheel files into per-project base directories.
	:param copy: Copy files from the source to the destination, rather than moving them.
	:param extract_metadata: Extract and serve ``METADATA`` files per :pep:`658`.

	:returns: A mapping of (unnormalized) project names to a list of wheels for that project.

	.. versionchanged:: 0.2.0

		Now ignores wheels in the following directories:
		``.git``, ``.hg``, ``.tox``, ``.tox4``, ``.nox``, ``venv``, ``.venv``.

	.. versionchanged:: 0.3.0

		* Renamed the ``move`` option to ``sort`` to better reflect its behaviour.
		* Files are moved to the destination by default, unless the ``copy`` option is :py:obj:`True`.

	.. versionchanged:: 0.4.0

		Added the ``extract_metadata`` option.
	"""

	if target is None:
		target = origin

	origin = PathPlus(origin).abspath()
	target = PathPlus(target).abspath()
	target.maybe_make(parents=True)

	projects: Dict[str, List[WheelFile]] = defaultdict(list)
	move_operation: Callable = shutil.copyfile if copy else shutil.move  # type: ignore[assignment]

	unwanted_dirs = (".git", ".hg", "venv", ".venv", ".tox", ".tox4", ".nox")

	for wheel_file in origin.iterchildren(exclude_dirs=unwanted_dirs, match="**/*.whl"):
		target_file = target / wheel_file.relative_to(origin)

		with distributions.WheelDistribution.from_path(wheel_file) as wd:
			if not wd.has_file("METADATA"):  # pragma: no cover
				raise FileNotFoundError(f"METADATA file not found in {wheel_file}")

			metadata_string = wd.read_file("METADATA")
			wheel_metadata = metadata.loads(metadata_string)

		if sort:
			# Move to the appropriate directory
			project_dir = target / normalize(wheel_metadata["Name"])
			project_dir.maybe_make()

			if wheel_file.relative_to(origin).parts[0] != project_dir.parts[-1]:
				destination = project_dir / wheel_file.name
				destination.parent.maybe_make(parents=True)
				move_operation(wheel_file, destination)
				target_file = destination

		else:
			if not target_file.is_file() or not wheel_file.samefile(target_file):
				# note: will not overwrite files with the same name, even if the source file changed
				target_file.parent.maybe_make(parents=True)
				move_operation(wheel_file, target_file)

		if extract_metadata:
			metadata_filename = target_file.with_suffix(f"{target_file.suffix}.metadata")
			metadata_filename.write_text(metadata_string)
			metadata_hash = get_sha256_hash(metadata_filename)
		else:
			metadata_hash = None

		projects[wheel_metadata["Name"]].append(
				WheelFile(
						filename=target_file.relative_to(target).as_posix(),
						wheel_hash=get_sha256_hash(target_file),
						requires_python=wheel_metadata.get("Requires-Python"),
						metadata_hash=metadata_hash,
						)
				)

	index_content = str(generate_index(projects.keys(), base_url=base_url))
	_update_file(target / "index.html", index_content)

	for project_name, project_files in projects.items():
		project_dir = target / normalize(project_name)
		project_dir.maybe_make()

		project_index = generate_project_page(
				project_name,
				natsorted(project_files, key=attrgetter("filename"), reverse=True),
				base_url,
				)

		_update_file(project_dir / "index.html", str(project_index))

	return projects


def generate_index(projects: Iterable[str], base_url: Union[str, URL] = '/') -> Airium:
	"""
	Generate the simple repository index page, containing a list of all projects.

	:param projects: The list of projects to generate links for.
	:param base_url: The base URL of the Python package repository.
		For example, with PyPI's URL, a URL of ``/foo/`` would be ``https://pypi.org/simple/foo/``.
	"""

	base_url = URL(base_url)

	index = Airium()
	index("<!DOCTYPE html>")

	with index.html(lang="en"):
		with index.head():
			get_meta_tags(index)

			with index.title():
				index("Simple Package Repository")

		with index.body():
			for project_name in natsorted(projects, key=str.lower):
				normalized_name = normalize(project_name)

				with index.a(href=f"{base_url / normalized_name}/"):
					index(project_name)

				index.br()

	return index
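

# Hedged usage sketch (not part of the upstream module): rendering the index
# page for a couple of placeholder project names.
def _example_generate_index() -> None:  # pragma: no cover
	index = generate_index(["domdf-python-tools", "Sphinx"], base_url="/simple/")

	# ``str()`` on the Airium document gives the generated HTML.
	print(str(index))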


class WheelFile(NamedTuple):
	"""
	Represents a wheel file in the repository.
	"""

	#: The name of the wheel file.
	filename: str

	wheel_hash: "_Hash"
	"""
	The hash of the wheel file.

	Repositories SHOULD choose a hash function from one of the ones guaranteed to be available
	via the hashlib module in the Python standard library
	(currently ``md5``, ``sha1``, ``sha224``, ``sha256``, ``sha384``, ``sha512``).
	The current recommendation is to use ``sha256``.
	"""

	requires_python: Optional[str] = None
	"""
	The ``Requires-Python`` attribute from the wheel's ``METADATA`` file.

	:py:obj:`None` if undefined.
	"""

	metadata_hash: Union["_Hash", Literal[True], None] = None
	"""
	The hash of the wheel's ``METADATA`` file.

	:py:obj:`None` if the metadata file is not exposed.
	May be :py:obj:`True` if no hash is available.
	"""

	def as_anchor(self, page: Airium, base_url: Union[str, URL] = '/') -> None:
		"""
		Generate an anchor tag in a :class:`airium.Airium` document for this file.

		:param page:
		:param base_url: The base URL of the Python package repository.
		"""

		base_url = URL(base_url)

		href = f"{base_url / self.filename}#{self.wheel_hash.name.lower()}={self.wheel_hash.hexdigest()}"
		kwargs = {"href": href}

		if self.requires_python is not None:
			kwargs["data-requires-python"] = escape(self.requires_python)

		if self.metadata_hash is True:
			kwargs["data-dist-info-metadata"] = "true"
		elif self.metadata_hash is not None:
			hash_string = f"{self.metadata_hash.name.lower()}={self.metadata_hash.hexdigest()}"
			kwargs["data-dist-info-metadata"] = hash_string

		with page.a(**kwargs):
			page(posixpath.basename(self.filename))
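

# Hedged usage sketch (not part of the upstream module): constructing a
# ``WheelFile`` by hand and rendering its anchor tag.  The filename, hash input
# and ``requires_python`` value are invented for illustration; ``make_simple``
# normally builds these records itself.
def _example_wheel_file_anchor() -> None:  # pragma: no cover
	# stdlib
	import hashlib

	wheel = WheelFile(
			filename="example/example-0.1.0-py3-none-any.whl",
			wheel_hash=hashlib.sha256(b"wheel contents"),
			requires_python=">=3.6",
			)

	page = Airium()
	wheel.as_anchor(page, base_url="/simple/")

	# The anchor carries the sha256 fragment and a data-requires-python attribute.
	print(str(page))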


def generate_project_page(name: str, files: Iterable[WheelFile], base_url: Union[str, URL] = '/') -> Airium:
	"""
	Generate the repository page for a project.

	:param name: The project name, e.g. ``domdf-python-tools``.
	:param files: An iterable of files for the project, which will be linked to from the index page.
	:param base_url: The base URL of the Python package repository.
		For example, with PyPI's URL, a URL of ``/foo/`` would be ``https://pypi.org/simple/foo/``.
	"""

	name = normalize(name)
	base_url = URL(base_url)

	page = Airium()
	page("<!DOCTYPE html>")

	with page.html(lang="en"):
		with page.head():
			get_meta_tags(page)

			with page.title():
				page(f"Links for {name}")

		with page.body():
			with page.h1():
				# Not part of the spec, but allowed
				page(f"Links for {name}")

			for wheel_file in files:
				wheel_file.as_anchor(page, base_url)
				page.br()

	return page
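

# Hedged usage sketch (not part of the upstream module): rendering a project
# page for a single invented wheel file.
def _example_generate_project_page() -> None:  # pragma: no cover
	# stdlib
	import hashlib

	files = [
			WheelFile(
					filename="example/example-0.1.0-py3-none-any.whl",
					wheel_hash=hashlib.sha256(b"wheel contents"),
					),
			]

	page = generate_project_page("Example", files, base_url="/simple/")
	print(str(page))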


def get_meta_tags(page: Airium) -> None:
	# Not part of the spec, but allowed
	page.meta(name="generator", content=f"simple503 version {__version__}")
	page.meta(name="pypi:repository-version", content="1.0")
	page.meta(charset="UTF-8")


def cleanup(directory: PathLike) -> None:
	"""
	Cleanup files generated by ``simple503`` in the directory.

	This entails removing:

	* all ``index.html`` files
	* all ``.whl.metadata`` files
	* all empty directories.

	:param directory:
	"""

	directory = PathPlus(directory).abspath()

	for filename in directory.rglob("**/*"):
		if not filename.is_file():
			continue

		if filename.match("**/index.html"):
			filename.unlink()
		elif filename.match("**/*.whl.metadata"):
			filename.unlink()

	for filename in directory.rglob("**/*"):
		if not filename.is_dir():
			continue

		if next(filename.iterdir(), None) is None:
			filename.rmdir()


_minify_re = re.compile(r"\n\s*")


def _update_file(filename: PathPlus, new_content: str) -> bool:
	"""
	Write ``new_content`` to ``filename``, but only if the content has changed.

	Requires the ``incremental`` extra (``BeautifulSoup`` and ``html5lib``), otherwise always writes.

	.. versionadded:: 0.2.0  (private)

	:param filename:
	:param new_content:

	:returns: Whether the file was updated on disk.
	"""

	try:
		# 3rd party
		from bs4 import BeautifulSoup as soup  # type: ignore
	except ImportError:  # pragma: no cover
		soup = None

	if not filename.exists() or soup is None:
		filename.write_clean(new_content)
		return True

	current_soup = soup(_minify_re.sub('', filename.read_text().strip()), "html.parser").body
	new_soup = soup(_minify_re.sub('', new_content.strip()), "html.parser").body

	if current_soup != new_soup:
		filename.write_clean(new_content)
		return True

	return False
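

# Hedged usage sketch (not part of the upstream module): refreshing one
# generated page and then removing everything ``simple503`` produced.  The
# paths below are placeholders for illustration.
def _example_update_and_cleanup() -> None:  # pragma: no cover
	# Only rewrites the file when the rendered HTML actually differs
	# (or unconditionally if the "incremental" extra is not installed).
	changed = _update_file(PathPlus("repo/example/index.html"), "<!DOCTYPE html><html></html>")
	print("updated:", changed)

	# Remove all index.html files, *.whl.metadata files and empty directories.
	cleanup("repo")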