#! python3 # noqa: E265 # ############################################################################ # ########## Libraries ############# # ################################## # standard library import json from copy import deepcopy from dataclasses import asdict from datetime import datetime from email.utils import format_datetime, formatdate from pathlib import Path from re import compile as re_compile from shutil import copyfile from typing import Literal # 3rd party from jinja2 import Environment, FileSystemLoader, select_autoescape from mkdocs.config import config_options from mkdocs.config.defaults import MkDocsConfig from mkdocs.plugins import BasePlugin, event_priority, get_plugin_logger from mkdocs.structure.files import Files from mkdocs.structure.pages import Page from mkdocs.utils import get_build_timestamp # package modules from mkdocs_rss_plugin.__about__ import __title__, __version__ from mkdocs_rss_plugin.config import RssPluginConfig from mkdocs_rss_plugin.constants import ( DEFAULT_TEMPLATE_FILENAME, DEFAULT_TEMPLATE_FOLDER, MKDOCS_LOGGER_NAME, ) from mkdocs_rss_plugin.integrations.theme_material_blog_plugin import ( IntegrationMaterialBlog, ) from mkdocs_rss_plugin.integrations.theme_material_social_plugin import ( IntegrationMaterialSocialCards, ) from mkdocs_rss_plugin.models import MkdocsPageSubset, PageInformation, RssFeedBase from mkdocs_rss_plugin.util import Util # ############################################################################ # ########## Globals ############# # ################################ logger = get_plugin_logger(MKDOCS_LOGGER_NAME) # ############################################################################ # ########## Classes ############### # ################################## class GitRssPlugin(BasePlugin[RssPluginConfig]): """Main class for MkDocs plugin.""" # allow to set the plugin multiple times in the same mkdocs config supports_multiple_instances = True def __init__(self, *args, **kwargs) -> None: """Instantiation.""" # pages storage super().__init__(*args, **kwargs) self.cmd_is_serve: bool = False def on_startup( self, *, command: Literal["build", "gh-deploy", "serve"], dirty: bool ) -> None: """The `startup` event runs once at the very beginning of an `mkdocs` invocation. Note that for initializing variables, the __init__ method is still preferred. For initializing per-build variables (and whenever in doubt), use the on_config event. See: https://www.mkdocs.org/user-guide/plugins/#on_startup Args: command: the command that MkDocs was invoked with, e.g. "serve" for `mkdocs serve`. dirty: whether `--dirty` flag was passed. """ # flag used command to disable some actions if serve is used self.cmd_is_serve = command == "serve" self.pages_to_filter: list[PageInformation] = [] # prepare output feeds self.feed_created: RssFeedBase = RssFeedBase() self.feed_updated: RssFeedBase = RssFeedBase() def on_config(self, config: MkDocsConfig) -> MkDocsConfig: """The config event is the first event called on build and is run immediately after the user configuration is loaded and validated. Any alterations to the config should be made here. See: https://www.mkdocs.org/user-guide/plugins/#on_config Args: config (config_options.Config): global configuration object Raises: FileExistsError: if the template for the RSS feed is not found PluginError: if the 'date_from_meta.default_time' format does not comply Returns: MkDocsConfig: global configuration object """ # Skip if disabled if not self.config.enabled: return config # Fail if any export option is enabled if not any([self.config.json_feed_enabled, self.config.rss_feed_enabled]): logger.error( "At least one export option has to be enabled. Plugin is disabled." ) self.config.enabled = False return config # cache dir self.cache_dir = Path(self.config.cache_dir) self.cache_dir.mkdir(parents=True, exist_ok=True) logger.debug(f"Caching HTTP requests to: {self.cache_dir.resolve()}") # integrations - check if theme is Material and if blog are enabled self.integration_material_blog = IntegrationMaterialBlog( mkdocs_config=config, switch_force=self.config.use_material_blog, ) # integrations - check if theme is Material and if social cards are enabled self.integration_material_social_cards = IntegrationMaterialSocialCards( mkdocs_config=config, switch_force=self.config.use_material_social_cards, ) # instantiate plugin tooling self.util = Util( cache_dir=self.cache_dir, use_git=self.config.use_git, integration_material_blog=self.integration_material_blog, integration_material_social_cards=self.integration_material_social_cards, mkdocs_command_is_on_serve=self.cmd_is_serve, ) # check template dirs if not Path(DEFAULT_TEMPLATE_FILENAME).is_file(): raise FileExistsError(DEFAULT_TEMPLATE_FILENAME) self.tpl_file = Path(DEFAULT_TEMPLATE_FILENAME) self.tpl_folder = DEFAULT_TEMPLATE_FOLDER # start a feed dictionary using global config vars base_feed = RssFeedBase( author=config.site_author or None, buildDate=formatdate(get_build_timestamp()), copyright=config.copyright, description=( self.config.feed_description if self.config.feed_description else config.site_description ), entries=[], generator=f"{__title__} - v{__version__}", html_url=self.util.get_site_url(mkdocs_config=config), language=self.util.guess_locale(mkdocs_config=config), pubDate=formatdate(get_build_timestamp()), repo_url=config.repo_url, title=( self.config.feed_title if self.config.feed_title else config.site_name ), ttl=self.config.feed_ttl, ) # feed image if self.config.image: base_feed.logo_url = self.config.image # feed stylesheet (XSL) if self.config.stylesheet: if self.config.stylesheet == "auto": base_feed.stylesheet = "rss.xsl" logger.debug( f"Shipped stylesheet will be referenced in RSS feeds: {self.config.stylesheet}" ) else: base_feed.stylesheet = self.config.stylesheet logger.debug( f"Stylesheet will be referenced in RSS feeds: {self.config.stylesheet}" ) else: logger.debug("No stylesheet will be referenced in RSS feeds.") # pattern to match pages included in output self.match_path_pattern = re_compile(self.config.match_path) # date handling if ( self.config.date_from_meta.as_creation == "git" and self.config.date_from_meta.as_update == "git" ): logger.debug("Dates will be retrieved from git log.") elif any( [ isinstance(self.config.date_from_meta.as_creation, bool), isinstance(self.config.date_from_meta.as_update, bool), ] ): deprecation_msg = ( "Since version 1.13, using a boolean for " "'date_from_meta.as_creation' and 'date_from_meta.as_update' is " "deprecated. Please update your " "`rss` plugin settings in your Mkdocs configuration " f"({config.config_file_path}) by using a str or removing the value if " "you were using `False`., " ) logger.warning(DeprecationWarning(deprecation_msg)) self.config.date_from_meta.as_creation = ( self.config.date_from_meta.as_update ) = "git" # check if default time complies with expected format try: self.config.date_from_meta.default_time = datetime.strptime( self.config.date_from_meta.default_time, "%H:%M" ) except (TypeError, ValueError) as err: logger.warning( "Config error: `date_from_meta.default_time` value " f"'{self.config.date_from_meta.default_time}' format doesn't match the " f"expected format %H:%M. Fallback to the default value. Trace: {err}" ) self.config.date_from_meta.default_time = datetime.strptime( "00:00", "%H:%M" ) if self.config.use_git: logger.debug( "Dates will be retrieved FIRSTLY from page meta (yaml " "frontmatter). The git log will be used as fallback." ) else: logger.debug( "Dates will be retrieved ONLY from page meta (yaml " "frontmatter). The build date will be used as fallback, without any " "call to Git." ) # create 2 final dicts self.feed_created = deepcopy(base_feed) self.feed_updated = deepcopy(base_feed) # final feed url if base_feed.html_url: # concatenate both URLs self.feed_created.rss_url = ( base_feed.html_url + self.config.feeds_filenames.rss_created ) self.feed_updated.rss_url = ( base_feed.html_url + self.config.feeds_filenames.rss_updated ) self.feed_created.json_url = ( base_feed.html_url + self.config.feeds_filenames.json_created ) self.feed_updated.json_url = ( base_feed.html_url + self.config.feeds_filenames.json_updated ) else: logger.error( "The variable `site_url` is not set in the MkDocs " "configuration file whereas a URL is mandatory to publish. " "See: https://validator.w3.org/feed/docs/rss2.html#requiredChannelElements" ) self.feed_created.rss_url = self.feed_updated.json_url = ( self.feed_updated.rss_url ) = self.feed_updated.json_url = None # ending event return config @event_priority(priority=-75) def on_page_content( self, html: str, page: Page, config: MkDocsConfig, files: Files ) -> str | None: """The page_content event is called after the Markdown text is rendered to HTML (but before being passed to a template) and can be used to alter the HTML body of the page. See: https://www.mkdocs.org/user-guide/plugins/#on_page_content Args: html (str): HTML rendered from Markdown source as string page (Page): `mkdocs.structure.pages.Page` instance config (MkDocsConfig): global configuration object files (Files): global files collection Returns: Optional[str]: HTML rendered from Markdown source as string """ # Skip if disabled if not self.config.enabled: return # skip pages that don't match the config var match_path if not self.match_path_pattern.match(page.file.src_uri): return # skip pages with draft=true if page.meta.get("draft", False) is True: logger.debug(f"Page {page.title} ignored because it's a draft") return # retrieve dates from git log page_dates = self.util.get_file_dates( in_page=page, source_date_creation=self.config.date_from_meta.as_creation, source_date_update=self.config.date_from_meta.as_update, meta_datetime_format=self.config.date_from_meta.datetime_format, meta_default_timezone=self.config.date_from_meta.default_timezone, meta_default_time=self.config.date_from_meta.default_time, ) # handle custom URL parameters if self.config.url_parameters: page_url_full = self.util.build_url( base_url=page.canonical_url, path="", args_dict=self.config.url_parameters, ) else: page_url_full = page.canonical_url # handle URL comment path if self.config.comments_path: page_url_comments = self.util.build_url( base_url=page.canonical_url, path=self.config.comments_path, ) else: page_url_comments = None # append to list to be filtered later self.pages_to_filter.append( PageInformation( abs_path=Path(page.file.abs_src_path), authors=self.util.get_authors_from_meta(in_page=page), categories=self.util.get_categories_from_meta( in_page=page, categories_labels=self.config.categories ), comments_url=page_url_comments, created=page_dates[0], description=self.util.get_description_or_abstract( in_page=page, chars_count=self.config.abstract_chars_count, abstract_delimiter=self.config.abstract_delimiter, ), guid=page.canonical_url, link=page_url_full, title=page.title, updated=page_dates[1], # for later fetch _mkdocs_page_ref=MkdocsPageSubset.from_page(page), ) ) def on_post_build(self, config: config_options.Config) -> None: """The post_build event does not alter any variables. Use this event to call post-build scripts. See: Args: config (config_options.Config): global configuration object """ # Skip if disabled if not self.config.enabled: return # pretty print or not pretty_print = self.config.pretty_print # output filepaths out_feed_created = Path(config.site_dir).joinpath( self.config.feeds_filenames.rss_created ) out_feed_updated = Path(config.site_dir).joinpath( self.config.feeds_filenames.rss_updated ) out_json_created = Path(config.site_dir).joinpath( self.config.feeds_filenames.json_created ) out_json_updated = Path(config.site_dir).joinpath( self.config.feeds_filenames.json_updated ) # stylesheet for RSS feed if self.config.stylesheet == "auto": xsl_source = self.tpl_folder.joinpath("default.xsl") xsl_dest = Path(config.site_dir).joinpath("rss.xsl") copyfile(xsl_source, xsl_dest) # created items self.feed_created.entries.extend( self.util.filter_pages( pages=self.pages_to_filter, filter_attribute="created", length=self.config.length, ) ) # updated items self.feed_updated.entries.extend( self.util.filter_pages( pages=self.pages_to_filter, filter_attribute="updated", length=self.config.length, ) ) # load RSS items images (enclosures) logger.debug( f"Loading images for {len(self.feed_created.entries)} pages by creation " f"and {len(self.feed_updated.entries)} pages by update" ) processed_refs = set() self.util.load_images_for_pages( self.feed_created.entries, config.site_url, processed_refs ) self.util.load_images_for_pages( self.feed_updated.entries, config.site_url, processed_refs ) # RSS if self.config.rss_feed_enabled: # Jinja environment depending on the pretty print option if pretty_print: # load Jinja environment and template env = Environment( autoescape=select_autoescape(["html", "xml"]), loader=FileSystemLoader(self.tpl_folder), ) else: # load Jinja environment and template env = Environment( autoescape=select_autoescape(["html", "xml"]), loader=FileSystemLoader(self.tpl_folder), lstrip_blocks=True, trim_blocks=True, ) template = env.get_template(self.tpl_file.name) # -- Feed sorted by creation date logger.debug("Fill creation dates and dump created feed into RSS template.") # set pub date as created for page in self.feed_created.entries: page.pub_date = format_datetime(dt=page.created) # write file with out_feed_created.open(mode="w", encoding="UTF8") as fifeed_created: if pretty_print: fifeed_created.write(template.render(feed=self.feed_created)) else: prev_char = "" for char in template.render(feed=asdict(self.feed_created)): if char == "\n": # convert new lines to spaces to preserve sentence structure char = " " if char == " " and prev_char == " ": prev_char = char continue prev_char = char fifeed_created.write(char) # -- Feed sorted by last update date logger.debug("Fill update dates and dump udpated feed into RSS template.") # set pub date as updated for page in self.feed_updated.entries: page.pub_date = format_datetime(dt=page.updated) # write file with out_feed_updated.open(mode="w", encoding="UTF8") as fifeed_updated: if pretty_print: fifeed_updated.write(template.render(feed=self.feed_updated)) else: prev_char = "" for char in template.render(feed=asdict(self.feed_updated)): if char == "\n": # convert new lines to spaces to preserve sentence structure char = " " if char == " " and prev_char == " ": prev_char = char continue prev_char = char fifeed_updated.write(char) # JSON FEED if self.config.json_feed_enabled: with out_json_created.open(mode="w", encoding="UTF8") as fp: json.dump( self.util.feed_to_json(self.feed_created), fp, indent=4 if self.config.pretty_print else None, ) with out_json_updated.open(mode="w", encoding="UTF8") as fp: json.dump( self.util.feed_to_json(self.feed_updated), fp, indent=4 if self.config.pretty_print else None, )