new generic_html parser for extracting hrefs

This commit is contained in:
Nick Sweeting
2020-08-18 08:29:05 -04:00
parent a682a9c478
commit 15efb2d5ed
5 changed files with 106 additions and 39 deletions

View File

@@ -301,14 +301,14 @@ def load_main_index_meta(out_dir: str=OUTPUT_DIR) -> Optional[dict]:
 @enforce_types
-def parse_links_from_source(source_path: str) -> Tuple[List[Link], List[Link]]:
+def parse_links_from_source(source_path: str, root_url: Optional[str]=None) -> Tuple[List[Link], List[Link]]:
     from ..parsers import parse_links
     new_links: List[Link] = []
     # parse and validate the import file
-    raw_links, parser_name = parse_links(source_path)
+    raw_links, parser_name = parse_links(source_path, root_url=root_url)
     new_links = validate_links(raw_links)
     if parser_name: