Merge pull request #356 from cdvv7788/depth-flag

Nick Sweeting authored 2020-07-13 05:05:36 -04:00, committed by GitHub
7 changed files with 90 additions and 41 deletions

archivebox/cli/archivebox_add.py

@@ -38,18 +38,38 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
         type=str,
         default=None,
         help=(
-            'URL or path to local file containing a list of links to import. e.g.:\n'
+            'URL or path to local file to start the archiving process from. e.g.:\n'
             '    https://getpocket.com/users/USERNAME/feed/all\n'
             '    https://example.com/some/rss/feed.xml\n'
+            '    https://example.com\n'
             '    ~/Downloads/firefox_bookmarks_export.html\n'
             '    ~/Desktop/sites_list.csv\n'
         )
     )
+    parser.add_argument(
+        "--depth",
+        action="store",
+        default=0,
+        choices=[0,1],
+        type=int,
+        help="Recursively archive all linked pages up to this many hops away"
+    )
     command = parser.parse_args(args or ())
-    import_str = accept_stdin(stdin)
+    import_string = accept_stdin(stdin)
+    if import_string and command.import_path:
+        stderr(
+            '[X] You should pass an import path or a page url as an argument or in stdin but not both\n',
+            color='red',
+        )
+        raise SystemExit(2)
+    elif import_string:
+        import_path = import_string
+    else:
+        import_path = command.import_path
     add(
-        import_str=import_str,
-        import_path=command.import_path,
+        url=import_path,
+        depth=command.depth,
         update_all=command.update_all,
         index_only=command.index_only,
         out_dir=pwd or OUTPUT_DIR,
@@ -63,12 +83,6 @@ if __name__ == '__main__':
 # TODO: Implement these
 #
-# parser.add_argument(
-#     '--depth', #'-d',
-#     type=int,
-#     help='Recursively archive all linked pages up to this many hops away',
-#     default=0,
-# )
 # parser.add_argument(
 #     '--mirror', #'-m',
 #     action='store_true',
 #     help='Archive an entire site (finding all linked pages below it on the same domain)',
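
For reference, a minimal standalone sketch of what the new flag does (plain argparse and a direct sys.stdin read stand in for ArchiveBox's real parser setup and accept_stdin helper, and the final add() call is mocked with a print):

# sketch.py -- standalone approximation of the new `archivebox add` CLI surface
import argparse
import sys
from typing import List, Optional

def main(args: Optional[List[str]] = None) -> None:
    parser = argparse.ArgumentParser(prog='archivebox add')
    parser.add_argument('import_path', nargs='?', type=str, default=None)
    parser.add_argument(
        '--depth',
        action='store',
        default=0,
        choices=[0, 1],   # only direct (0) or one-hop (1) archiving is allowed
        type=int,
        help='Recursively archive all linked pages up to this many hops away',
    )
    command = parser.parse_args(args)

    # a URL may arrive via stdin or as the positional argument, but not both
    import_string = None if sys.stdin.isatty() else (sys.stdin.read().strip() or None)
    if import_string and command.import_path:
        print('[X] Pass an import path or page URL as an argument or via stdin, but not both', file=sys.stderr)
        raise SystemExit(2)
    url = import_string or command.import_path

    print(f'add(url={url!r}, depth={command.depth})')  # the real CLI calls main.add() here

if __name__ == '__main__':
    main()

Passing --depth=2 fails at parse time thanks to choices=[0,1], which is why the old open-ended TODO stub for --depth above could simply be deleted.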

archivebox/core/views.py

@@ -66,12 +66,10 @@ class AddLinks(View):
         if form.is_valid():
             url = form.cleaned_data["url"]
             print(f'[+] Adding URL: {url}')
-            if form.cleaned_data["source"] == "url":
-                key = "import_str"
-            else:
-                key = "import_path"
+            depth = 0 if form.cleaned_data["source"] == "url" else 1
             input_kwargs = {
-                key: url,
+                "url": url,
+                "depth": depth,
                 "update_all": False,
                 "out_dir": OUTPUT_DIR,
             }
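
The four-line branch collapses into one conditional expression: a submission flagged as a bare page URL gets depth=0, while a list/feed source gets depth=1, and both now flow through the same url kwarg, presumably forwarded as add(**input_kwargs). A self-contained sketch of that selection (the "url" source value matches the hunk above; the alternative source value in the second assert is an assumption for illustration):

from typing import Any, Dict

def build_add_kwargs(url: str, source: str) -> Dict[str, Any]:
    # "url" means archive just the submitted page; any other source type
    # (e.g. a feed or bookmarks file) should also pull in pages one hop out
    depth = 0 if source == "url" else 1
    return {"url": url, "depth": depth, "update_all": False}

assert build_add_kwargs("https://example.com", "url")["depth"] == 0
assert build_add_kwargs("https://example.com/feed.xml", "import_path")["depth"] == 1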

archivebox/main.py

@@ -496,8 +496,8 @@ def status(out_dir: str=OUTPUT_DIR) -> None:
 @enforce_types
-def add(import_str: Optional[str]=None,
-        import_path: Optional[str]=None,
+def add(url: str,
+        depth: int=0,
         update_all: bool=not ONLY_NEW,
         index_only: bool=False,
         out_dir: str=OUTPUT_DIR) -> List[Link]:
@@ -505,18 +505,9 @@ def add(import_str: Optional[str]=None,
     check_data_folder(out_dir=out_dir)
-    if (import_str and import_path) or (not import_str and not import_path):
-        stderr(
-            '[X] You should pass either an import path as an argument, '
-            'or pass a list of links via stdin, but not both.\n',
-            color='red',
-        )
-        raise SystemExit(2)
-    elif import_str:
-        import_path = save_stdin_to_sources(import_str, out_dir=out_dir)
-    elif import_path:
-        import_path = save_file_to_sources(import_path, out_dir=out_dir)
+    base_path = save_stdin_to_sources(url, out_dir=out_dir)
+    if depth == 1:
+        depth_path = save_file_to_sources(url, out_dir=out_dir)
     check_dependencies()
     # Step 1: Load list of links from the existing index
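
With stdin and the positional argument already merged into one url upstream, add() no longer needs the three-way branch: it always saves the raw input as a sources file, and at depth=1 it additionally fetches the page at url so its outgoing links can be parsed. A hedged, self-contained sketch of that entry flow (the two helpers below are simplified stand-ins for ArchiveBox's save_stdin_to_sources/save_file_to_sources, which really write timestamped files under OUTPUT_DIR/sources):

import os
from typing import Optional

def save_stdin_to_sources(url: str, out_dir: str) -> str:
    # stand-in: persist the raw input (a URL or newline-separated list) verbatim
    path = os.path.join(out_dir, 'sources', 'stdin-import.txt')  # real filename is timestamped
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as f:
        f.write(url + '\n')
    return path

def save_file_to_sources(url: str, out_dir: str) -> str:
    # stand-in: the real helper downloads the page at `url` (or copies a local
    # file) into sources/ so the link parsers can run over its contents
    path = os.path.join(out_dir, 'sources', 'page-import.html')
    open(path, 'w').close()
    return path

def add(url: str, depth: int = 0, out_dir: str = '.') -> None:
    base_path = save_stdin_to_sources(url, out_dir=out_dir)      # always parsed
    depth_path: Optional[str] = None
    if depth == 1:
        depth_path = save_file_to_sources(url, out_dir=out_dir)  # parsed only at depth 1
    print(f'sources: {base_path!r}, {depth_path!r}')

add('https://example.com', depth=1)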
@@ -524,8 +515,11 @@ def add(import_str: Optional[str]=None,
     all_links: List[Link] = []
     new_links: List[Link] = []
     all_links = load_main_index(out_dir=out_dir)
-    if import_path:
-        all_links, new_links = import_new_links(all_links, import_path, out_dir=out_dir)
+    all_links, new_links = import_new_links(all_links, base_path, out_dir=out_dir)
+    if depth == 1:
+        all_links, new_links_depth = import_new_links(all_links, depth_path, out_dir=out_dir)
+        new_links = new_links + new_links_depth
     # Step 2: Write updated index with deduped old and new links back to disk
     write_main_index(links=all_links, out_dir=out_dir)
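
At depth=1 the index import therefore runs twice: once over the sources file holding the submitted URL, and once over the downloaded page, with the two batches of new links concatenated before the index is rewritten. A small self-contained sketch of that merge-and-dedupe pattern (plain URL strings stand in for ArchiveBox's Link objects, and this import_new_links is an illustrative stub, not the real parser-backed helper):

from typing import List, Tuple

def import_new_links(all_links: List[str], source: List[str]) -> Tuple[List[str], List[str]]:
    # illustrative stub: merge `source` into the index, returning the merged
    # list plus only the entries that were not already present
    seen = set(all_links)
    new_links: List[str] = []
    for url in source:
        if url not in seen:
            seen.add(url)
            new_links.append(url)
    return all_links + new_links, new_links

all_links = ['https://example.com/old']   # loaded from the main index
all_links, new_links = import_new_links(all_links, ['https://example.com'])
# depth == 1: also merge links discovered on the page itself
all_links, new_links_depth = import_new_links(all_links, ['https://example.com', 'https://example.com/about'])
new_links = new_links + new_links_depth
assert new_links == ['https://example.com', 'https://example.com/about']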