mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-04-06 07:47:53 +10:00
way better plugin hooks system wip
This commit is contained in:
@@ -23,7 +23,7 @@ const puppeteer = require('puppeteer-core');
|
||||
const EXTRACTOR_NAME = 'accessibility';
|
||||
const OUTPUT_DIR = '.';
|
||||
const OUTPUT_FILE = 'accessibility.json';
|
||||
const CHROME_SESSION_DIR = '../chrome_session';
|
||||
const CHROME_SESSION_DIR = '../chrome';
|
||||
|
||||
// Parse command line arguments
|
||||
function parseArgs() {
|
||||
@@ -49,7 +49,23 @@ function getEnvBool(name, defaultValue = false) {
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
// Get CDP URL from chrome_session
|
||||
// Wait for chrome tab to be fully loaded
|
||||
async function waitForChromeTabLoaded(timeoutMs = 60000) {
|
||||
const navigationFile = path.join(CHROME_SESSION_DIR, 'navigation.json');
|
||||
const startTime = Date.now();
|
||||
|
||||
while (Date.now() - startTime < timeoutMs) {
|
||||
if (fs.existsSync(navigationFile)) {
|
||||
return true;
|
||||
}
|
||||
// Wait 100ms before checking again
|
||||
await new Promise(resolve => setTimeout(resolve, 100));
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Get CDP URL from chrome plugin
|
||||
function getCdpUrl() {
|
||||
const cdpFile = path.join(CHROME_SESSION_DIR, 'cdp_url.txt');
|
||||
if (fs.existsSync(cdpFile)) {
|
||||
@@ -69,7 +85,7 @@ async function extractAccessibility(url) {
|
||||
// Connect to existing Chrome session
|
||||
const cdpUrl = getCdpUrl();
|
||||
if (!cdpUrl) {
|
||||
return { success: false, error: 'No Chrome session found (chrome_session extractor must run first)' };
|
||||
return { success: false, error: 'No Chrome session found (chrome plugin must run first)' };
|
||||
}
|
||||
|
||||
browser = await puppeteer.connect({
|
||||
@@ -207,6 +223,12 @@ async function main() {
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
// Wait for page to be fully loaded
|
||||
const pageLoaded = await waitForChromeTabLoaded(60000);
|
||||
if (!pageLoaded) {
|
||||
throw new Error('Page not loaded after 60s (chrome_navigate must complete first)');
|
||||
}
|
||||
|
||||
const result = await extractAccessibility(url);
|
||||
|
||||
if (result.success) {
|
||||
|
||||
Reference in New Issue
Block a user