$dbHost,
    'port' => $dbPort,
    'dbname' => $dbName,
];

// Build the PDO DSN and open the database connection.
// NOTE(review): the array closed above starts outside this view; presumably it
// is $dsnParts (it is read on the next line) - confirm.

// Keep only the DSN parts whose value is non-empty.
$dsnParts = array_filter($dsnParts, fn ($value) => ! empty($value));
// Turn each remaining key/value pair into a "key=value" string.
$dsnParts = array_map(function ($key, $value) {
    return "$key=$value";
}, array_keys($dsnParts), $dsnParts);

// For sqlite the DSN is "<type>:" followed by $dbPath; for any other type it is
// "<type>:" followed by the "key=value" parts joined with ';'.
$dsn = "$dbType:" . ($dbType == 'sqlite' ? $dbPath : implode(';', $dsnParts));

// The connection is kept in the global $db; rows are fetched as associative
// arrays by default.
global $db;
$db = new PDO(
    $dsn,
    $dbUsername,
    $dbPassword,
    [
        PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
    ],
);

// Runs when the `init--db` query parameter is present (isset(), so an empty
// value also counts).
// NOTE(review): `<<execute()` looks like a heredoc statement whose SQL body was
// lost; as written this is not the original statement - restore it from the
// original file.
if (isset($_GET['init--db'])) {
    $db->query(<<execute();
}

/* ****
 * Crawler
 *
 * Runs when the `crawl` query parameter is non-empty. Fetches that URL, collects
 * its <a> elements, and for every link on the same host downloads the page,
 * strips the HTML tags and hands the text to a prepared statement.
 **** */
if (! empty($_GET['crawl'])) {
    $targetToCrawl = $_GET['crawl'];
    // NOTE(review): the URL comes straight from the request and the 'scheme' /
    // 'host' keys are read without checking that parse_url() produced them.
    $targetUrl = parse_url($targetToCrawl);
    $baseUrl = "{$targetUrl['scheme']}://{$targetUrl['host']}";

    // Stream context passed to the file_get_contents() calls below; asks the
    // http wrapper to follow redirects.
    $streamContext = stream_context_create([
        'http' => [
            'follow_location' => true,
        ],
    ]);

    // Status code = second space-separated field of the first header line that
    // get_headers() returns for <base>/sitemap.xml.
    // NOTE(review): get_headers() result is indexed without a failure check.
    $responseCode = intval(explode(' ', get_headers("$baseUrl/sitemap.xml")[0])[1]);

    if ($responseCode == 200) {
        // Intentionally empty so far: nothing is done when a sitemap exists
        // (see the TODO notes further down).
    } else {
        // No sitemap: parse the target page itself. The @ operator silences
        // warnings raised while evaluating this statement.
        $dom = new DOMDocument();
        @$dom->loadHTML(file_get_contents($targetToCrawl, false, $streamContext));

        // Select every <a> element in the document.
        $xpath = new DomXPath($dom);
        $links = $xpath->query('//a');

        foreach ($links as $link) {
            /** @var DOMNode $link */
            // NOTE(review): assumes every <a> has an href attribute - verify
            // what getNamedItem() yields when it is missing.
            $href = $link->attributes->getNamedItem('href')->nodeValue;
            // An href starting with "http" is used as-is; anything else is
            // appended directly to the base URL (no separator is inserted).
            $fullPath = str_starts_with($href, 'http') ? $href : "$baseUrl$href";
            $linkUrl = parse_url($fullPath);

            // Stay on the crawled host: skip links that point elsewhere.
            if ($linkUrl['host'] != $targetUrl['host']) {
                continue;
            }

            // Download the linked page and reduce it to text without tags.
            $content = file_get_contents($fullPath, false, $streamContext);
            $content = strip_tags($content);

            // Parameters: url, stripped content, the literal placeholders
            // 'EXCERPT' and 'TITLE', and the current time() value.
            // NOTE(review): `<<execute(` looks like a heredoc statement whose
            // SQL body was lost - restore it from the original file.
            $db->prepare(<<execute([
                'url' => $fullPath,
                'content' => $content,
                'excerpt' => 'EXCERPT',
                'title' => 'TITLE',
                'timestamp' => time(),
            ]);
        }
    }

    /*
     * NOTE(review): in this single-line form of the file, everything from the
     * first line-comment marker below up to the closing PHP tag is lexically
     * one line comment. That includes the brace that would close the crawler
     * block and what reads as the start of a search handler (prepare, execute
     * with a "%term%" parameter, fetchAll, foreach over the rows).
     * Presumably the original file had a line break after each TODO - confirm
     * against the original before relying on this section. It is kept
     * verbatim here.
     */
    // TODO: check sitemap first, only check those links then // TODO: otherwise get every link on site and crawl that // TODO: check if link (without query params?) already visited for recursive protection // strip html => store text in db } if (isset($_GET['search'])) { if (! empty($_GET['search'])) { $statement = $db->prepare(<<execute(['search' => "%{$_GET['search']}%"]); $result = $statement->fetchAll(); foreach ($result as $row) { ?>