From 684eab6378acf78aedfa0d778aac96a2190feda6 Mon Sep 17 00:00:00 2001 From: Daniel Weipert Date: Thu, 17 Oct 2024 19:39:29 +0200 Subject: group by domain --- index.php | 130 ++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 29 deletions(-) (limited to 'index.php') diff --git a/index.php b/index.php index e23fa19..30c1c76 100644 --- a/index.php +++ b/index.php @@ -41,6 +41,7 @@ if (isset($_GET['init--db'])) { create table if not exists search_index ( id integer primary key, url text unqiue, + domain text, content text, excerpt text, title text, @@ -58,6 +59,10 @@ if (isset($_GET['init--db'])) { **** */ +/* + * crawl + * max-pages + */ if (! empty($_GET['crawl'])) { $targetToCrawl = $_GET['crawl']; $targetUrl = parse_url($targetToCrawl); @@ -74,6 +79,8 @@ if (! empty($_GET['crawl'])) { $responseCode = intval(explode(' ', get_headers("$baseUrl/sitemap.xml")[0])[1]); if ($responseCode == 200) { + Logger::log('Found sitemap.xml'); + $dom = new DOMDocument(); @$dom->loadHTML(file_get_contents("$baseUrl/sitemap.xml", false, $streamContext)); $xpath = new DomXPath($dom); @@ -177,6 +184,11 @@ function index_page(string $url): bool { } + // Domain + $parsedUrl = parse_url($url); + $domain = $parsedUrl['host']; + + // Insert $db->prepare(<<prepare(<<execute([ 'url' => $url, + 'domain' => $domain, 'content' => $content, 'excerpt' => $excerpt, 'title' => $title, @@ -206,10 +219,17 @@ function index_page(string $url): bool { **** */ +/** + * search + * domain + */ if (isset($_GET['search'])) { ?> + + +
@@ -219,40 +239,45 @@ if (isset($_GET['search'])) { prepare(<<execute([ - 'search' => "%{$_GET['search']}%", - 'limit' => 10, - 'offset' => (($_GET['page'] ?? 1) - 1) * 10 - ]); - $result = $statement->fetchAll(); - - foreach ($result as $row) { - $excerpt = $row['excerpt']; - if (empty($excerpt)) { - $excerpt = "EXCERPT"; + $results = search($_GET['search'], $_GET['domain'] ?? '', limit: 10, page: $_GET['page'] ?? 1)['results']; + + if (count($results) >= 2) { + ?>
    $a) { + ?> +
  • + + + +
  • + +
-
-
- -

- -
- +
+
+ +

+ +
+ +
- $value): ?> - - +
@@ -266,6 +291,53 @@ if (isset($_GET['search'])) { "%$query%", + 'limit' => $limit, + 'offset' => ($page - 1) * $limit, + ...$mappingsDomain, + ]; + + $statement = $db->prepare($sql); + $statement->execute($mappings); + $rows = $statement->fetchAll(); + + $results = []; + foreach ($rows as $row) { + $results[$row['domain']][] = $row; + } + + return [ + 'results' => $results, + ]; +} + +function form_query_fields() { + $query = array_diff_key($_GET, array_flip(['page'])); + + foreach ($query as $key => $value) { + ?> + +