diff options
author | Daniel Weipert <git@mail.dweipert.de> | 2024-10-17 19:39:29 +0200 |
---|---|---|
committer | Daniel Weipert <git@mail.dweipert.de> | 2024-10-17 19:39:29 +0200 |
commit | 684eab6378acf78aedfa0d778aac96a2190feda6 (patch) | |
tree | 5896fcc8e9bfbd49067fdc1aa0986c7d612f0f4e | |
parent | 624d7c9ba5b28a3eeef99af7277bf3bacc817709 (diff) |
-rw-r--r-- | index.php | 130 |
1 files changed, 101 insertions, 29 deletions
@@ -41,6 +41,7 @@ if (isset($_GET['init--db'])) { create table if not exists search_index ( id integer primary key, url text unqiue, + domain text, content text, excerpt text, title text, @@ -58,6 +59,10 @@ if (isset($_GET['init--db'])) { **** */ +/* + * crawl + * max-pages + */ if (! empty($_GET['crawl'])) { $targetToCrawl = $_GET['crawl']; $targetUrl = parse_url($targetToCrawl); @@ -74,6 +79,8 @@ if (! empty($_GET['crawl'])) { $responseCode = intval(explode(' ', get_headers("$baseUrl/sitemap.xml")[0])[1]); if ($responseCode == 200) { + Logger::log('Found sitemap.xml'); + $dom = new DOMDocument(); @$dom->loadHTML(file_get_contents("$baseUrl/sitemap.xml", false, $streamContext)); $xpath = new DomXPath($dom); @@ -177,6 +184,11 @@ function index_page(string $url): bool { } + // Domain + $parsedUrl = parse_url($url); + $domain = $parsedUrl['host']; + + // Insert $db->prepare(<<<SQL @@ -187,10 +199,11 @@ function index_page(string $url): bool { ]); return $db->prepare(<<<SQL - insert into search_index (url, content, excerpt, title, timestamp) values (:url, :content, :excerpt, :title, :timestamp) + insert into search_index (url, domain, content, excerpt, title, timestamp) values (:url, :domain, :content, :excerpt, :title, :timestamp) SQL) ->execute([ 'url' => $url, + 'domain' => $domain, 'content' => $content, 'excerpt' => $excerpt, 'title' => $title, @@ -206,10 +219,17 @@ function index_page(string $url): bool { **** */ +/** + * search + * domain + */ if (isset($_GET['search'])) { ?> <!DOCTYPE html> <html> + <head> + <style>@media (prefers-color-scheme: dark) { html { color: #eee; background: #222; } a { color: #fff; } }</style> + </head> <body> <div style="max-width: 1200px; margin: 0 auto;"> <form> @@ -219,40 +239,45 @@ if (isset($_GET['search'])) { <?php if (! 
empty($_GET['search'])) { - $statement = $db->prepare(<<<SQL - select * from search_index - where content like :search - limit :limit offset :offset - SQL); - $statement->execute([ - 'search' => "%{$_GET['search']}%", - 'limit' => 10, - 'offset' => (($_GET['page'] ?? 1) - 1) * 10 - ]); - $result = $statement->fetchAll(); - - foreach ($result as $row) { - $excerpt = $row['excerpt']; - if (empty($excerpt)) { - $excerpt = "EXCERPT"; + $results = search($_GET['search'], $_GET['domain'] ?? '', limit: 10, page: $_GET['page'] ?? 1)['results']; + + if (count($results) >= 2) { + ?><ul><?php + foreach ($results as $group => $a) { + ?> + <li> + <form> + <?php form_query_fields(); ?> + <button name="domain" value="<?php echo $group; ?>"><?php echo $group; ?></button> + </form> + </li> + <?php } + ?></ul><?php + } - ?> - <hr> - <div> - <a href="<?php echo $row['url']; ?>"><?php echo $row['title']; ?></a> - <p><?php echo $row['excerpt']; ?></p> - <a href="<?php echo $row['url']; ?>"><?php echo $row['url']; ?></a> - </div> - <?php + foreach ($results as $group) { + foreach ($group as $row) { + $excerpt = $row['excerpt']; + if (empty($excerpt)) { + $excerpt = "EXCERPT"; + } + + ?> + <hr> + <div> + <a href="<?php echo $row['url']; ?>"><?php echo $row['title']; ?></a> + <p><?php echo $row['excerpt']; ?></p> + <a href="<?php echo $row['url']; ?>"><?php echo $row['url']; ?></a> + </div> + <?php + } } - $query = array_diff_key($_GET, array_flip(['page'])); ?> + <hr> <form> - <?php foreach ($query as $key => $value): ?> - <input type="hidden" name="<?php echo $key; ?>" value="<?php echo $value; ?>"> - <?php endforeach; ?> + <?php form_query_fields(); ?> <button name="page" value="<?php echo intval($_GET['page'] ?? 1) - 1; ?>">Previous</button> <button name="page" value="<?php echo intval($_GET['page'] ?? 
1) + 1; ?>">Next</button> </form> @@ -266,6 +291,53 @@ if (isset($_GET['search'])) { <?php }
+/**
+ * Runs a substring (LIKE) search over search_index.content and returns the
+ * matching rows grouped by their domain column.
+ *
+ * @param string $query  Text to look for; it is wrapped in % wildcards before binding.
+ * @param string $domain Optional exact domain filter; '' disables the filter.
+ * @param int    $limit  Maximum number of rows returned for one page.
+ * @param int    $page   1-based page number; values below 1 are treated as 1.
+ *
+ * @return array{results: array<string, list<array>>} Rows keyed by domain.
+ */
+function search(string $query, string $domain = '', int $limit = 10, int $page = 1): array {
+    // NOTE(review): assumes $db is the PDO connection created elsewhere in this file.
+    global $db;
+
+    // The "Previous" button can submit page=0; clamp so the offset never goes negative.
+    $page = max(1, $page);
+
+    // Compare against '' instead of relying on truthiness, so a filter of "0" is still applied.
+    $hasDomain = $domain !== '';
+    $whereDomain = $hasDomain ? 'and domain = :domain' : '';
+
+    // $whereDomain is one of two fixed literals, never user input, so interpolating it is safe.
+    $sql = <<<SQL
+        select * from search_index
+        where content like :search $whereDomain
+        limit :limit offset :offset
+        SQL;
+
+    $statement = $db->prepare($sql);
+    $statement->bindValue(':search', "%$query%");
+    // execute([...]) would bind these as strings; LIMIT/OFFSET need real integers to be portable.
+    $statement->bindValue(':limit', $limit, \PDO::PARAM_INT);
+    $statement->bindValue(':offset', ($page - 1) * $limit, \PDO::PARAM_INT);
+    if ($hasDomain) {
+        $statement->bindValue(':domain', $domain);
+    }
+    $statement->execute();
+
+    $results = [];
+    foreach ($statement->fetchAll() as $row) {
+        // Rows without a stored domain are grouped under '' rather than using null as a key.
+        $results[$row['domain'] ?? ''][] = $row;
+    }
+
+    return [
+        'results' => $results,
+    ];
+}
+
+/**
+ * Prints every current GET parameter except `page` as a hidden <input>, so a
+ * form that submits a new page or domain keeps the rest of the query string.
+ */
+function form_query_fields(): void {
+    $query = array_diff_key($_GET, array_flip(['page']));
+
+    foreach ($query as $key => $value) {
+        // Nested parameters (?a[]=1) cannot be written into a single value attribute; skip them.
+        if (! is_scalar($value)) {
+            continue;
+        }
+
+        // Request data is untrusted: escape it before it lands inside HTML attributes.
+        $fieldName = htmlspecialchars((string) $key, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+        $fieldValue = htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+        ?>
+            <input type="hidden" name="<?php echo $fieldName; ?>" value="<?php echo $fieldValue; ?>">
+        <?php
+    }
+}
+
 /* |