summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Daniel Weipert <git@mail.dweipert.de> 2024-10-17 19:39:29 +0200
committer Daniel Weipert <git@mail.dweipert.de> 2024-10-17 19:39:29 +0200
commit 684eab6378acf78aedfa0d778aac96a2190feda6 (patch)
tree 5896fcc8e9bfbd49067fdc1aa0986c7d612f0f4e
parent 624d7c9ba5b28a3eeef99af7277bf3bacc817709 (diff)
group by domain (HEAD, main)
-rw-r--r-- index.php 130
1 files changed, 101 insertions, 29 deletions
diff --git a/index.php b/index.php
index e23fa19..30c1c76 100644
--- a/index.php
+++ b/index.php
@@ -41,6 +41,7 @@ if (isset($_GET['init--db'])) {
create table if not exists search_index (
id integer primary key,
url text unqiue,
+ domain text,
content text,
excerpt text,
title text,
@@ -58,6 +59,10 @@ if (isset($_GET['init--db'])) {
****
*/
+/*
+ * crawl
+ * max-pages
+ */
if (! empty($_GET['crawl'])) {
$targetToCrawl = $_GET['crawl'];
$targetUrl = parse_url($targetToCrawl);
@@ -74,6 +79,8 @@ if (! empty($_GET['crawl'])) {
$responseCode = intval(explode(' ', get_headers("$baseUrl/sitemap.xml")[0])[1]);
if ($responseCode == 200) {
+ Logger::log('Found sitemap.xml');
+
$dom = new DOMDocument();
@$dom->loadHTML(file_get_contents("$baseUrl/sitemap.xml", false, $streamContext));
$xpath = new DomXPath($dom);
@@ -177,6 +184,11 @@ function index_page(string $url): bool {
}
+ // Domain
+ $parsedUrl = parse_url($url);
+ $domain = $parsedUrl['host'];
+
+
// Insert
$db->prepare(<<<SQL
@@ -187,10 +199,11 @@ function index_page(string $url): bool {
]);
return $db->prepare(<<<SQL
- insert into search_index (url, content, excerpt, title, timestamp) values (:url, :content, :excerpt, :title, :timestamp)
+ insert into search_index (url, domain, content, excerpt, title, timestamp) values (:url, :domain, :content, :excerpt, :title, :timestamp)
SQL)
->execute([
'url' => $url,
+ 'domain' => $domain,
'content' => $content,
'excerpt' => $excerpt,
'title' => $title,
@@ -206,10 +219,17 @@ function index_page(string $url): bool {
****
*/
+/**
+ * search
+ * domain
+ */
if (isset($_GET['search'])) {
?>
<!DOCTYPE html>
<html>
+ <head>
+ <style>@media (prefers-color-scheme: dark) { html { color: #eee; background: #222; } a { color: #fff; } }</style>
+ </head>
<body>
<div style="max-width: 1200px; margin: 0 auto;">
<form>
@@ -219,40 +239,45 @@ if (isset($_GET['search'])) {
<?php
if (! empty($_GET['search'])) {
- $statement = $db->prepare(<<<SQL
- select * from search_index
- where content like :search
- limit :limit offset :offset
- SQL);
- $statement->execute([
- 'search' => "%{$_GET['search']}%",
- 'limit' => 10,
- 'offset' => (($_GET['page'] ?? 1) - 1) * 10
- ]);
- $result = $statement->fetchAll();
-
- foreach ($result as $row) {
- $excerpt = $row['excerpt'];
- if (empty($excerpt)) {
- $excerpt = "EXCERPT";
+ $results = search($_GET['search'], $_GET['domain'] ?? '', limit: 10, page: $_GET['page'] ?? 1)['results'];
+
+ if (count($results) >= 2) {
+ ?><ul><?php
+ foreach ($results as $group => $a) {
+ ?>
+ <li>
+ <form>
+ <?php form_query_fields(); ?>
+ <button name="domain" value="<?php echo $group; ?>"><?php echo $group; ?></button>
+ </form>
+ </li>
+ <?php
}
+ ?></ul><?php
+ }
- ?>
- <hr>
- <div>
- <a href="<?php echo $row['url']; ?>"><?php echo $row['title']; ?></a>
- <p><?php echo $row['excerpt']; ?></p>
- <a href="<?php echo $row['url']; ?>"><?php echo $row['url']; ?></a>
- </div>
- <?php
+ foreach ($results as $group) {
+ foreach ($group as $row) {
+ $excerpt = $row['excerpt'];
+ if (empty($excerpt)) {
+ $excerpt = "EXCERPT";
+ }
+
+ ?>
+ <hr>
+ <div>
+ <a href="<?php echo $row['url']; ?>"><?php echo $row['title']; ?></a>
+ <p><?php echo $row['excerpt']; ?></p>
+ <a href="<?php echo $row['url']; ?>"><?php echo $row['url']; ?></a>
+ </div>
+ <?php
+ }
}
- $query = array_diff_key($_GET, array_flip(['page']));
?>
+ <hr>
<form>
- <?php foreach ($query as $key => $value): ?>
- <input type="hidden" name="<?php echo $key; ?>" value="<?php echo $value; ?>">
- <?php endforeach; ?>
+ <?php form_query_fields(); ?>
<button name="page" value="<?php echo intval($_GET['page'] ?? 1) - 1; ?>">Previous</button>
<button name="page" value="<?php echo intval($_GET['page'] ?? 1) + 1; ?>">Next</button>
</form>
@@ -266,6 +291,53 @@ if (isset($_GET['search'])) {
<?php
}
+/**
+ * Search the index for rows whose content contains the query string.
+ *
+ * @param string $query  Text matched as a substring via SQL `like`; `%` and `_` inside it act as wildcards.
+ * @param string $domain When non-empty, only rows whose domain column equals this value are returned.
+ * @param int    $limit  Maximum number of rows fetched for the page.
+ * @param int    $page   1-based page number; values below 1 are treated as 1.
+ *
+ * @return array{results: array<string, list<array>>} Fetched rows grouped by their domain column.
+ */
+function search(string $query, string $domain = '', int $limit = 10, int $page = 1): array {
+  global $db;
+
+  $filterByDomain = $domain !== '';
+
+  $sql = 'select * from search_index where content like :search';
+  if ($filterByDomain) {
+    $sql .= ' and domain = :domain';
+  }
+  $sql .= ' limit :limit offset :offset';
+
+  $statement = $db->prepare($sql);
+  $statement->bindValue(':search', "%$query%");
+  if ($filterByDomain) {
+    $statement->bindValue(':domain', $domain);
+  }
+
+  // execute() with an array binds every value as a string; limit and offset
+  // are bound explicitly as integers so the driver never sees quoted numbers.
+  $statement->bindValue(':limit', $limit, \PDO::PARAM_INT);
+  // Clamp the page so a request for page 0 or below cannot yield a negative offset.
+  $statement->bindValue(':offset', (max($page, 1) - 1) * $limit, \PDO::PARAM_INT);
+  $statement->execute();
+
+  $results = [];
+  foreach ($statement->fetchAll() as $row) {
+    // Rows without a domain value are grouped under the empty-string key.
+    $results[$row['domain'] ?? ''][] = $row;
+  }
+
+  return [
+    'results' => $results,
+  ];
+}
+
+/**
+ * Print the current query parameters, except `page`, as hidden form inputs.
+ *
+ * Lets a form re-submit the active request parameters together with the
+ * value of whichever button is pressed.
+ */
+function form_query_fields(): void {
+  $query = array_diff_key($_GET, array_flip(['page']));
+
+  foreach ($query as $key => $value) {
+    // Array-valued parameters (e.g. ?a[]=1) cannot be written as one input value.
+    if (is_array($value)) {
+      continue;
+    }
+
+    // Names and values come straight from the request; escape them for the
+    // HTML attribute context so they cannot break out of the quotes.
+    $name = htmlspecialchars((string) $key, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+    $fieldValue = htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+    ?>
+    <input type="hidden" name="<?php echo $name; ?>" value="<?php echo $fieldValue; ?>">
+    <?php
+  }
+}
+
/*