<?php
declare(strict_types=1);

namespace App\Service;

use Cake\ORM\TableRegistry;
use App\Service\EmbeddingsClient;

/**
 * Finds FAQ entries whose stored embedding is closest to a free-text query.
 *
 * Rows are read from the `Faqs` table; each row's `embedding_json` column is
 * expected to hold a JSON-encoded list of numbers. The query is embedded via
 * EmbeddingsClient and compared against every stored vector with cosine
 * similarity.
 */
class FaqRetrieverService
{
    /** Similarity cut-off used when the FAQ_EMBED_THRESHOLD env value is not set. */
    private const DEFAULT_THRESHOLD = '0.70';

    /**
     * Cosine similarity of two numeric vectors.
     *
     * When the vectors differ in length only the shared leading components are
     * compared. Both arrays must be 0-indexed lists.
     *
     * @param array<int, int|float> $a
     * @param array<int, int|float> $b
     * @return float Similarity in [-1, 1]; 0.0 when either vector has zero magnitude
     *               (which includes the empty-vector case).
     */
    private function cosineSim(array $a, array $b): float
    {
        $dot = 0.0;
        $normA = 0.0;
        $normB = 0.0;

        $length = min(count($a), count($b));
        for ($i = 0; $i < $length; $i++) {
            $dot += $a[$i] * $b[$i];
            $normA += $a[$i] * $a[$i];
            $normB += $b[$i] * $b[$i];
        }

        // A zero-magnitude vector has no direction; avoid dividing by zero.
        if ($normA === 0.0 || $normB === 0.0) {
            return 0.0;
        }

        return $dot / (sqrt($normA) * sqrt($normB));
    }

    /**
     * Decodes a stored embedding into a 0-indexed list of floats.
     *
     * Anything that is not a JSON array made up solely of numbers (malformed
     * JSON, a JSON scalar, nested or non-numeric elements) yields an empty array
     * so callers can skip the row instead of hitting a TypeError while scoring.
     *
     * @param string $json Raw column value.
     * @return array<int, float> The vector, or [] when the value is unusable.
     */
    private function decodeEmbedding(string $json): array
    {
        $decoded = json_decode($json, true);
        if (!is_array($decoded)) {
            return [];
        }

        $vector = [];
        foreach ($decoded as $component) {
            if (!is_int($component) && !is_float($component)) {
                return [];
            }
            // Appending re-indexes from 0, which cosineSim() relies on.
            $vector[] = (float)$component;
        }

        return $vector;
    }

    /**
     * Picks the entries to return from a list of scored candidates.
     *
     * Candidates are ordered by descending score and cut to the top $limit.
     * Of those, entries scoring at least $threshold are kept; if none qualify,
     * the single best candidate is returned anyway.
     *
     * @param array<int, array{score: float, row: array}> $scored
     * @param int $limit Maximum number of entries; values below 1 are treated as 1.
     * @param float $threshold Minimum score for an entry to be kept.
     * @return array<int, array{score: float, row: array}> Best match first.
     */
    private function selectMatches(array $scored, int $limit, float $threshold): array
    {
        if ($scored === []) {
            return [];
        }

        usort($scored, static fn (array $x, array $y): int => $y['score'] <=> $x['score']);
        $top = array_slice($scored, 0, max($limit, 1));

        $kept = array_values(array_filter(
            $top,
            static fn (array $candidate): bool => $candidate['score'] >= $threshold
        ));

        return $kept !== [] ? $kept : [$top[0]];
    }

    /**
     * Returns up to $k FAQ snippets that best match $query semantically.
     *
     * Public because it is the class's only entry point; as a private method
     * nothing could call it.
     *
     * The similarity threshold is read from the FAQ_EMBED_THRESHOLD env value
     * (default 0.70). The best-scoring FAQ is returned even when it falls below
     * the threshold. Rows whose stored embedding cannot be decoded are ignored.
     *
     * @param string $query Free-text query to match against the FAQs.
     * @param int $k Maximum number of snippets; values below 1 yield an empty result.
     * @return array<int, array<string, mixed>> Entries with keys `url` (the row's
     *         link, or 'FAQ' when it is empty), `text` ("Q: ...\nA: ...") and
     *         `kind` (always 'faq'), best match first.
     */
    public function findFaqSnippetsSemantic(string $query, int $k = 3): array
    {
        // Nothing can be returned for a non-positive limit, so skip the table
        // scan and the embeddings call altogether.
        if ($k < 1) {
            return [];
        }

        $rows = TableRegistry::getTableLocator()->get('Faqs')
            ->find()
            ->select(['id', 'question', 'answer', 'link', 'embedding_json'])
            ->where(['embedding_json IS NOT' => null])
            ->enableHydration(false)
            ->all()
            ->toList();

        if ($rows === []) {
            return [];
        }

        // NOTE(review): embed() is assumed to return the query's embedding as a
        // list of numbers - confirm against EmbeddingsClient.
        $queryVector = (new EmbeddingsClient())->embed($query);
        if (empty($queryVector)) {
            // Without a query vector every score would be 0.0 and the "best"
            // match would be an arbitrary row.
            return [];
        }

        $scored = [];
        foreach ($rows as $row) {
            $embedding = $this->decodeEmbedding((string)$row['embedding_json']);
            if ($embedding === []) {
                continue;
            }
            $scored[] = [
                'score' => $this->cosineSim($queryVector, $embedding),
                'row' => $row,
            ];
        }

        $threshold = (float)env('FAQ_EMBED_THRESHOLD', self::DEFAULT_THRESHOLD);

        $chunks = [];
        foreach ($this->selectMatches($scored, $k, $threshold) as $match) {
            $row = $match['row'];
            $chunks[] = [
                'url' => $row['link'] ?: 'FAQ',
                'text' => "Q: {$row['question']}\nA: {$row['answer']}",
                'kind' => 'faq',
            ];
        }

        return $chunks;
    }
}