<?php
declare(strict_types=1);

namespace App\Command;

use Cake\Command\Command;
use Cake\Console\Arguments;
use Cake\Console\ConsoleIo;
use Cake\I18n\DateTime;
use Cake\Console\ConsoleOptionParser;

/**
 * Console command that imports FAQ rows from delimited text files into the Faqs table.
 *
 * Each file is comma- or tab-separated and must start with a header row (optionally
 * preceded by a two-cell "title, URL" preamble line). Three header layouts are understood:
 *
 *  - Q/A:     `question` + `answer` columns, stored as-is.
 *  - People:  `role/title` + `name` columns, turned into "Who is the <role>?" entries.
 *  - Catalog: `subcategory` + `details` columns, turned into "<category>: <subcategory>" entries.
 *
 * Rows are upserted by their `question` text.
 */
class ImportFaqsCommand extends Command
{
    /** Glob patterns used when a directory (or no input at all) is given. */
    private const FILE_PATTERNS = ['*.txt', '*.csv', '*.tsv'];

    /** Placeholder value in a link column that means "there is no link" (compared case-insensitively). */
    private const NO_LINK = 'no link';

    private const SCHEMA_QA = 'qa';
    private const SCHEMA_PEOPLE = 'people';
    private const SCHEMA_CATALOG = 'catalog';

    /**
     * Describe the command and register the optional --dir/-d option.
     *
     * Positional arguments are intentionally not declared so that any number of
     * file paths, directories, or glob patterns can be passed.
     */
    public function buildOptionParser(ConsoleOptionParser $parser): ConsoleOptionParser
    {
        $parser = parent::buildOptionParser($parser);

        return $parser
            ->setDescription('Import FAQ files (txt/csv/tsv). Accepts file paths, directories, or globs.')
            ->addOption('dir', [
                'short' => 'd',
                'help'  => 'Directory containing FAQ files',
            ]);
    }

    /**
     * Import every file resolved from the positional arguments and --dir.
     *
     * With no inputs at all, every .txt/.csv/.tsv file under webroot/faq is imported.
     *
     * @return int Command::CODE_ERROR when no input file could be resolved, otherwise
     *             Command::CODE_SUCCESS (skipped or failed rows are reported as warnings only).
     */
    public function execute(Arguments $args, ConsoleIo $io)
    {
        // 1) Collect inputs: positional files + optional --dir
        $inputs = $args->getArguments();
        $dir = $args->getOption('dir');
        if (is_string($dir) && $dir !== '') {
            $inputs[] = $dir;
        }

        // 2) Default: import ALL .txt/.csv/.tsv under webroot/faq
        if ($inputs === []) {
            $faqDir = WWW_ROOT . 'faq' . DIRECTORY_SEPARATOR;
            $inputs = array_map(
                static fn (string $pattern): string => $faqDir . $pattern,
                self::FILE_PATTERNS,
            );
        }

        // 3) Expand to concrete file paths
        $paths = $this->expandInputsToFiles($inputs, $io);
        if ($paths === []) {
            $io->err('No files found to import.');

            return Command::CODE_ERROR;
        }

        $faqs = $this->fetchTable('Faqs');
        $saved = 0;
        $skipped = 0;
        $failed = 0;

        foreach ($paths as $path) {
            $stats = $this->importFile($path, $faqs, $io);
            if ($stats === null) {
                // File was rejected; importFile() already explained why.
                continue;
            }

            $saved   += $stats['saved'];
            $skipped += $stats['skipped'];
            $failed  += $stats['failed'];
            $io->out("Processed $path");
        }

        $io->out("Imported/updated rows: $saved");
        if ($skipped > 0) {
            $io->warning("Skipped rows (missing question/answer): $skipped");
        }
        if ($failed > 0) {
            $io->warning("Rows that failed to save: $failed");
        }

        return Command::CODE_SUCCESS;
    }

    /**
     * Import a single file.
     *
     * @param string $path File to read.
     * @param \Cake\ORM\Table $faqs Target table.
     * @param \Cake\Console\ConsoleIo $io Console used for progress and error messages.
     * @return array{saved:int, skipped:int, failed:int}|null Row counters, or null when the
     *         file was rejected (unreadable, empty, or no supported header).
     */
    private function importFile(string $path, \Cake\ORM\Table $faqs, ConsoleIo $io): ?array
    {
        if (!is_readable($path)) {
            $io->err("File not found or unreadable: $path");

            return null;
        }

        $fh = fopen($path, 'r');
        if ($fh === false) {
            $io->err("Failed to open: $path");

            return null;
        }

        // try/finally guarantees the handle is released even if the ORM throws mid-file.
        try {
            $layout = $this->readLayout($fh);
            if ($layout === null) {
                $io->warning("Empty file: $path");

                return null;
            }
            if ($layout['schema'] === null) {
                $io->err("Missing supported columns in: $path (need either question/answer, or People schema: role/title+name, or Catalog schema: subcategory+details)");

                return null;
            }

            $stats = ['saved' => 0, 'skipped' => 0, 'failed' => 0];

            // The escape character is passed explicitly (same value as the historical
            // default) because relying on the implicit default is deprecated in PHP 8.4.
            while (($row = fgetcsv($fh, 0, $layout['delimiter'], '"', '\\')) !== false) {
                // fgetcsv() yields a single null cell for a blank line.
                if (count($row) === 1 && trim((string)$row[0]) === '') {
                    continue;
                }

                $record = $this->mapRow($row, $layout);
                if ($record === null) {
                    // Row carries no data for this schema: ignore without counting it.
                    continue;
                }
                if ($record['question'] === '' || $record['answer'] === '') {
                    $stats['skipped']++;
                    continue;
                }

                if ($this->saveRecord($faqs, $record, $path, $io)) {
                    $stats['saved']++;
                } else {
                    $stats['failed']++;
                }
            }

            return $stats;
        } finally {
            fclose($fh);
        }
    }

    /**
     * Read the optional preamble line and the header row, leaving the stream
     * positioned on the first data row.
     *
     * A preamble is a first line with exactly two cells whose second cell is an
     * http(s) URL; its cells become the default category and link for every row.
     *
     * @param resource $fh Open file handle positioned at the start of the file.
     * @return array{
     *     delimiter: string,
     *     schema: string|null,
     *     idx: array<string, int|null>,
     *     preambleTitle: string|null,
     *     preambleUrl: string|null
     * }|null Null when the file has no lines at all; `schema` is null when the header
     *        matches none of the supported layouts.
     */
    private function readLayout($fh): ?array
    {
        $first = fgets($fh);
        if ($first === false) {
            return null;
        }

        // Drop a UTF-8 byte-order mark so it does not end up inside the first column name.
        if (str_starts_with($first, "\xEF\xBB\xBF")) {
            $first = substr($first, 3);
        }

        $delimiter = $this->detectDelimiter($first);
        $header = str_getcsv($first, $delimiter, '"', '\\');

        $preambleTitle = null;
        $preambleUrl = null;
        if (count($header) === 2 && $this->isUrl(trim((string)$header[1]))) {
            $preambleTitle = trim((string)$header[0]);
            $preambleUrl = trim((string)$header[1]);

            $second = fgets($fh);
            if ($second !== false) {
                // The data may use a different separator than the preamble line,
                // so the delimiter is taken from the real header row.
                $delimiter = $this->detectDelimiter($second);
                $header = str_getcsv($second, $delimiter, '"', '\\');
            }
        }

        $header = array_map(
            static fn ($name): string => is_string($name) ? strtolower(trim($name)) : '',
            $header,
        );

        $indexOf = static function (string $name) use ($header): ?int {
            $position = array_search($name, $header, true);

            return $position === false ? null : (int)$position;
        };

        $idx = [
            'question'    => $indexOf('question'),
            'answer'      => $indexOf('answer'),
            'category'    => $indexOf('category'),
            'link'        => $indexOf('link'),
            'tags'        => $indexOf('tags'),
            // People
            'role'        => $indexOf('role/title'),
            'name'        => $indexOf('name'),
            // Catalog
            'subcategory' => $indexOf('subcategory'),
            'details'     => $indexOf('details'),
        ];

        // Precedence when a header satisfies several layouts: Q/A, then People, then Catalog.
        $schema = null;
        if ($idx['question'] !== null && $idx['answer'] !== null) {
            $schema = self::SCHEMA_QA;
        } elseif ($idx['role'] !== null && $idx['name'] !== null) {
            $schema = self::SCHEMA_PEOPLE;
        } elseif ($idx['subcategory'] !== null && $idx['details'] !== null) {
            $schema = self::SCHEMA_CATALOG;
        }

        return [
            'delimiter'     => $delimiter,
            'schema'        => $schema,
            'idx'           => $idx,
            'preambleTitle' => $preambleTitle,
            'preambleUrl'   => $preambleUrl,
        ];
    }

    /**
     * Turn one raw data row into the field values to store.
     *
     * @param array<int, string|null> $row Cells as returned by fgetcsv().
     * @param array $layout Result of readLayout() with a non-null `schema`.
     * @return array{category:string, question:string, answer:string, link:string, tags:string}|null
     *         Null when the row holds nothing relevant for the schema (ignored silently).
     *         An empty `question` or `answer` marks a row that cannot be imported.
     */
    private function mapRow(array $row, array $layout): ?array
    {
        $cells = array_map(static fn ($value): string => trim((string)$value), $row);
        $idx = $layout['idx'];

        $cell = static fn (?int $i): string => ($i !== null && isset($cells[$i])) ? $cells[$i] : '';

        // Row values win; the preamble (if any) supplies the defaults.
        $category = $cell($idx['category']);
        if ($category === '') {
            $category = $layout['preambleTitle'] ?? '';
        }
        $link = $cell($idx['link']);
        if ($link === '') {
            $link = $layout['preambleUrl'] ?? '';
        }
        $tags = $cell($idx['tags']);

        // A literal "No link" cell is a placeholder, not something to store or display.
        $usableLink = ($link !== '' && strcasecmp($link, self::NO_LINK) !== 0) ? $link : '';

        $question = '';
        $answer = '';

        if ($layout['schema'] === self::SCHEMA_QA) {
            $question = $cell($idx['question']);
            $answer = $cell($idx['answer']);
        } elseif ($layout['schema'] === self::SCHEMA_PEOPLE) {
            $role = $cell($idx['role']);
            $name = $cell($idx['name']);
            if ($role === '' && $name === '') {
                return null;
            }

            // Both parts are required: without a role there is no meaningful question,
            // and without a name no meaningful answer. Otherwise question/answer stay
            // empty and the caller counts the row as skipped.
            if ($role !== '' && $name !== '') {
                // Q: "Who is the <Role> (<Category>)?"
                $catSuffix = $category !== '' ? " ({$category})" : '';
                $question = "Who is the {$role}{$catSuffix}?";

                // A: "<Name> — <link>" (link omitted when absent)
                $answer = $name . ($usableLink !== '' ? " — {$usableLink}" : '');
            }
        } elseif ($layout['schema'] === self::SCHEMA_CATALOG) {
            $sub = $cell($idx['subcategory']);
            $details = $cell($idx['details']);
            if ($sub === '' && $details === '' && $link === '') {
                return null;
            }

            // Q: "<Category>: <Subcategory>", falling back to whichever part exists.
            if ($category !== '' && $sub !== '') {
                $question = "{$category}: {$sub}";
            } elseif ($category !== '') {
                $question = $category;
            } else {
                $question = $sub !== '' ? $sub : 'Info';
            }

            // A: details, with the link (if any) on its own line.
            $answer = $details;
            if ($usableLink !== '') {
                $answer = $answer !== '' ? "{$answer}\n{$usableLink}" : $usableLink;
            }
        }

        return [
            'category' => $category,
            'question' => $question,
            'answer'   => $answer,
            'link'     => $usableLink,
            'tags'     => $tags,
        ];
    }

    /**
     * Insert the record, or update the existing row that has the same question.
     *
     * @param \Cake\ORM\Table $faqs Target table.
     * @param array{category:string, question:string, answer:string, link:string, tags:string} $record
     * @param string $path Source file, used in the error message only.
     * @param \Cake\Console\ConsoleIo $io Console used to report a failed save.
     * @return bool True when the row was persisted.
     */
    private function saveRecord(\Cake\ORM\Table $faqs, array $record, string $path, ConsoleIo $io): bool
    {
        $data = [
            'category'   => $record['category'] !== '' ? $record['category'] : null,
            'question'   => $record['question'],
            'answer'     => $record['answer'],
            'link'       => $record['link'] !== '' ? $record['link'] : null,
            'tags'       => $record['tags'] !== '' ? $record['tags'] : null,
            // NOTE(review): updated_at is explicitly reset to null on every import —
            // presumably so a behavior or DB default refills it; confirm this is intended.
            'updated_at' => null,
        ];

        $existing = $faqs->find()->where(['question' => $record['question']])->first();
        $entity = $existing !== null
            ? $faqs->patchEntity($existing, $data)
            : $faqs->newEntity($data);

        if ($faqs->save($entity) === false) {
            $errors = $entity->getErrors();
            $detail = $errors !== [] ? ' ' . (string)json_encode($errors) : '';
            $io->err("Failed to save row from {$path}: {$record['question']}{$detail}");

            return false;
        }

        return true;
    }

    /**
     * Pick the field separator for a line: tab when the line contains one, otherwise comma.
     */
    private function detectDelimiter(string $line): string
    {
        return str_contains($line, "\t") ? "\t" : ',';
    }

    /**
     * Whether the value starts with http:// or https:// (case-insensitive).
     */
    private function isUrl(string $value): bool
    {
        return preg_match('#^https?://#i', $value) === 1;
    }

    /**
     * Expand CLI inputs into a list of readable files.
     * Accepts: exact files, directories, and glob patterns.
     *
     * @param string[] $inputs
     * @param \Cake\Console\ConsoleIo $io Console used to warn about inputs that resolve to nothing.
     * @return string[] De-duplicated file paths (resolved with realpath() where possible)
     */
    private function expandInputsToFiles(array $inputs, ConsoleIo $io): array
    {
        $files = [];

        $collect = static function (array $candidates) use (&$files): void {
            foreach ($candidates as $candidate) {
                if (is_file($candidate) && is_readable($candidate)) {
                    $files[] = realpath($candidate) ?: $candidate;
                }
            }
        };

        foreach ($inputs as $in) {
            $in = trim((string)$in);
            if ($in === '') {
                continue;
            }

            // Directory → include *.txt, *.csv, *.tsv
            if (is_dir($in)) {
                // Strip "/" as well as the platform separator so "dir/" works on Windows too.
                $dir = rtrim($in, '/' . DIRECTORY_SEPARATOR) . DIRECTORY_SEPARATOR;
                foreach (self::FILE_PATTERNS as $pattern) {
                    $collect(glob($dir . $pattern) ?: []);
                }
                continue;
            }

            // Direct file path. Checked before the wildcard test so that an existing
            // file whose name contains "[", "]", "?" or "*" is not misread as a pattern.
            if (is_file($in) && is_readable($in)) {
                $files[] = realpath($in) ?: $in;
                continue;
            }

            // Wildcards → glob
            if (strpbrk($in, '*?[]') !== false) {
                $collect(glob($in) ?: []);
                continue;
            }

            $io->warning("Not found or unreadable: {$in}");
        }

        // De-dup, stable order
        return array_values(array_unique($files));
    }
}
