addOption('dry-run', null, InputOption::VALUE_NONE, 'No DB changes.');
        // No specific --debug flag needed, we will output verbose logs by default for now
    }

    /**
     * Attaches RSS audio URLs to notes that have no recording yet.
     *
     * Steps:
     *  1. Load every note whose recording is NULL or '' and whose user has a
     *     non-null homeChurchRSS, newest note first.
     *  2. Group those notes by user so each feed is fetched once per user.
     *  3. For each note, consider only feed items published on the note's date
     *     (compared as 'Y-m-d' strings), score them with calculateConfidence()
     *     and keep the highest-scoring item that reaches the minimum score.
     *
     * With --dry-run the matches are only reported; no entity is modified and
     * nothing is flushed.
     *
     * @return int Always Command::SUCCESS; a failure while processing one user
     *             is reported and the remaining users are still processed.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $isDryRun = (bool) $input->getOption('dry-run');
        $noteRepository = $this->entityManager->getRepository(Note::class);

        // Minimum score (in percent) an RSS item must reach to be accepted.
        $minConfidence = 80;

        $io->title("Starting Audio Matcher");

        // 1. Fetch Notes
        $qb = $noteRepository->createQueryBuilder('n')
            ->leftJoin('n.user', 'u')
            ->addSelect('u')
            ->where('n.recording IS NULL OR n.recording = :empty')
            ->andWhere('u.homeChurchRSS IS NOT NULL')
            ->orderBy('n.date', 'DESC')
            ->setParameter('empty', '');

        $notesMissingAudio = $qb->getQuery()->getResult();
        $count = count($notesMissingAudio);

        $io->text("Found $count notes in database missing audio.");

        if ($count === 0) {
            return Command::SUCCESS;
        }

        // 2. Group by User
        $notesByUser = [];
        foreach ($notesMissingAudio as $note) {
            $userId = (string) $note->getUser()->getId();
            $notesByUser[$userId]['user'] = $note->getUser();
            $notesByUser[$userId]['notes'][] = $note;
        }

        // 3. Process Per User
        foreach ($notesByUser as $data) {
            $user = $data['user'];
            $userNotes = $data['notes'];
            $rssUrl = $user->getHomeChurchRSS();

            $io->section("User: {$user->getEmail()} (Notes: " . count($userNotes) . ")");
            $io->text("Fetching RSS: $rssUrl");

            try {
                // Pass $io to helper for debug output
                $rssItems = $this->fetchRssItems($rssUrl, $io);

                if (empty($rssItems)) {
                    $io->warning("RSS feed was empty or failed to parse.");
                    continue;
                }

                $matchCount = 0;

                foreach ($userNotes as $note) {
                    if (!$note->getDate()) {
                        $io->text(" > Note ID {$note->getId()} skipped (No Date)");
                        continue;
                    }

                    $noteDateString = $note->getDate()->format('Y-m-d');
                    $noteTitle = $note->getTitle();

                    $io->text("---------------------------------------------------");
                    $io->text("Checking Note: [$noteDateString] '$noteTitle'");

                    $bestMatch = null;
                    $highestConfidence = 0;

                    foreach ($rssItems as $item) {
                        // Only items published on the note's date are candidates
                        // (items with an unparseable date carry null and never match).
                        if ($item['date_string'] !== $noteDateString) {
                            continue;
                        }

                        // An item without an enclosure URL cannot supply a recording.
                        // Accepting it would store '' and leave the note "missing
                        // audio", so it would be picked up again on every run.
                        if ($item['url'] === '') {
                            $io->text(sprintf(
                                " - SKIPPED: no audio URL. RSS Title: '%s'",
                                $item['title']
                            ));
                            continue;
                        }

                        $confidence = $this->calculateConfidence($note, $item);

                        $io->text(sprintf(
                            " - DATE MATCHED. Score: %d%%. RSS Title: '%s'",
                            $confidence,
                            $item['title']
                        ));

                        if ($confidence >= $minConfidence && $confidence > $highestConfidence) {
                            $highestConfidence = $confidence;
                            $bestMatch = $item;
                        }
                    }

                    if ($bestMatch === null) {
                        $io->text(" > No match found for this note.");
                        continue;
                    }

                    $matchCount++;
                    $io->success("Match Found! ($highestConfidence%)");

                    if (!$isDryRun) {
                        $note->setRecording($bestMatch['url']);
                    }
                }

                // Flush once per user, and only when a recording was actually set.
                if (!$isDryRun && $matchCount > 0) {
                    $this->entityManager->flush();
                }

                if ($matchCount > 0) {
                    $io->success("Found $matchCount matches");
                }
            } catch (\Exception $e) {
                // Best effort: report the failure and carry on with the next user.
                $io->error("Error: " . $e->getMessage());
            }
        }

        return Command::SUCCESS;
    }

    /**
     * Recursively fetches RSS items if pagination links are present.
*/ private function fetchRssItems(string $startUrl, SymfonyStyle $io): array { $items = []; $nextUrl = $startUrl; $pageCount = 0; $maxPages = 20; // Safety brake to prevent infinite loops do { $pageCount++; $io->text(" > Fetching Feed Page $pageCount: $nextUrl"); try { $response = $this->httpClient->request('GET', $nextUrl); $content = $response->getContent(); // Suppress warnings for malformed XML $xml = @simplexml_load_string($content); if ($xml === false) { $io->warning("XML Parsing Failed on page $pageCount"); break; } } catch (\Exception $e) { $io->warning("HTTP Request Failed on page $pageCount: " . $e->getMessage()); break; } // 1. Parse Items on this page $pageItemsCount = 0; foreach ($xml->channel->item as $item) { $namespaces = $item->getNamespaces(true); $speaker = ''; // Speaker Logic if (isset($namespaces['itunes'])) { $itunes = $item->children($namespaces['itunes']); $speaker = (string) ($itunes->author ?? ''); } if (empty($speaker) && isset($namespaces['dc'])) { $dc = $item->children($namespaces['dc']); $speaker = (string) ($dc->creator ?? ''); } if (empty($speaker)) { $speaker = (string) ($item->author ?? ''); } // Date Parsing $dateString = null; if (isset($item->pubDate)) { try { $dt = new \DateTimeImmutable((string)$item->pubDate); $dateString = $dt->format('Y-m-d'); } catch (\Exception $e) { // ignore bad date } } $items[] = [ 'title' => (string) $item->title, 'speaker' => $speaker, 'url' => (string) ($item->enclosure['url'] ?? ''), 'date_string' => $dateString, ]; $pageItemsCount++; } $io->text(" Found $pageItemsCount items on this page."); // 2. Look for "Next Page" link (RFC 5005 / Atom) $nextUrl = null; // Get namespaces on the element $namespaces = $xml->channel->getNamespaces(true); if (isset($namespaces['atom'])) { $atom = $xml->channel->children($namespaces['atom']); foreach ($atom->link as $link) { // We are looking for