diff --git a/src/Command/GetAudioCommand.php b/src/Command/GetAudioCommand.php
new file mode 100644
index 0000000..cbf3903
--- /dev/null
+++ b/src/Command/GetAudioCommand.php
@@ -0,0 +1,282 @@
+addOption('dry-run', null, InputOption::VALUE_NONE, 'No DB changes.');
+        // No specific --debug flag needed, we will output verbose logs by default for now
+    }
+
+    /**
+     * Matches notes that have no recording against their user's church RSS feed.
+     *
+     * Steps:
+     *  1. Load every Note whose recording is NULL or '' and whose user has a
+     *     non-null homeChurchRSS, ordered by note date descending.
+     *  2. Group those notes by user id.
+     *  3. For each user, obtain the feed items via fetchRssItems() and, for every
+     *     note that has a date, pick the item whose 'date_string' equals the
+     *     note's Y-m-d date and whose calculateConfidence() score is the highest
+     *     value of at least 80.
+     *  4. Store the winning item's 'url' as the note's recording and flush once
+     *     per user, unless --dry-run was given.
+     *
+     * Feed items whose 'url' is an empty string (no enclosure in the feed) are
+     * never accepted: storing '' as the recording would leave the note in the
+     * "missing audio" set while reporting a successful match.
+     *
+     * Each distinct feed URL is fetched at most once per run, so users that
+     * share a feed do not trigger repeated downloads.
+     *
+     * NOTE(review): calculateConfidence() is defined elsewhere in this class;
+     * it is presumably a 0-100 percentage (it is printed with "%d%%") - confirm.
+     *
+     * @param InputInterface  $input  Console input; only the "dry-run" option is read here.
+     * @param OutputInterface $output Console output used for progress reporting.
+     *
+     * @return int Always Command::SUCCESS; a failure for one user is reported
+     *             and the loop continues with the next user.
+     */
+    protected function execute(InputInterface $input, OutputInterface $output): int
+    {
+        $io = new SymfonyStyle($input, $output);
+        $isDryRun = (bool) $input->getOption('dry-run');
+        $noteRepository = $this->entityManager->getRepository(Note::class);
+
+        // Lowest score (as returned by calculateConfidence()) accepted as a match.
+        $minConfidence = 80;
+
+        $io->title("Starting Audio Matcher");
+
+        // 1. Fetch Notes
+        $qb = $noteRepository->createQueryBuilder('n')
+            ->leftJoin('n.user', 'u')
+            ->addSelect('u')
+            ->where('n.recording IS NULL OR n.recording = :empty')
+            ->andWhere('u.homeChurchRSS IS NOT NULL')
+            ->orderBy('n.date', 'DESC')
+            ->setParameter('empty', '');
+
+        $notesMissingAudio = $qb->getQuery()->getResult();
+        $count = count($notesMissingAudio);
+        $io->text("Found $count notes in database missing audio.");
+
+        if ($count === 0) {
+            return Command::SUCCESS;
+        }
+
+        // 2. Group by User
+        $notesByUser = [];
+        foreach ($notesMissingAudio as $note) {
+            $userId = (string) $note->getUser()->getId();
+            $notesByUser[$userId]['user'] = $note->getUser();
+            $notesByUser[$userId]['notes'][] = $note;
+        }
+
+        // Feed items already fetched during this run, keyed by feed URL.
+        $feedCache = [];
+
+        // 3. Process Per User
+        foreach ($notesByUser as $data) {
+            $user = $data['user'];
+            $userNotes = $data['notes'];
+            $rssUrl = $user->getHomeChurchRSS();
+
+            $io->section("User: {$user->getEmail()} (Notes: " . count($userNotes) . ")");
+
+            try {
+                if (array_key_exists($rssUrl, $feedCache)) {
+                    $io->text("Reusing RSS items already fetched for: $rssUrl");
+                } else {
+                    $io->text("Fetching RSS: $rssUrl");
+                    // Pass $io to helper for debug output
+                    $feedCache[$rssUrl] = $this->fetchRssItems($rssUrl, $io);
+                }
+                $rssItems = $feedCache[$rssUrl];
+
+                if (empty($rssItems)) {
+                    $io->warning("RSS feed was empty or failed to parse.");
+                    continue;
+                }
+
+                $matchCount = 0;
+
+                foreach ($userNotes as $note) {
+                    if (!$note->getDate()) {
+                        $io->text("   > Note ID {$note->getId()} skipped (No Date)");
+                        continue;
+                    }
+
+                    $noteDateString = $note->getDate()->format('Y-m-d');
+                    $noteTitle = $note->getTitle();
+                    $io->text("---------------------------------------------------");
+                    $io->text("Checking Note: [$noteDateString] '$noteTitle'");
+
+                    $bestMatch = null;
+                    $highestConfidence = 0;
+
+                    foreach ($rssItems as $item) {
+                        // Only items published on the note's own day are candidates.
+                        if ($item['date_string'] !== $noteDateString) {
+                            continue;
+                        }
+
+                        // An item without an enclosure URL has nothing to store.
+                        if ($item['url'] === '') {
+                            $io->text(sprintf(
+                                "   - SKIPPED: No audio URL. RSS Title: '%s'",
+                                $item['title']
+                            ));
+                            continue;
+                        }
+
+                        // DEBUG: Show Score Calculation
+                        $confidence = $this->calculateConfidence($note, $item);
+                        $io->text(sprintf(
+                            "   - DATE MATCHED. Score: %d%%. RSS Title: '%s'",
+                            $confidence,
+                            $item['title']
+                        ));
+
+                        // Strict ">" keeps the first item seen when scores tie.
+                        if ($confidence >= $minConfidence && $confidence > $highestConfidence) {
+                            $highestConfidence = $confidence;
+                            $bestMatch = $item;
+                        }
+                    }
+
+                    if ($bestMatch !== null) {
+                        $matchCount++;
+                        $io->success("Match Found! ($highestConfidence%)");
+                        if (!$isDryRun) {
+                            $note->setRecording($bestMatch['url']);
+                        }
+                    } else {
+                        $io->text("   > No match found for this note.");
+                    }
+                }
+
+                if (!$isDryRun) {
+                    $this->entityManager->flush();
+                }
+
+                if ($matchCount > 0) {
+                    $io->success("Found $matchCount matches");
+                }
+
+            } catch (\Exception $e) {
+                // Report and move on so one failing user does not abort the run.
+                $io->error("Error: " . $e->getMessage());
+            }
+        }
+
+        return Command::SUCCESS;
+    }
+
+    /**
+     * Recursively fetches RSS items if pagination links are present.
+ */ + private function fetchRssItems(string $startUrl, SymfonyStyle $io): array + { + $items = []; + $nextUrl = $startUrl; + $pageCount = 0; + $maxPages = 20; // Safety brake to prevent infinite loops + + do { + $pageCount++; + $io->text(" > Fetching Feed Page $pageCount: $nextUrl"); + + try { + $response = $this->httpClient->request('GET', $nextUrl); + $content = $response->getContent(); + + // Suppress warnings for malformed XML + $xml = @simplexml_load_string($content); + + if ($xml === false) { + $io->warning("XML Parsing Failed on page $pageCount"); + break; + } + } catch (\Exception $e) { + $io->warning("HTTP Request Failed on page $pageCount: " . $e->getMessage()); + break; + } + + // 1. Parse Items on this page + $pageItemsCount = 0; + foreach ($xml->channel->item as $item) { + $namespaces = $item->getNamespaces(true); + $speaker = ''; + + // Speaker Logic + if (isset($namespaces['itunes'])) { + $itunes = $item->children($namespaces['itunes']); + $speaker = (string) ($itunes->author ?? ''); + } + if (empty($speaker) && isset($namespaces['dc'])) { + $dc = $item->children($namespaces['dc']); + $speaker = (string) ($dc->creator ?? ''); + } + if (empty($speaker)) { + $speaker = (string) ($item->author ?? ''); + } + + // Date Parsing + $dateString = null; + if (isset($item->pubDate)) { + try { + $dt = new \DateTimeImmutable((string)$item->pubDate); + $dateString = $dt->format('Y-m-d'); + } catch (\Exception $e) { + // ignore bad date + } + } + + $items[] = [ + 'title' => (string) $item->title, + 'speaker' => $speaker, + 'url' => (string) ($item->enclosure['url'] ?? ''), + 'date_string' => $dateString, + ]; + $pageItemsCount++; + } + + $io->text(" Found $pageItemsCount items on this page."); + + // 2. 
Look for "Next Page" link (RFC 5005 / Atom) + $nextUrl = null; + + // Get namespaces on the element + $namespaces = $xml->channel->getNamespaces(true); + + if (isset($namespaces['atom'])) { + $atom = $xml->channel->children($namespaces['atom']); + foreach ($atom->link as $link) { + // We are looking for