Compare commits
2 Commits
b14a0c23f6
...
1.2
| Author | SHA1 | Date | |
|---|---|---|---|
| 28ab3d7886 | |||
| bb8e7f359f |
@@ -1,282 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
namespace App\Command;
|
|
||||||
|
|
||||||
use App\Entity\Note;
|
|
||||||
use Doctrine\ORM\EntityManagerInterface;
|
|
||||||
use Symfony\Component\Console\Attribute\AsCommand;
|
|
||||||
use Symfony\Component\Console\Command\Command;
|
|
||||||
use Symfony\Component\Console\Input\InputInterface;
|
|
||||||
use Symfony\Component\Console\Input\InputOption;
|
|
||||||
use Symfony\Component\Console\Output\OutputInterface;
|
|
||||||
use Symfony\Component\Console\Style\SymfonyStyle;
|
|
||||||
use Symfony\Contracts\HttpClient\HttpClientInterface;
|
|
||||||
|
|
||||||
/**
 * Console command that fills in missing audio recordings on Notes.
 *
 * For every Note without a recording whose user has a home-church RSS feed,
 * the feed is downloaded (following pagination links) and the Note is matched
 * against feed items published on the same calendar date, scored by title
 * (and, when both sides have one, speaker) similarity.
 */
#[AsCommand(
    name: 'app:get-audio',
    description: 'Finds Notes with missing recordings and matches them to RSS feed by Date and Title.',
)]
class GetAudioCommand extends Command
{
    /** Minimum similarity score (0-100) a feed item must reach to be accepted as a match. */
    private const MIN_CONFIDENCE = 80;

    /** Hard cap on the number of feed pages followed per feed (safety brake). */
    private const MAX_FEED_PAGES = 20;

    /**
     * @param EntityManagerInterface $entityManager Used to query Notes and persist matched recordings.
     * @param HttpClientInterface    $httpClient    Used to download the RSS feeds.
     */
    public function __construct(
        private EntityManagerInterface $entityManager,
        private HttpClientInterface $httpClient
    ) {
        parent::__construct();
    }

    /**
     * Registers the command's options.
     *
     * There is no separate debug flag: verbose progress output is always printed.
     */
    protected function configure(): void
    {
        $this->addOption('dry-run', null, InputOption::VALUE_NONE, 'No DB changes.');
    }

    /**
     * Runs the matcher.
     *
     * Steps: load the Notes that lack a recording, group them by user, fetch each
     * user's feed once, and assign the best-scoring same-date feed item to each Note.
     * Changes are flushed once per user unless --dry-run is given.
     *
     * @return int Command::SUCCESS when every user was processed without an exception,
     *             Command::FAILURE when at least one user's processing threw.
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        $io = new SymfonyStyle($input, $output);
        $isDryRun = (bool) $input->getOption('dry-run');
        $noteRepository = $this->entityManager->getRepository(Note::class);

        $io->title("Starting Audio Matcher");

        // 1. Fetch Notes that have no recording and whose user has a feed configured.
        //    Newest notes first.
        $notesMissingAudio = $noteRepository->createQueryBuilder('n')
            ->leftJoin('n.user', 'u')
            ->addSelect('u')
            ->where('n.recording IS NULL OR n.recording = :empty')
            ->andWhere('u.homeChurchRSS IS NOT NULL')
            ->orderBy('n.date', 'DESC')
            ->setParameter('empty', '')
            ->getQuery()
            ->getResult();

        $count = count($notesMissingAudio);
        $io->text("Found $count notes in database missing audio.");

        if ($count === 0) {
            return Command::SUCCESS;
        }

        // 2. Group by User so each feed is downloaded only once.
        $notesByUser = [];
        foreach ($notesMissingAudio as $note) {
            $userId = (string) $note->getUser()->getId();
            $notesByUser[$userId]['user'] = $note->getUser();
            $notesByUser[$userId]['notes'][] = $note;
        }

        // 3. Process Per User
        $hadErrors = false;

        foreach ($notesByUser as $data) {
            $user = $data['user'];
            $userNotes = $data['notes'];
            $rssUrl = $user->getHomeChurchRSS();

            $io->section("User: {$user->getEmail()} (Notes: " . count($userNotes) . ")");
            $io->text("Fetching RSS: $rssUrl");

            try {
                $rssItems = $this->fetchRssItems($rssUrl, $io);

                if (empty($rssItems)) {
                    $io->warning("RSS feed was empty or failed to parse.");
                    continue;
                }

                $matchCount = 0;

                foreach ($userNotes as $note) {
                    if (!$note->getDate()) {
                        $io->text(" > Note ID {$note->getId()} skipped (No Date)");
                        continue;
                    }

                    $noteDateString = $note->getDate()->format('Y-m-d');
                    $noteTitle = $note->getTitle();
                    $io->text("---------------------------------------------------");
                    $io->text("Checking Note: [$noteDateString] '$noteTitle'");

                    $bestMatch = null;
                    $highestConfidence = 0;

                    foreach ($rssItems as $item) {
                        // Only items published on the note's calendar date are candidates.
                        if ($item['date_string'] !== $noteDateString) {
                            continue;
                        }

                        // An item without an enclosure has no audio to attach. Accepting it
                        // would store an empty recording and still report a match.
                        if ($item['url'] === '') {
                            $io->text(sprintf(
                                " - SKIPPED: No audio enclosure. RSS Title: '%s'",
                                $item['title']
                            ));
                            continue;
                        }

                        $confidence = $this->calculateConfidence($note, $item);
                        $io->text(sprintf(
                            " - DATE MATCHED. Score: %d%%. RSS Title: '%s'",
                            $confidence,
                            $item['title']
                        ));

                        if ($confidence >= self::MIN_CONFIDENCE && $confidence > $highestConfidence) {
                            $highestConfidence = $confidence;
                            $bestMatch = $item;
                        }
                    }

                    if ($bestMatch) {
                        $matchCount++;
                        $io->success("Match Found! ($highestConfidence%)");
                        if (!$isDryRun) {
                            $note->setRecording($bestMatch['url']);
                        }
                    } else {
                        $io->text(" > No match found for this note.");
                    }
                }

                if (!$isDryRun) {
                    $this->entityManager->flush();
                }

                if ($matchCount > 0) {
                    $io->success("Found $matchCount matches");
                }
            } catch (\Exception $e) {
                // Keep going with the remaining users, but remember the failure so the
                // exit code reflects it.
                $hadErrors = true;
                $io->error("Error: " . $e->getMessage());
            }
        }

        return $hadErrors ? Command::FAILURE : Command::SUCCESS;
    }

    /**
     * Downloads a feed and every following page advertised via a rel="next" link.
     *
     * Fetching stops when no next link is present, when a request or XML parse fails,
     * when the next link points at a page that was already fetched, or after
     * MAX_FEED_PAGES pages. Items collected before a failure are still returned.
     *
     * @param string       $startUrl URL of the first feed page.
     * @param SymfonyStyle $io       Receives progress and warning output.
     *
     * @return list<array{title: string, speaker: string, url: string, date_string: ?string}>
     */
    private function fetchRssItems(string $startUrl, SymfonyStyle $io): array
    {
        $items = [];
        $visited = [];
        $nextUrl = $startUrl;
        $pageCount = 0;

        do {
            $pageCount++;
            $visited[$nextUrl] = true;
            $io->text(" > Fetching Feed Page $pageCount: $nextUrl");

            try {
                $content = $this->httpClient->request('GET', $nextUrl)->getContent();
            } catch (\Exception $e) {
                $io->warning("HTTP Request Failed on page $pageCount: " . $e->getMessage());
                break;
            }

            $xml = $this->parseXml($content);
            if ($xml === null) {
                $io->warning("XML Parsing Failed on page $pageCount");
                break;
            }

            // 1. Parse Items on this page
            $pageItemsCount = 0;
            foreach ($xml->channel->item as $item) {
                $items[] = $this->parseFeedItem($item);
                $pageItemsCount++;
            }

            $io->text(" Found $pageItemsCount items on this page.");

            // 2. Look for "Next Page" link (RFC 5005 / Atom)
            $nextUrl = $this->findNextPageUrl($xml->channel);

            // A feed whose "next" link cycles back would otherwise be re-downloaded
            // until the page cap, producing duplicate items.
            if ($nextUrl !== null && isset($visited[$nextUrl])) {
                $io->warning("Next page link points to an already fetched page; stopping pagination.");
                $nextUrl = null;
            }
        } while ($nextUrl !== null && $pageCount < self::MAX_FEED_PAGES);

        $io->success(sprintf("Finished fetching. Total items: %d (across %d pages)", count($items), $pageCount));

        return $items;
    }

    /**
     * Parses an XML document without emitting PHP warnings for malformed input.
     *
     * libxml's internal error buffering is enabled only for the duration of the call
     * and the previous setting is restored afterwards.
     *
     * @return \SimpleXMLElement|null The parsed document, or null when parsing failed.
     */
    private function parseXml(string $content): ?\SimpleXMLElement
    {
        $previousSetting = libxml_use_internal_errors(true);

        try {
            $xml = simplexml_load_string($content);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        return $xml === false ? null : $xml;
    }

    /**
     * Converts one <item> element into the flat array used for matching.
     *
     * The speaker is taken from the first non-empty of itunes:author, dc:creator
     * and author. The date is the item's pubDate formatted as Y-m-d, or null when
     * it is missing, empty or unparseable. The url is the enclosure's url attribute,
     * or an empty string when the item has no enclosure.
     *
     * @return array{title: string, speaker: string, url: string, date_string: ?string}
     */
    private function parseFeedItem(\SimpleXMLElement $item): array
    {
        $namespaces = $item->getNamespaces(true);
        $speaker = '';

        if (isset($namespaces['itunes'])) {
            $speaker = (string) ($item->children($namespaces['itunes'])->author ?? '');
        }
        if ($speaker === '' && isset($namespaces['dc'])) {
            $speaker = (string) ($item->children($namespaces['dc'])->creator ?? '');
        }
        if ($speaker === '') {
            $speaker = (string) ($item->author ?? '');
        }

        $dateString = null;
        $rawDate = trim((string) ($item->pubDate ?? ''));
        // An empty string would be parsed as "now", so it is treated as "no date".
        if ($rawDate !== '') {
            try {
                $dateString = (new \DateTimeImmutable($rawDate))->format('Y-m-d');
            } catch (\Exception) {
                // Unparseable date: leave null so the item can never match a note.
            }
        }

        return [
            'title' => (string) $item->title,
            'speaker' => $speaker,
            'url' => (string) ($item->enclosure['url'] ?? ''),
            'date_string' => $dateString,
        ];
    }

    /**
     * Finds the URL of the next feed page on a <channel> element.
     *
     * Looks for <atom:link rel="next" href="..."/> first and falls back to a
     * non-namespaced <link rel="next"> for feeds that omit the atom namespace.
     *
     * @return string|null The next page URL, or null when none is advertised.
     */
    private function findNextPageUrl(\SimpleXMLElement $channel): ?string
    {
        $namespaces = $channel->getNamespaces(true);

        $linkSets = [];
        if (isset($namespaces['atom'])) {
            $linkSets[] = $channel->children($namespaces['atom'])->link;
        }
        $linkSets[] = $channel->link;

        foreach ($linkSets as $links) {
            foreach ($links as $link) {
                $attributes = $link->attributes();
                if ((string) ($attributes['rel'] ?? '') !== 'next') {
                    continue;
                }

                $href = (string) ($attributes['href'] ?? '');
                if ($href !== '') {
                    return $href;
                }
            }
        }

        return null;
    }

    /**
     * Scores how well a feed item matches a note, on a 0-100 scale.
     *
     * The score is the similar_text() percentage of the normalized titles. When both
     * the note and the feed item have a speaker, it is the average of the title and
     * speaker percentages instead.
     *
     * @param Note                                   $note    The note being matched.
     * @param array{title: string, speaker: string} $rssItem Feed item as produced by fetchRssItems().
     */
    private function calculateConfidence(Note $note, array $rssItem): float
    {
        // NOTE(review): Note is not visible here; the title and speaker are guarded
        // as if they may be null so a note without them cannot abort the whole run.
        $noteTitle = $this->normalize($note->getTitle() ?? '');
        $rssTitle = $this->normalize($rssItem['title']);

        $noteSpeaker = $this->normalize($note->getSpeaker()?->getName() ?? '');
        $rssSpeaker = $this->normalize($rssItem['speaker']);

        similar_text($noteTitle, $rssTitle, $titlePercent);

        if ($noteSpeaker !== '' && $rssSpeaker !== '') {
            similar_text($noteSpeaker, $rssSpeaker, $speakerPercent);

            return ($titlePercent + $speakerPercent) / 2;
        }

        return $titlePercent;
    }

    /**
     * Prepares text for comparison: trims surrounding whitespace and lower-cases it.
     *
     * Lower-casing is multibyte-aware so accented and other non-ASCII letters fold
     * the same way as ASCII ones.
     */
    private function normalize(string $input): string
    {
        return mb_strtolower(trim($input), 'UTF-8');
    }
}
|
|
||||||
Reference in New Issue
Block a user