Add song info to existing posts

fix-ismusicvideo-detection
Max Nuding 2023-04-24 20:43:13 +02:00
parent 68aade4f1f
commit bca4382988
Signed by: phlaym
GPG Key ID: A06651BAB6777237
4 changed files with 100 additions and 46 deletions

View File

@ -1,6 +1,6 @@
{
"name": "moshing-mammut",
"version": "1.3.0",
"version": "1.3.1",
"private": true,
"license": "LGPL-3.0-or-later",
"scripts": {

View File

@ -2,6 +2,7 @@ import { IGNORE_USERS, MASTODON_INSTANCE } from '$env/static/private';
import { enableVerboseLog, log } from '$lib/log';
import type { Account, Post, Tag } from '$lib/mastodon/response';
import type { SongInfo } from '$lib/odesliResponse';
import { TimelineReader } from '$lib/server/timeline';
import sqlite3 from 'sqlite3';
const { DEV } = import.meta.env;
@ -79,6 +80,45 @@ if (enableVerboseLog) {
});
}
/**
 * Executes the SQL statement of a single migration against the database.
 *
 * @param migration - The migration whose `statement` is passed to `db.exec`.
 * @returns A promise that resolves once the statement has been executed.
 *          If `db.exec` reports an error, the error is logged and the
 *          promise rejects with it.
 */
async function applyDbMigration(migration: Migration): Promise<void> {
  // The executor must be a plain (non-async) function: it only wraps the
  // callback API, and an async executor would turn anything thrown inside it
  // into an orphaned rejection instead of rejecting this promise.
  return new Promise<void>((resolve, reject) => {
    db.exec(migration.statement, (err) => {
      if (err !== null) {
        log.error(`Failed to apply migration ${migration.name}`, err);
        reject(err);
        return;
      }
      resolve();
    });
  });
}
/**
 * Applies a single migration.
 *
 * Every migration except id 4 is executed as SQL through `applyDbMigration`.
 * Migration 4 is handled in code instead: it loads up to 10000 stored posts,
 * looks up song info for each post that has none yet, and saves the result.
 *
 * @param migration - The migration to apply.
 * @returns A promise that resolves when the migration has finished and
 *          rejects if any awaited step fails.
 */
async function applyMigration(migration: Migration) {
  if (migration.id !== 4) {
    await applyDbMigration(migration);
    return;
  }

  // When this is run, no posts will have added song data,
  // so filtering in the query won't help
  const existingPosts = await getPostsInternal(null, null, 10000);
  const paddedTotal = existingPosts.length.toString().padStart(4, '0');

  for (const [index, post] of existingPosts.entries()) {
    // Posts that already carry song info need no lookup
    if (post.songs && post.songs.length) {
      continue;
    }
    // 1-based position, zero-padded like the total for aligned log output
    const paddedPosition = (index + 1).toString().padStart(4, '0');
    log.debug(`Fetching songs for existing post ${paddedPosition} of ${paddedTotal}`, post.url);
    // Posts are processed one at a time; each lookup is awaited before the next starts
    const fetchedSongs = await TimelineReader.getSongInfoInPost(post);
    await saveSongInfoData(post.url, fetchedSongs);
    log.debug(`Fetched ${fetchedSongs.length} songs for existing post`, post.url);
  }
  log.debug(`Finished fetching songs`);
}
db.on('open', () => {
log.info('Opened database');
db.serialize();
@ -98,7 +138,7 @@ db.on('open', () => {
return;
}
for (const migration of toApply) {
db.exec(migration.statement, (err) => {
applyMigration(migration).then(() => {
remaining--;
// This will set databaseReady to true before the migration has been recorded as applied,
// but that doesn't matter. It's only important that it has been applied
@ -225,11 +265,16 @@ function getMigrations(): Migration[] {
post_url TEXT,
FOREIGN KEY (post_url) REFERENCES posts(url)
);`
},
{
id: 4,
name: 'song info for existing posts',
statement: ``
}
];
}
async function waitReady(): Promise<undefined> {
async function waitReady(): Promise<void> {
// Simpler than a semaphore and is really only needed on startup
return new Promise((resolve) => {
const interval = setInterval(() => {
@ -241,14 +286,14 @@ async function waitReady(): Promise<undefined> {
log.debug('DB is ready');
}
clearInterval(interval);
resolve(undefined);
resolve();
}
}, 100);
});
}
function saveAccountData(account: Account): Promise<undefined> {
return new Promise<undefined>((resolve, reject) => {
function saveAccountData(account: Account): Promise<void> {
return new Promise<void>((resolve, reject) => {
db.run(
`
INSERT INTO accounts (id, acct, username, display_name, url, avatar)
@ -274,14 +319,14 @@ function saveAccountData(account: Account): Promise<undefined> {
reject(err);
return;
}
resolve(undefined);
resolve();
}
);
});
}
function savePostData(post: Post): Promise<undefined> {
return new Promise<undefined>((resolve, reject) => {
function savePostData(post: Post): Promise<void> {
return new Promise<void>((resolve, reject) => {
db.run(
`
INSERT INTO posts (id, content, created_at, url, account_id)
@ -297,16 +342,16 @@ function savePostData(post: Post): Promise<undefined> {
reject(postErr);
return;
}
resolve(undefined);
resolve();
}
);
});
}
function savePostTagData(post: Post): Promise<undefined> {
return new Promise<undefined>((resolve, reject) => {
function savePostTagData(post: Post): Promise<void> {
return new Promise<void>((resolve, reject) => {
if (!post.tags.length) {
resolve(undefined);
resolve();
return;
}
@ -338,7 +383,7 @@ function savePostTagData(post: Post): Promise<undefined> {
remaining--;
// Only resolve after all have been inserted
if (remaining === 0) {
resolve(undefined);
resolve();
}
}
);
@ -349,10 +394,10 @@ function savePostTagData(post: Post): Promise<undefined> {
});
}
function saveSongInfoData(postUrl: string, songs: SongInfo[]): Promise<undefined> {
return new Promise<undefined>((resolve, reject) => {
function saveSongInfoData(postUrl: string, songs: SongInfo[]): Promise<void> {
return new Promise<void>((resolve, reject) => {
if (songs.length === 0) {
resolve(undefined);
resolve();
return;
}
db.parallelize(() => {
@ -383,7 +428,7 @@ function saveSongInfoData(postUrl: string, songs: SongInfo[]): Promise<undefined
remaining--;
// Only resolve after all have been inserted
if (remaining === 0) {
resolve(undefined);
resolve();
}
}
);
@ -503,7 +548,14 @@ export async function getPosts(
if (!databaseReady) {
await waitReady();
}
return await getPostsInternal(since, before, limit);
}
async function getPostsInternal(
since: string | null,
before: string | null,
limit: number
): Promise<Post[]> {
let filterQuery = '';
const params: FilterParameter = { $limit: limit };
if (since === null && before === null) {

View File

@ -12,6 +12,34 @@ const URL_REGEX = new RegExp(/href="(?<postUrl>[^>]+?)" target="_blank"/gm);
export class TimelineReader {
private static _instance: TimelineReader;
/**
 * Collects song information for every link found in a post's content.
 *
 * Each `href` matched by `URL_REGEX` is parsed as a URL and passed to
 * `TimelineReader.getSongInfo`; links for which a truthy result comes back
 * are collected. Links are checked one after another, in match order.
 *
 * @param post - The post whose `content` is scanned for links.
 * @returns The song infos found, in the order their links appear.
 */
public static async getSongInfoInPost(post: Post): Promise<SongInfo[]> {
  const foundSongs: SongInfo[] = [];

  for (const linkMatch of post.content.matchAll(URL_REGEX)) {
    if (linkMatch === undefined || linkMatch.groups === undefined) {
      log.warn('Match listed in allMatches, but either it or its groups are undefined', linkMatch);
      continue;
    }

    const rawUrl = linkMatch.groups.postUrl.toString();
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(rawUrl);
    } catch (e) {
      // Skip anything that cannot be parsed as a URL, but keep scanning the rest
      log.error('URL found via Regex does not seem to be a valud url', rawUrl, e);
      continue;
    }

    // Every link is checked; odesli decides whether it points to music or not
    log.debug(`Checking ${parsedUrl} if it contains song data`);
    const songInfo = await TimelineReader.getSongInfo(parsedUrl);
    log.debug(`Found song info for ${parsedUrl}?`, songInfo);
    if (songInfo) {
      foundSongs.push(songInfo);
    }
  }

  return foundSongs;
}
private static async getSongInfo(url: URL, remainingTries = 6): Promise<SongInfo | null> {
if (remainingTries === 0) {
log.error('No tries remaining. Lookup failed!');
@ -77,33 +105,7 @@ export class TimelineReader {
const hashttags: string[] = HASHTAG_FILTER.split(',');
const found_tags: Tag[] = post.tags.filter((t: Tag) => hashttags.includes(t.name));
const urlMatches = post.content.matchAll(URL_REGEX);
const songs: SongInfo[] = [];
for (const match of urlMatches) {
if (match === undefined || match.groups === undefined) {
log.warn(
'Match listed in allMatches, but either it or its groups are undefined',
match
);
continue;
}
const urlMatch = match.groups.postUrl.toString();
let url: URL;
try {
url = new URL(urlMatch);
} catch (e) {
log.error('URL found via Regex does not seem to be a valud url', urlMatch, e);
continue;
}
// Check *all* found url and let odesli determine if it is music or not
log.debug(`Checking ${url} if it contains song data`);
const info = await TimelineReader.getSongInfo(url);
log.debug(`Found song info for ${url}?`, info);
if (info) {
songs.push(info);
}
}
const songs = await TimelineReader.getSongInfoInPost(post);
// If we don't have any tags or non-youtube urls, check youtube
// YT is handled separately, because it requires an API call and therefore is slower

View File

@ -1,4 +1,4 @@
export function sleep(timeInMs: number): Promise<undefined> {
export function sleep(timeInMs: number): Promise<void> {
return new Promise((resolve) => {
setTimeout(resolve, timeInMs);
});