/**
 * Cleans up chapter names reported by sources.
 */
object ChapterSanitizer {
    /**
     * Returns this chapter name with surrounding whitespace removed, a leading
     * [title] (the manga title) stripped, and any remaining leading/trailing
     * whitespace or separator characters trimmed.
     *
     * If stripping the title would leave nothing (for example a one-shot whose only
     * chapter is named exactly like the manga), the title is kept so the chapter
     * does not end up with an empty name.
     *
     * @param title the manga title to remove from the start of the chapter name;
     * the match is case-sensitive.
     * @return the sanitized chapter name; empty only when the receiver itself
     * contains nothing but whitespace/separator characters.
     */
    fun String.sanitize(title: String): String {
        val trimmedName = trim()
        val withoutTitle =
            trimmedName
                .removePrefix(title)
                .trim(*CHAPTER_TRIM_CHARS)

        // Fall back to the (still trimmed) original name rather than returning "".
        return withoutTitle.ifEmpty { trimmedName.trim(*CHAPTER_TRIM_CHARS) }
    }

    /** Characters removed from both ends of a chapter name. */
    private val CHAPTER_TRIM_CHARS =
        charArrayOf(
            // Whitespace
            '\u0009',
            '\u000A',
            '\u000B',
            '\u000C',
            '\u000D',
            '\u0020',
            '\u0085',
            '\u00A0',
            '\u1680',
            '\u2000',
            '\u2001',
            '\u2002',
            '\u2003',
            '\u2004',
            '\u2005',
            '\u2006',
            '\u2007',
            '\u2008',
            '\u2009',
            '\u200A',
            '\u2028',
            '\u2029',
            '\u202F',
            '\u205F',
            '\u3000',
            // Separators
            '-',
            '_',
            ',',
            ':',
        )
}
/**
 * One [Mutex] per manga id, used by [fetchChapterList] so that concurrent fetches
 * for the same manga run one after another. Entries expire 10 minutes after
 * their last access.
 */
val map: Cache<Int, Mutex> =
    CacheBuilder.newBuilder()
        .expireAfterAccess(10, TimeUnit.MINUTES)
        .build()

/**
 * Fetches the chapter list of the manga with id [mangaId] from its source and
 * synchronizes the chapter table with it:
 *
 * - chapters whose url is already stored are updated in place,
 * - chapters with an unknown url are inserted,
 * - stored chapters whose url is no longer reported by the source are deleted
 *   (together with their pages); a newly inserted chapter with the same recognized
 *   chapter number inherits their read/bookmarked/downloaded flags and fetch date.
 *
 * The whole operation runs under the per-manga lock held in [map].
 *
 * @param mangaId database id of the manga to refresh.
 * @return the chapters as returned by the source (after name/scanlator clean-up
 * and chapter number recognition), in source order.
 */
suspend fun fetchChapterList(mangaId: Int): List<SChapter> {
    val mutex = map.get(mangaId) { Mutex() }

    return mutex.withLock {
        val manga = getManga(mangaId)
        val source = getCatalogueSourceOrStub(manga.sourceId.toLong())

        val sManga =
            SManga.create().apply {
                title = manga.title
                url = manga.url
            }

        val numberOfCurrentChapters = getCountOfMangaChapters(mangaId)
        val chapterList = source.getChapterList(sManga)

        // Recognize number for new chapters.
        chapterList.forEach { chapter ->
            (source as? HttpSource)?.prepareNewChapter(chapter, sManga)
            val chapterNumber =
                ChapterRecognition.parseChapterNumber(manga.title, chapter.name, chapter.chapter_number.toDouble())
            chapter.chapter_number = chapterNumber.toFloat()
            chapter.name = chapter.name.sanitize(manga.title)
            chapter.scanlator = chapter.scanlator?.ifBlank { null }
        }

        val fetchInstant = Instant.now()
        // `fetchedAt` is stored in epoch seconds ...
        val now = fetchInstant.epochSecond
        // ... while upload dates are in epoch milliseconds (the date-upload migration
        // derives them as FETCHED_AT * 1000), so the fallback must not reuse `now`.
        val nowMillis = fetchInstant.toEpochMilli()

        // Used to not set upload date of older chapters
        // to a higher value than newer chapters
        var maxSeenUploadDate = 0L

        val chaptersInDb =
            transaction {
                ChapterTable.select { ChapterTable.manga eq mangaId }
                    .map { ChapterTable.toDataClass(it) }
                    .toList()
            }

        // Index by url once instead of scanning the list for every fetched chapter.
        // Iterating in reverse makes the FIRST stored row win when a url is duplicated,
        // which is what a linear `find` would return.
        val chaptersInDbByUrl = chaptersInDb.asReversed().associateBy { it.url }

        val chaptersToInsert = mutableListOf<ChapterDataClass>()
        val chaptersToUpdate = mutableListOf<ChapterDataClass>()

        // Sources list newest first; walk oldest -> newest so `index + 1` is the source order.
        chapterList.reversed().forEachIndexed { index, fetchedChapter ->
            val chapterEntry = chaptersInDbByUrl[fetchedChapter.url]

            val chapterData =
                ChapterDataClass.fromSChapter(
                    fetchedChapter,
                    chapterEntry?.id ?: 0,
                    index + 1,
                    now,
                    mangaId,
                    runCatching {
                        (source as? HttpSource)?.getChapterUrl(fetchedChapter)
                    }.getOrNull(),
                )

            if (chapterEntry == null) {
                val newChapterData =
                    if (chapterData.uploadDate == 0L) {
                        // Source gave no date: reuse the newest date seen so far, or "now".
                        val altDateUpload = if (maxSeenUploadDate == 0L) nowMillis else maxSeenUploadDate
                        chapterData.copy(uploadDate = altDateUpload)
                    } else {
                        maxSeenUploadDate = max(maxSeenUploadDate, chapterData.uploadDate)
                        chapterData
                    }
                chaptersToInsert.add(newChapterData)
            } else {
                val newChapterData =
                    if (chapterData.uploadDate == 0L) {
                        // Keep the previously stored date instead of overwriting it with 0.
                        chapterData.copy(uploadDate = chapterEntry.uploadDate)
                    } else {
                        chapterData
                    }
                chaptersToUpdate.add(newChapterData)
            }
        }

        // Orphaned chapters: stored in the db but no longer reported by the source.
        // This is evaluated on every fetch (not only when the db holds more rows than the
        // source returned) so that chapters replaced one-for-one under a new url are removed
        // right away and their state can be carried over to the replacement below.
        val chapterUrls = chapterList.map { it.url }.toSet()
        val orphanedChapters = chaptersInDb.filter { it.url !in chapterUrls }

        val deletedChapterNumbers = orphanedChapters.map { it.chapterNumber }.toSet()
        val deletedReadChapterNumbers = orphanedChapters.filter { it.read }.map { it.chapterNumber }.toSet()
        val deletedBookmarkedChapterNumbers =
            orphanedChapters.filter { it.bookmarked }.map { it.chapterNumber }.toSet()
        val deletedDownloadedChapterNumbers =
            orphanedChapters.filter { it.downloaded }.map { it.chapterNumber }.toSet()
        // When several orphans share a chapter number the last one wins.
        val deletedChapterNumberDateFetchMap = orphanedChapters.associate { it.chapterNumber to it.fetchedAt }

        if (orphanedChapters.isNotEmpty()) {
            val chaptersIdsToDelete = orphanedChapters.map { it.id }

            transaction {
                PageTable.deleteWhere { PageTable.chapter inList chaptersIdsToDelete }
                ChapterTable.deleteWhere { ChapterTable.id inList chaptersIdsToDelete }
            }
        }

        transaction {
            if (chaptersToInsert.isNotEmpty()) {
                ChapterTable.batchInsert(chaptersToInsert) { chapter ->
                    // Only a recognized chapter number (>= 0) may inherit state from a deleted chapter.
                    val replacesDeletedChapter =
                        chapter.chapterNumber >= 0f && chapter.chapterNumber in deletedChapterNumbers
                    // Try to use the fetch date of the original entry to not pollute 'Updates' tab
                    val inheritedFetchedAt =
                        if (replacesDeletedChapter) deletedChapterNumberDateFetchMap[chapter.chapterNumber] else null

                    this[ChapterTable.url] = chapter.url
                    this[ChapterTable.name] = chapter.name
                    this[ChapterTable.date_upload] = chapter.uploadDate
                    this[ChapterTable.chapter_number] = chapter.chapterNumber
                    this[ChapterTable.scanlator] = chapter.scanlator
                    this[ChapterTable.sourceOrder] = chapter.index
                    this[ChapterTable.fetchedAt] = inheritedFetchedAt ?: chapter.fetchedAt
                    this[ChapterTable.manga] = chapter.mangaId
                    this[ChapterTable.realUrl] = chapter.realUrl
                    this[ChapterTable.isRead] =
                        replacesDeletedChapter && chapter.chapterNumber in deletedReadChapterNumbers
                    this[ChapterTable.isBookmarked] =
                        replacesDeletedChapter && chapter.chapterNumber in deletedBookmarkedChapterNumbers
                    this[ChapterTable.isDownloaded] =
                        replacesDeletedChapter && chapter.chapterNumber in deletedDownloadedChapterNumbers
                }
            }

            if (chaptersToUpdate.isNotEmpty()) {
                BatchUpdateStatement(ChapterTable).apply {
                    chaptersToUpdate.forEach {
                        addBatch(EntityID(it.id, ChapterTable))
                        this[ChapterTable.name] = it.name
                        this[ChapterTable.date_upload] = it.uploadDate
                        this[ChapterTable.chapter_number] = it.chapterNumber
                        this[ChapterTable.scanlator] = it.scanlator
                        this[ChapterTable.sourceOrder] = it.index
                        this[ChapterTable.realUrl] = it.realUrl
                    }
                    execute(this@transaction)
                }
            }

            MangaTable.update({ MangaTable.id eq mangaId }) {
                it[MangaTable.chaptersLastFetchedAt] = Instant.now().epochSecond
            }
        }

        // Re-read the rows so the download step sees the freshly assigned ids and order.
        val newChapters =
            transaction {
                ChapterTable.select { ChapterTable.manga eq mangaId }
                    .orderBy(ChapterTable.sourceOrder to SortOrder.DESC).toList()
            }

        if (manga.inLibrary) {
            downloadNewChapters(mangaId, numberOfCurrentChapters, newChapters)
        }

        chapterList
    }
}
= chapter.scanlator + + this[ChapterTable.sourceOrder] = chaptersLength - chapter.source_order + this[ChapterTable.manga] = mangaId + + this[ChapterTable.isRead] = chapter.read + this[ChapterTable.lastPageRead] = chapter.last_page_read + this[ChapterTable.isBookmarked] = chapter.bookmark + + this[ChapterTable.fetchedAt] = TimeUnit.MILLISECONDS.toSeconds(chapter.date_fetch) } // insert categories @@ -350,7 +353,11 @@ object ProtoBackupImport : ProtoBackupBase() { ChapterTable.insert { it[url] = chapter.url it[name] = chapter.name - it[date_upload] = chapter.date_upload + if (chapter.date_upload == 0L) { + it[date_upload] = chapter.date_fetch + } else { + it[date_upload] = chapter.date_upload + } it[chapter_number] = chapter.chapter_number it[scanlator] = chapter.scanlator diff --git a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/ChaptersFilesProvider.kt b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/ChaptersFilesProvider.kt index e5428bba..cfe567d3 100644 --- a/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/ChaptersFilesProvider.kt +++ b/server/src/main/kotlin/suwayomi/tachidesk/manga/impl/download/fileProvider/ChaptersFilesProvider.kt @@ -15,7 +15,7 @@ import java.io.InputStream /* * Base class for downloaded chapter files provider, example: Folder, Archive -* */ +*/ abstract class ChaptersFilesProvider(val mangaId: Int, val chapterId: Int) : DownloadedFilesProvider { abstract fun getImageImpl(index: Int): Pair diff --git a/server/src/main/kotlin/suwayomi/tachidesk/manga/model/dataclass/ChapterDataClass.kt b/server/src/main/kotlin/suwayomi/tachidesk/manga/model/dataclass/ChapterDataClass.kt index 1023a679..9c59b313 100644 --- a/server/src/main/kotlin/suwayomi/tachidesk/manga/model/dataclass/ChapterDataClass.kt +++ b/server/src/main/kotlin/suwayomi/tachidesk/manga/model/dataclass/ChapterDataClass.kt @@ -56,7 +56,7 @@ data class ChapterDataClass( name = sChapter.name, uploadDate = 
/**
 * Migration that back-fills missing chapter upload dates.
 *
 * Every row of `CHAPTER` whose `DATE_UPLOAD` is `0` gets `DATE_UPLOAD` set to
 * `FETCHED_AT * 1000`.
 * NOTE(review): the factor of 1000 presumably converts a seconds-based
 * `FETCHED_AT` into a milliseconds-based `DATE_UPLOAD` — confirm against the
 * column definitions.
 */
@Suppress("ClassName", "unused")
class M0030_FixDateUpload : SQLMigration() {
    override val sql: String = FIX_DATE_UPLOAD_SQL

    private companion object {
        // language=h2
        val FIX_DATE_UPLOAD_SQL =
            """
            UPDATE CHAPTER
            SET DATE_UPLOAD = (FETCHED_AT * 1000)
            WHERE DATE_UPLOAD = 0;
            """.trimIndent()
    }
}