Skip to content

Commit 8e6e0ed

Browse files
committed
Enhance duration parsing in YouTube extractors to handle invalid inputs and improve Shorts detection
1 parent 003dca6 commit 8e6e0ed

3 files changed

Lines changed: 43 additions & 29 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,10 @@ public static boolean isY2ubeURL(@Nonnull final URL url) {
235235
*/
236236
public static int parseDurationString(@Nonnull final String input)
237237
throws ParsingException, NumberFormatException {
238+
if (!input.matches(".*\\d.*") && !input.equalsIgnoreCase("SHORTS")) {
239+
throw new ParsingException("Error duration string contains no digits: " + input);
240+
}
241+
238242
// If time separator : is not detected, try . instead
239243
final String[] splitInput = input.contains(":")
240244
? input.split(":")

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemExtractor.java

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import java.time.format.DateTimeFormatter;
5050
import java.util.List;
5151
import java.util.regex.Pattern;
52+
import java.util.stream.Collectors;
5253

5354
public class YoutubeStreamInfoItemExtractor implements StreamInfoItemExtractor {
5455

@@ -155,19 +156,24 @@ public long getDuration() throws ParsingException {
155156
duration = videoInfo.getString("lengthSeconds");
156157

157158
if (isNullOrEmpty(duration)) {
158-
final JsonObject timeOverlay = videoInfo.getArray("thumbnailOverlays")
159+
final List<String> timeOverlays = videoInfo.getArray("thumbnailOverlays")
159160
.stream()
160161
.filter(JsonObject.class::isInstance)
161162
.map(JsonObject.class::cast)
162163
.filter(thumbnailOverlay ->
163164
thumbnailOverlay.has("thumbnailOverlayTimeStatusRenderer"))
164-
.findFirst()
165-
.orElse(null);
166-
167-
if (timeOverlay != null) {
168-
duration = getTextFromObject(
169-
timeOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
170-
.getObject("text"));
165+
.map(thumbnailOverlay -> getTextFromObject(
166+
thumbnailOverlay.getObject("thumbnailOverlayTimeStatusRenderer")
167+
.getObject("text")))
168+
.filter(text -> !isNullOrEmpty(text))
169+
.collect(Collectors.toList());
170+
171+
for (final String timeOverlayText : timeOverlays) {
172+
try {
173+
return YoutubeParsingHelper.parseDurationString(timeOverlayText);
174+
} catch (final ParsingException ex) {
175+
// this overlay text is not a parsable duration, try the next one
176+
}
171177
}
172178
}
173179

@@ -452,24 +458,21 @@ public boolean isShortFormContent() throws ParsingException {
452458
}
453459

454460
if (!isShort) {
455-
final JsonObject thumbnailTimeOverlay = videoInfo.getArray("thumbnailOverlays")
456-
.stream()
457-
.filter(JsonObject.class::isInstance)
458-
.map(JsonObject.class::cast)
459-
.filter(thumbnailOverlay -> thumbnailOverlay.has(
460-
"thumbnailOverlayTimeStatusRenderer"))
461-
.map(thumbnailOverlay -> thumbnailOverlay.getObject(
462-
"thumbnailOverlayTimeStatusRenderer"))
463-
.findFirst()
464-
.orElse(null);
465-
466-
if (!isNullOrEmpty(thumbnailTimeOverlay)) {
467-
isShort = thumbnailTimeOverlay.getString("style", "")
468-
.equalsIgnoreCase("SHORTS")
469-
|| thumbnailTimeOverlay.getObject("icon")
470-
.getString("iconType", "")
471-
.toLowerCase()
472-
.contains("shorts");
461+
if (videoInfo.has("thumbnailOverlays")) {
462+
isShort = videoInfo.getArray("thumbnailOverlays")
463+
.stream()
464+
.filter(JsonObject.class::isInstance)
465+
.map(JsonObject.class::cast)
466+
.filter(thumbnailOverlay -> thumbnailOverlay.has(
467+
"thumbnailOverlayTimeStatusRenderer"))
468+
.map(thumbnailOverlay -> thumbnailOverlay.getObject(
469+
"thumbnailOverlayTimeStatusRenderer"))
470+
.anyMatch(timeOverlay -> timeOverlay.getString("style", "")
471+
.equalsIgnoreCase("SHORTS")
472+
|| timeOverlay.getObject("icon")
473+
.getString("iconType", "")
474+
.toLowerCase()
475+
.contains("shorts"));
473476
}
474477
}
475478

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamInfoItemLockupExtractor.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@
3131
/**
3232
* Note:
3333
* This extractor is currently (2025-07) only used to extract related video streams.<br>
34-
* The following features are currently not implemented because they have never been observed:
34+
* The following features are currently not implemented:
3535
* <ul>
36-
* <li>Shorts</li>
36+
* <li>Shorts: shown in related videos without a duration badge, so getDuration() returns -1</li>
3737
* <li>Paid content (Premium, members first or only)</li>
3838
* </ul>
3939
*/
@@ -164,18 +164,25 @@ public long getDuration() throws ParsingException {
164164
.collect(Collectors.toList());
165165

166166
if (potentialDurations.isEmpty()) {
167-
throw new ParsingException("Could not get duration: No parsable durations detected");
167+
return -1;
168168
}
169169

170170
ParsingException parsingException = null;
171171
for (final String potentialDuration : potentialDurations) {
172+
if (potentialDuration == null || !potentialDuration.matches(".*\\d.*")) {
173+
continue;
174+
}
172175
try {
173176
return YoutubeParsingHelper.parseDurationString(potentialDuration);
174177
} catch (final ParsingException ex) {
175178
parsingException = ex;
176179
}
177180
}
178181

182+
if (parsingException == null) {
183+
return -1; // e.g. only "SHORTS" or "CC" badge was present, no duration available
184+
}
185+
179186
throw new ParsingException("Could not get duration", parsingException);
180187
}
181188

0 commit comments

Comments
 (0)