Skip to content

Commit 354426f

Browse files
authored
Merge pull request #441 from B0pol/fix-three-attempts
Support new YouTube JSON scheme
2 parents 350eed6 + 6dc5ab4 commit 354426f

1 file changed

Lines changed: 56 additions & 29 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeStreamExtractor.java

Lines changed: 56 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,10 @@
33
import com.grack.nanojson.JsonArray;
44
import com.grack.nanojson.JsonObject;
55
import com.grack.nanojson.JsonParser;
6-
6+
import org.jsoup.Jsoup;
7+
import org.jsoup.nodes.Document;
8+
import org.jsoup.nodes.Element;
9+
import org.jsoup.select.Elements;
710
import org.mozilla.javascript.Context;
811
import org.mozilla.javascript.Function;
912
import org.mozilla.javascript.ScriptableObject;
@@ -36,6 +39,8 @@
3639
import org.schabi.newpipe.extractor.utils.Parser;
3740
import org.schabi.newpipe.extractor.utils.Utils;
3841

42+
import javax.annotation.Nonnull;
43+
import javax.annotation.Nullable;
3944
import java.io.IOException;
4045
import java.io.UnsupportedEncodingException;
4146
import java.text.SimpleDateFormat;
@@ -49,9 +54,6 @@
4954
import java.util.Locale;
5055
import java.util.Map;
5156

52-
import javax.annotation.Nonnull;
53-
import javax.annotation.Nullable;
54-
5557
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.fixThumbnailUrl;
5658
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
5759
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
@@ -102,6 +104,7 @@ public class DecryptException extends ParsingException {
102104
private JsonObject videoPrimaryInfoRenderer;
103105
private JsonObject videoSecondaryInfoRenderer;
104106
private int ageLimit;
107+
private boolean newJsonScheme;
105108

106109
@Nonnull
107110
private List<SubtitlesInfo> subtitlesInfos = new ArrayList<>();
@@ -156,20 +159,23 @@ public String getTextualUploadDate() throws ParsingException {
156159
TimeAgoParser timeAgoParser = TimeAgoPatternsManager.getTimeAgoParserFor(Localization.fromLocalizationCode("en"));
157160
Calendar parsedTime = timeAgoParser.parse(time).date();
158161
return new SimpleDateFormat("yyyy-MM-dd").format(parsedTime.getTime());
159-
} catch (Exception ignored) {}
162+
} catch (Exception ignored) {
163+
}
160164

161165
try { // Premiered Feb 21, 2020
162166
Date d = new SimpleDateFormat("MMM dd, YYYY", Locale.ENGLISH).parse(time);
163167
return new SimpleDateFormat("yyyy-MM-dd").format(d.getTime());
164-
} catch (Exception ignored) {}
168+
} catch (Exception ignored) {
169+
}
165170
}
166171

167172
try {
168173
// TODO: this parses English formatted dates only, we need a better approach to parse the textual date
169174
Date d = new SimpleDateFormat("dd MMM yyyy", Locale.ENGLISH).parse(
170175
getTextFromObject(getVideoPrimaryInfoRenderer().getObject("dateText")));
171176
return new SimpleDateFormat("yyyy-MM-dd").format(d);
172-
} catch (Exception ignored) {}
177+
} catch (Exception ignored) {
178+
}
173179
throw new ParsingException("Could not get upload date");
174180
}
175181

@@ -360,7 +366,8 @@ public String getUploaderName() throws ParsingException {
360366
try {
361367
uploaderName = getTextFromObject(getVideoSecondaryInfoRenderer().getObject("owner")
362368
.getObject("videoOwnerRenderer").getObject("title"));
363-
} catch (ParsingException ignored) { }
369+
} catch (ParsingException ignored) {
370+
}
364371

365372
if (isNullOrEmpty(uploaderName)) {
366373
uploaderName = playerResponse.getObject("videoDetails").getString("author");
@@ -650,27 +657,23 @@ public void onFetchPage(@Nonnull Downloader downloader) throws IOException, Extr
650657
} else {
651658
ageLimit = NO_AGE_LIMIT;
652659
JsonObject playerConfig;
660+
initialData = initialAjaxJson.getObject(3).getObject("response");
653661

654-
// sometimes at random YouTube does not provide the player config,
655-
// so just retry the same request three times
656-
int attempts = 2;
657-
while (true) {
658-
playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
659-
if (playerConfig != null) {
660-
break;
661-
}
662+
// sometimes at random YouTube does not provide the player config
663+
playerConfig = initialAjaxJson.getObject(2).getObject("player", null);
662664

663-
if (attempts <= 0) {
664-
throw new ParsingException(
665-
"YouTube did not provide player config even after three attempts");
666-
}
667-
initialAjaxJson = getJsonResponse(url, getExtractorLocalization());
668-
--attempts;
665+
if (playerConfig == null) {
666+
newJsonScheme = true;
667+
final EmbeddedInfo info = getEmbeddedInfo();
668+
final String videoInfoUrl = getVideoInfoUrl(getId(), info.sts);
669+
final String infoPageResponse = downloader.get(videoInfoUrl, getExtractorLocalization()).responseBody();
670+
videoInfoPage.putAll(Parser.compatParseMap(infoPageResponse));
671+
playerUrl = info.url;
672+
} else {
673+
playerArgs = getPlayerArgs(playerConfig);
674+
playerUrl = getPlayerUrl(playerConfig);
669675
}
670-
initialData = initialAjaxJson.getObject(3).getObject("response");
671676

672-
playerArgs = getPlayerArgs(playerConfig);
673-
playerUrl = getPlayerUrl(playerConfig);
674677
}
675678

676679
playerResponse = getPlayerResponse();
@@ -718,6 +721,10 @@ private String getPlayerUrl(final JsonObject playerConfig) throws ParsingExcepti
718721
private JsonObject getPlayerResponse() throws ParsingException {
719722
try {
720723
String playerResponseStr;
724+
if (newJsonScheme) {
725+
return initialAjaxJson.getObject(2).getObject("playerResponse");
726+
}
727+
721728
if (playerArgs != null) {
722729
playerResponseStr = playerArgs.getString("player_response");
723730
} else {
@@ -737,11 +744,30 @@ private EmbeddedInfo getEmbeddedInfo() throws ParsingException, ReCaptchaExcepti
737744
final String embedPageContent = downloader.get(embedUrl, getExtractorLocalization()).responseBody();
738745

739746
// Get player url
740-
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
741-
String playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
742-
.replace("\\", "").replace("\"", "");
747+
String playerUrl = null;
748+
try {
749+
final String assetsPattern = "\"assets\":.+?\"js\":\\s*(\"[^\"]+\")";
750+
playerUrl = Parser.matchGroup1(assetsPattern, embedPageContent)
751+
.replace("\\", "").replace("\"", "");
752+
} catch (Parser.RegexException ex) {
753+
// playerUrl is still available in the file, just somewhere else
754+
final Document doc = Jsoup.parse(embedPageContent);
755+
final Elements elems = doc.select("script").attr("name", "player_ias/base");
756+
for (Element elem : elems) {
757+
if (elem.attr("src").contains("base.js")) {
758+
playerUrl = elem.attr("src");
759+
}
760+
}
761+
762+
if (playerUrl == null) {
763+
throw new ParsingException("Could not get playerUrl");
764+
}
765+
}
766+
743767
if (playerUrl.startsWith("//")) {
744768
playerUrl = HTTPS + playerUrl;
769+
} else if (playerUrl.startsWith("/")) {
770+
playerUrl = HTTPS + "//youtube.com" + playerUrl;
745771
}
746772

747773
try {
@@ -988,7 +1014,8 @@ private Map<String, ItagItem> getItags(String streamingDataKey, ItagItem.ItagTyp
9881014

9891015
urlAndItags.put(streamUrl, itagItem);
9901016
}
991-
} catch (UnsupportedEncodingException ignored) {}
1017+
} catch (UnsupportedEncodingException ignored) {
1018+
}
9921019
}
9931020
}
9941021

0 commit comments

Comments
 (0)