Skip to content

Commit 8627d01

Browse files
authored
Merge pull request #373 from wb9688/fix-yt-continuations
Support YouTube's new continuations
2 parents df28a08 + 667dce0 commit 8627d01

3 files changed

Lines changed: 152 additions & 47 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/Page.java

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,45 @@
88

99
public class Page implements Serializable {
1010
private final String url;
11+
private final String id;
1112
private final List<String> ids;
1213
private final Map<String, String> cookies;
1314

14-
public Page(final String url, final List<String> ids, final Map<String, String> cookies) {
15+
public Page(final String url, final String id, final List<String> ids, final Map<String, String> cookies) {
1516
this.url = url;
17+
this.id = id;
1618
this.ids = ids;
1719
this.cookies = cookies;
1820
}
1921

2022
public Page(final String url) {
21-
this(url, null, null);
23+
this(url, null, null, null);
24+
}
25+
26+
public Page(final String url, final String id) {
27+
this(url, id, null, null);
2228
}
2329

2430
public Page(final String url, final Map<String, String> cookies) {
25-
this(url, null, cookies);
31+
this(url, null, null, cookies);
2632
}
2733

2834
public Page(final List<String> ids) {
29-
this(null, ids, null);
35+
this(null, null, ids, null);
3036
}
3137

3238
public Page(final List<String> ids, final Map<String, String> cookies) {
33-
this(null, ids, cookies);
39+
this(null, null, ids, cookies);
3440
}
3541

3642
public String getUrl() {
3743
return url;
3844
}
3945

46+
public String getId() {
47+
return id;
48+
}
49+
4050
public List<String> getIds() {
4151
return ids;
4252
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 53 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ private YoutubeParsingHelper() {
6464
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
6565
private static String clientVersion;
6666

67+
private static String key;
68+
6769
private static final String[] HARDCODED_YOUTUBE_MUSIC_KEYS = {"AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30", "67", "0.1"};
6870
private static String[] youtubeMusicKeys;
6971

@@ -214,39 +216,31 @@ public static boolean isHardcodedClientVersionValid() throws IOException, Extrac
214216
return response.length() > 50; // ensure to have a valid response
215217
}
216218

217-
/**
218-
* Get the client version from a page
219-
* @return
220-
* @throws ParsingException
221-
*/
222-
public static String getClientVersion() throws IOException, ExtractionException {
223-
if (!isNullOrEmpty(clientVersion)) return clientVersion;
224-
if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION;
225-
219+
private static void extractClientVersionAndKey() throws IOException, ExtractionException {
226220
final String url = "https://www.youtube.com/results?search_query=test";
227221
final String html = getDownloader().get(url).responseBody();
228-
JsonObject initialData = getInitialData(html);
229-
JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
222+
final JsonObject initialData = getInitialData(html);
223+
final JsonArray serviceTrackingParams = initialData.getObject("responseContext").getArray("serviceTrackingParams");
230224
String shortClientVersion = null;
231225

232226
// try to get version from initial data first
233-
for (Object service : serviceTrackingParams) {
234-
JsonObject s = (JsonObject) service;
227+
for (final Object service : serviceTrackingParams) {
228+
final JsonObject s = (JsonObject) service;
235229
if (s.getString("service").equals("CSI")) {
236-
JsonArray params = s.getArray("params");
237-
for (Object param : params) {
238-
JsonObject p = (JsonObject) param;
239-
String key = p.getString("key");
230+
final JsonArray params = s.getArray("params");
231+
for (final Object param : params) {
232+
final JsonObject p = (JsonObject) param;
233+
final String key = p.getString("key");
240234
if (key != null && key.equals("cver")) {
241-
return clientVersion = p.getString("value");
235+
clientVersion = p.getString("value");
242236
}
243237
}
244238
} else if (s.getString("service").equals("ECATCHER")) {
245239
// fallback to get a shortened client version which does not contain the last two digits
246-
JsonArray params = s.getArray("params");
247-
for (Object param : params) {
248-
JsonObject p = (JsonObject) param;
249-
String key = p.getString("key");
240+
final JsonArray params = s.getArray("params");
241+
for (final Object param : params) {
242+
final JsonObject p = (JsonObject) param;
243+
final String key = p.getString("key");
250244
if (key != null && key.equals("client.version")) {
251245
shortClientVersion = p.getString("value");
252246
}
@@ -255,26 +249,55 @@ public static String getClientVersion() throws IOException, ExtractionException
255249
}
256250

257251
String contextClientVersion;
258-
String[] patterns = {
252+
final String[] patterns = {
259253
"INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"",
260254
"innertube_context_client_version\":\"([0-9\\.]+?)\"",
261255
"client.version=([0-9\\.]+)"
262256
};
263-
for (String pattern : patterns) {
257+
for (final String pattern : patterns) {
264258
try {
265259
contextClientVersion = Parser.matchGroup1(pattern, html);
266260
if (!isNullOrEmpty(contextClientVersion)) {
267-
return clientVersion = contextClientVersion;
261+
clientVersion = contextClientVersion;
262+
break;
268263
}
269-
} catch (Exception ignored) {
270-
}
264+
} catch (Parser.RegexException ignored) { }
265+
}
266+
267+
// The shortened ECATCHER version is only a fallback: use it solely when no full
// client version was found above, so that a complete version is never overwritten.
if (isNullOrEmpty(clientVersion) && !isNullOrEmpty(shortClientVersion)) {
    clientVersion = shortClientVersion;
}
272270

273-
if (shortClientVersion != null) {
274-
return clientVersion = shortClientVersion;
271+
try {
272+
key = Parser.matchGroup1("INNERTUBE_API_KEY\":\"([0-9a-zA-Z_-]+?)\"", html);
273+
} catch (Parser.RegexException e) {
274+
try {
275+
key = Parser.matchGroup1("innertubeApiKey\":\"([0-9a-zA-Z_-]+?)\"", html);
276+
} catch (Parser.RegexException ignored) { }
275277
}
278+
}
279+
280+
/**
281+
* Get the client version
282+
*/
283+
public static String getClientVersion() throws IOException, ExtractionException {
284+
if (!isNullOrEmpty(clientVersion)) return clientVersion;
285+
if (isHardcodedClientVersionValid()) return clientVersion = HARDCODED_CLIENT_VERSION;
286+
287+
extractClientVersionAndKey();
288+
if (isNullOrEmpty(key)) throw new ParsingException("Could not extract client version");
289+
return clientVersion;
290+
}
291+
292+
/**
293+
* Get the key
294+
*/
295+
public static String getKey() throws IOException, ExtractionException {
296+
if (!isNullOrEmpty(key)) return key;
276297

277-
throw new ParsingException("Could not get client version");
298+
extractClientVersionAndKey();
299+
if (isNullOrEmpty(key)) throw new ParsingException("Could not extract key");
300+
return key;
278301
}
279302

280303
public static boolean areHardcodedYoutubeMusicKeysValid() throws IOException, ReCaptchaException {

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/extractors/YoutubeSearchExtractor.java

Lines changed: 84 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import com.grack.nanojson.JsonArray;
44
import com.grack.nanojson.JsonObject;
5+
import com.grack.nanojson.JsonParser;
6+
import com.grack.nanojson.JsonParserException;
7+
import com.grack.nanojson.JsonWriter;
8+
59
import org.schabi.newpipe.extractor.InfoItem;
610
import org.schabi.newpipe.extractor.Page;
711
import org.schabi.newpipe.extractor.StreamingService;
@@ -14,11 +18,19 @@
1418
import org.schabi.newpipe.extractor.search.SearchExtractor;
1519
import org.schabi.newpipe.extractor.utils.JsonUtils;
1620

17-
import javax.annotation.Nonnull;
1821
import java.io.IOException;
22+
import java.util.Collections;
23+
import java.util.HashMap;
24+
import java.util.List;
25+
import java.util.Map;
1926

27+
import javax.annotation.Nonnull;
28+
29+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getClientVersion;
2030
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getJsonResponse;
31+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getKey;
2132
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getTextFromObject;
33+
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.getValidJsonResponseBody;
2234
import static org.schabi.newpipe.extractor.utils.Utils.isNullOrEmpty;
2335

2436
/*
@@ -96,20 +108,24 @@ public boolean isCorrectedSearch() {
96108

97109
@Nonnull
98110
@Override
99-
public InfoItemsPage<InfoItem> getInitialPage() throws ExtractionException {
111+
public InfoItemsPage<InfoItem> getInitialPage() throws IOException, ExtractionException {
100112
final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId());
101113

102114
final JsonArray sections = initialData.getObject("contents").getObject("twoColumnSearchResultsRenderer")
103115
.getObject("primaryContents").getObject("sectionListRenderer").getArray("contents");
104116

105117
Page nextPage = null;
106118

107-
for (Object section : sections) {
108-
final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer");
119+
for (final Object section : sections) {
120+
if (((JsonObject) section).has("itemSectionRenderer")) {
121+
final JsonObject itemSectionRenderer = ((JsonObject) section).getObject("itemSectionRenderer");
109122

110-
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
123+
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
111124

112-
nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations"));
125+
nextPage = getNextPageFrom(itemSectionRenderer.getArray("continuations"));
126+
} else if (((JsonObject) section).has("continuationItemRenderer")) {
127+
nextPage = getNewNextPageFrom(((JsonObject) section).getObject("continuationItemRenderer"));
128+
}
113129
}
114130

115131
return new InfoItemsPage<>(collector, nextPage);
@@ -122,15 +138,58 @@ public InfoItemsPage<InfoItem> getPage(final Page page) throws IOException, Extr
122138
}
123139

124140
final InfoItemsSearchCollector collector = new InfoItemsSearchCollector(getServiceId());
125-
final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization());
126141

127-
final JsonObject itemSectionRenderer = ajaxJson.getObject(1).getObject("response")
128-
.getObject("continuationContents").getObject("itemSectionContinuation");
142+
if (page.getId() == null) {
143+
final JsonArray ajaxJson = getJsonResponse(page.getUrl(), getExtractorLocalization());
144+
145+
final JsonObject itemSectionContinuation = ajaxJson.getObject(1).getObject("response")
146+
.getObject("continuationContents").getObject("itemSectionContinuation");
147+
148+
collectStreamsFrom(collector, itemSectionContinuation.getArray("contents"));
149+
final JsonArray continuations = itemSectionContinuation.getArray("continuations");
150+
151+
return new InfoItemsPage<>(collector, getNextPageFrom(continuations));
152+
} else {
153+
// @formatter:off
154+
final byte[] json = JsonWriter.string()
155+
.object()
156+
.object("context")
157+
.object("client")
158+
.value("hl", "en")
159+
.value("gl", getExtractorContentCountry().getCountryCode())
160+
.value("clientName", "WEB")
161+
.value("clientVersion", getClientVersion())
162+
.value("utcOffsetMinutes", 0)
163+
.end()
164+
.object("request").end()
165+
.object("user").end()
166+
.end()
167+
.value("continuation", page.getId())
168+
.end().done().getBytes("UTF-8");
169+
// @formatter:on
170+
171+
final Map<String, List<String>> headers = new HashMap<>();
172+
headers.put("Origin", Collections.singletonList("https://www.youtube.com"));
173+
headers.put("Referer", Collections.singletonList(this.getUrl()));
174+
headers.put("Content-Type", Collections.singletonList("application/json"));
175+
176+
final String responseBody = getValidJsonResponseBody(getDownloader().post(page.getUrl(), headers, json));
177+
178+
final JsonObject ajaxJson;
179+
try {
180+
ajaxJson = JsonParser.object().from(responseBody);
181+
} catch (JsonParserException e) {
182+
throw new ParsingException("Could not parse JSON", e);
183+
}
129184

130-
collectStreamsFrom(collector, itemSectionRenderer.getArray("contents"));
131-
final JsonArray continuations = itemSectionRenderer.getArray("continuations");
185+
final JsonArray continuationItems = ajaxJson.getArray("onResponseReceivedCommands")
186+
.getObject(0).getObject("appendContinuationItemsAction").getArray("continuationItems");
132187

133-
return new InfoItemsPage<>(collector, getNextPageFrom(continuations));
188+
final JsonArray contents = continuationItems.getObject(0).getObject("itemSectionRenderer").getArray("contents");
189+
collectStreamsFrom(collector, contents);
190+
191+
return new InfoItemsPage<>(collector, getNewNextPageFrom(continuationItems.getObject(1).getObject("continuationItemRenderer")));
192+
}
134193
}
135194

136195
private void collectStreamsFrom(final InfoItemsSearchCollector collector, final JsonArray videos) throws NothingFoundException, ParsingException {
@@ -162,4 +221,17 @@ private Page getNextPageFrom(final JsonArray continuations) throws ParsingExcept
162221
return new Page(getUrl() + "&pbj=1&ctoken=" + continuation + "&continuation=" + continuation
163222
+ "&itct=" + clickTrackingParams);
164223
}
224+
225+
private Page getNewNextPageFrom(final JsonObject continuationItemRenderer) throws IOException, ExtractionException {
226+
if (isNullOrEmpty(continuationItemRenderer)) {
227+
return null;
228+
}
229+
230+
final String token = continuationItemRenderer.getObject("continuationEndpoint")
231+
.getObject("continuationCommand").getString("token");
232+
233+
final String url = "https://www.youtube.com/youtubei/v1/search?key=" + getKey();
234+
235+
return new Page(url, token);
236+
}
165237
}

0 commit comments

Comments
 (0)