Skip to content

Commit b2d0c09

Browse files
2 parents 35c7b56 + 6cc50b5 commit b2d0c09

12 files changed

Lines changed: 109 additions & 56 deletions

File tree

extractor/src/main/java/org/schabi/newpipe/extractor/StreamingService.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.schabi.newpipe.extractor.stream.StreamExtractor;
1717
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
1818
import org.schabi.newpipe.extractor.suggestion.SuggestionExtractor;
19+
import org.schabi.newpipe.extractor.utils.Utils;
1920

2021
import javax.annotation.Nullable;
2122
import java.util.Collections;
@@ -277,18 +278,19 @@ public CommentsExtractor getCommentsExtractor(String url) throws ExtractionExcep
277278
* Figures out where the link is pointing to (a channel, a video, a playlist, etc.)
278279
* @param url the url whose link type should be determined
279280
* @return the link type of url
280-
* @throws ParsingException
281281
*/
282-
public final LinkType getLinkTypeByUrl(String url) throws ParsingException {
283-
LinkHandlerFactory sH = getStreamLHFactory();
284-
LinkHandlerFactory cH = getChannelLHFactory();
285-
LinkHandlerFactory pH = getPlaylistLHFactory();
282+
public final LinkType getLinkTypeByUrl(final String url) throws ParsingException {
283+
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
286284

287-
if (sH != null && sH.acceptUrl(url)) {
285+
final LinkHandlerFactory sH = getStreamLHFactory();
286+
final LinkHandlerFactory cH = getChannelLHFactory();
287+
final LinkHandlerFactory pH = getPlaylistLHFactory();
288+
289+
if (sH != null && sH.acceptUrl(polishedUrl)) {
288290
return LinkType.STREAM;
289-
} else if (cH != null && cH.acceptUrl(url)) {
291+
} else if (cH != null && cH.acceptUrl(polishedUrl)) {
290292
return LinkType.CHANNEL;
291-
} else if (pH != null && pH.acceptUrl(url)) {
293+
} else if (pH != null && pH.acceptUrl(polishedUrl)) {
292294
return LinkType.PLAYLIST;
293295
} else {
294296
return LinkType.NONE;

extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/LinkHandlerFactory.java

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,29 @@ public String getUrl(String id, String baseUrl) throws ParsingException {
4242
// Logic
4343
///////////////////////////////////
4444

45-
public LinkHandler fromUrl(String url) throws ParsingException {
46-
if (url == null) throw new IllegalArgumentException("url can not be null");
47-
final String baseUrl = Utils.getBaseUrl(url);
48-
return fromUrl(url, baseUrl);
45+
/**
46+
* Builds a {@link LinkHandler} from a url.<br>
47+
* Be sure to call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
48+
* this function.
49+
* @param url the url to extract path and id from
50+
* @return a {@link LinkHandler} complete with information
51+
*/
52+
public LinkHandler fromUrl(final String url) throws ParsingException {
53+
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
54+
final String baseUrl = Utils.getBaseUrl(polishedUrl);
55+
return fromUrl(polishedUrl, baseUrl);
4956
}
5057

58+
/**
59+
* Builds a {@link LinkHandler} from a url and a base url. The url is expected to be already
60+
* polished from google search redirects (otherwise how could {@code baseUrl} have been
61+
* extracted?).<br>
62+
* So do not call {@link Utils#followGoogleRedirectIfNeeded(String)} on the url if overriding
63+
* this function, since that should be done in {@link #fromUrl(String)}.
64+
* @param url the url without google search redirects to extract id from
65+
* @param baseUrl the base url
66+
* @return a {@link LinkHandler} complete with information
67+
*/
5168
public LinkHandler fromUrl(String url, String baseUrl) throws ParsingException {
5269
if (url == null) throw new IllegalArgumentException("url can not be null");
5370
if (!acceptUrl(url)) {

extractor/src/main/java/org/schabi/newpipe/extractor/linkhandler/ListLinkHandlerFactory.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ public String getUrl(String id, List<String> contentFilter, String sortFilter, S
3131
///////////////////////////////////
3232

3333
@Override
34-
public ListLinkHandler fromUrl(String url) throws ParsingException {
35-
String baseUrl = Utils.getBaseUrl(url);
36-
return fromUrl(url, baseUrl);
34+
public ListLinkHandler fromUrl(final String url) throws ParsingException {
35+
final String polishedUrl = Utils.followGoogleRedirectIfNeeded(url);
36+
final String baseUrl = Utils.getBaseUrl(polishedUrl);
37+
return fromUrl(polishedUrl, baseUrl);
3738
}
3839

3940
@Override

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/SoundcloudParsingHelper.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525

2626
import javax.annotation.Nonnull;
2727
import java.io.IOException;
28+
import java.net.MalformedURLException;
29+
import java.net.URL;
2830
import java.net.URLEncoder;
2931
import java.text.ParseException;
3032
import java.text.SimpleDateFormat;
@@ -148,12 +150,21 @@ public static String resolveUrlWithEmbedPlayer(String apiUrl) throws IOException
148150
*
149151
* @return the resolved id
150152
*/
151-
public static String resolveIdWithEmbedPlayer(String url) throws IOException, ReCaptchaException, ParsingException {
153+
public static String resolveIdWithEmbedPlayer(String urlString) throws IOException, ReCaptchaException, ParsingException {
154+
// Remove the trailing slash from URLs due to issues with the SoundCloud API
155+
if (urlString.charAt(urlString.length() -1) == '/') urlString = urlString.substring(0, urlString.length()-1);
156+
157+
URL url;
158+
try {
159+
url = Utils.stringToURL(urlString);
160+
} catch (MalformedURLException e){
161+
throw new IllegalArgumentException("The given URL is not valid");
162+
}
152163

153164
String response = NewPipe.getDownloader().get("https://w.soundcloud.com/player/?url="
154-
+ URLEncoder.encode(url, "UTF-8"), SoundCloud.getLocalization()).responseBody();
165+
+ URLEncoder.encode(url.toString(), "UTF-8"), SoundCloud.getLocalization()).responseBody();
155166
// handle playlists / sets differently and get the playlist id via the uri field in JSON
156-
if (url.contains("sets") && !url.endsWith("sets") && !url.endsWith("sets/"))
167+
if (url.getPath().contains("/sets/") && !url.getPath().endsWith("/sets"))
157168
return Parser.matchGroup1("\"uri\":\\s*\"https:\\/\\/api\\.soundcloud\\.com\\/playlists\\/((\\d)*?)\"", response);
158169
return Parser.matchGroup1(",\"id\":(([^}\\n])*?),", response);
159170
}

extractor/src/main/java/org/schabi/newpipe/extractor/services/soundcloud/linkHandler/SoundcloudStreamLinkHandlerFactory.java

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,6 @@ public String getUrl(String id) throws ParsingException {
3030
@Override
3131
public String getId(String url) throws ParsingException {
3232
Utils.checkUrl(URL_PATTERN, url);
33-
// Remove the trailing slash from URLs due to issues with the SoundCloud API
34-
if (url.charAt(url.length() -1) == '/') url = url.substring(0, url.length()-1);
3533

3634
try {
3735
return SoundcloudParsingHelper.resolveIdWithEmbedPlayer(url);

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/YoutubeParsingHelper.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,6 @@ public class YoutubeParsingHelper {
5555
private YoutubeParsingHelper() {
5656
}
5757

58-
/**
59-
* The official youtube app supports intents in this format, where after the ':' is the videoId.
60-
* Accordingly there are other apps sharing streams in this format.
61-
*/
62-
public final static String BASE_YOUTUBE_INTENT_URL = "vnd.youtube";
63-
6458
private static final String HARDCODED_CLIENT_VERSION = "2.20200214.04.00";
6559
private static String clientVersion;
6660

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeCommentsLinkHandlerFactory.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
package org.schabi.newpipe.extractor.services.youtube.linkHandler;
22

3-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
4-
53
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
64
import org.schabi.newpipe.extractor.exceptions.ParsingException;
7-
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandler;
85
import org.schabi.newpipe.extractor.linkhandler.ListLinkHandlerFactory;
96

107
import java.util.List;
@@ -17,15 +14,6 @@ public static YoutubeCommentsLinkHandlerFactory getInstance() {
1714
return instance;
1815
}
1916

20-
@Override
21-
public ListLinkHandler fromUrl(String url) throws ParsingException {
22-
if (url.startsWith(BASE_YOUTUBE_INTENT_URL)){
23-
return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
24-
} else {
25-
return super.fromUrl(url);
26-
}
27-
}
28-
2917
@Override
3018
public String getUrl(String id) {
3119
return "https://m.youtube.com/watch?v=" + id;

extractor/src/main/java/org/schabi/newpipe/extractor/services/youtube/linkHandler/YoutubeStreamLinkHandlerFactory.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
import org.schabi.newpipe.extractor.exceptions.FoundAdException;
44
import org.schabi.newpipe.extractor.exceptions.ParsingException;
5-
import org.schabi.newpipe.extractor.linkhandler.LinkHandler;
65
import org.schabi.newpipe.extractor.linkhandler.LinkHandlerFactory;
76
import org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper;
87
import org.schabi.newpipe.extractor.utils.Utils;
@@ -15,8 +14,6 @@
1514
import java.util.regex.Matcher;
1615
import java.util.regex.Pattern;
1716

18-
import static org.schabi.newpipe.extractor.services.youtube.YoutubeParsingHelper.BASE_YOUTUBE_INTENT_URL;
19-
2017
/*
2118
* Created by Christian Schabesberger on 02.02.16.
2219
*
@@ -67,15 +64,6 @@ private static String assertIsId(@Nullable final String id) throws ParsingExcept
6764
}
6865
}
6966

70-
@Override
71-
public LinkHandler fromUrl(String url) throws ParsingException {
72-
if (url.startsWith(BASE_YOUTUBE_INTENT_URL)) {
73-
return super.fromUrl(url, BASE_YOUTUBE_INTENT_URL);
74-
} else {
75-
return super.fromUrl(url);
76-
}
77-
}
78-
7967
@Override
8068
public String getUrl(String id) {
8169
return "https://www.youtube.com/watch?v=" + id;

extractor/src/main/java/org/schabi/newpipe/extractor/utils/Utils.java

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,39 @@ public static String removeUTF8BOM(String s) {
181181
return s;
182182
}
183183

184-
public static String getBaseUrl(String url) throws ParsingException {
185-
URL uri;
184+
public static String getBaseUrl(final String url) throws ParsingException {
186185
try {
187-
uri = stringToURL(url);
188-
} catch (MalformedURLException e) {
186+
final URL uri = stringToURL(url);
187+
return uri.getProtocol() + "://" + uri.getAuthority();
188+
} catch (final MalformedURLException e) {
189+
final String message = e.getMessage();
190+
if (message.startsWith("unknown protocol: ")) {
191+
// return just the protocol (e.g. vnd.youtube)
192+
return message.substring("unknown protocol: ".length());
193+
}
194+
189195
throw new ParsingException("Malformed url: " + url, e);
190196
}
191-
return uri.getProtocol() + "://" + uri.getAuthority();
197+
}
198+
199+
/**
200+
* If the provided url is a Google search redirect, then the actual url is extracted from the
201+
* {@code url=} query value and returned, otherwise the original url is returned.
202+
* @param url the url which can possibly be a Google search redirect
203+
* @return a url with no Google search redirects
204+
*/
205+
public static String followGoogleRedirectIfNeeded(final String url) {
206+
// if the url is a redirect from a Google search, extract the actual url
207+
try {
208+
final URL decoded = Utils.stringToURL(url);
209+
if (decoded.getHost().contains("google") && decoded.getPath().equals("/url")) {
210+
return URLDecoder.decode(Parser.matchGroup1("&url=([^&]+)(?:&|$)", url), "UTF-8");
211+
}
212+
} catch (final Exception ignored) {
213+
}
214+
215+
// url is not a google search redirect
216+
return url;
192217
}
193218

194219
public static boolean isNullOrEmpty(final String str) {

extractor/src/test/java/org/schabi/newpipe/extractor/NewPipeTest.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import static org.junit.Assert.*;
88
import static org.schabi.newpipe.extractor.NewPipe.getServiceByUrl;
9+
import static org.schabi.newpipe.extractor.ServiceList.SoundCloud;
910
import static org.schabi.newpipe.extractor.ServiceList.YouTube;
1011

1112
public class NewPipeTest {
@@ -39,8 +40,10 @@ public void getServiceWithUrl() throws Exception {
3940
assertEquals(getServiceByUrl("https://www.youtube.com/watch?v=_r6CgaFNAGg"), YouTube);
4041
assertEquals(getServiceByUrl("https://www.youtube.com/channel/UCi2bIyFtz-JdI-ou8kaqsqg"), YouTube);
4142
assertEquals(getServiceByUrl("https://www.youtube.com/playlist?list=PLRqwX-V7Uu6ZiZxtDDRCi6uhfTH4FilpH"), YouTube);
43+
assertEquals(getServiceByUrl("https://www.google.it/url?sa=t&rct=j&q=&esrc=s&cd=&cad=rja&uact=8&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DHu80uDzh8RY&source=video"), YouTube);
4244

43-
assertNotEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), YouTube);
45+
assertEquals(getServiceByUrl("https://soundcloud.com/pegboardnerds"), SoundCloud);
46+
assertEquals(getServiceByUrl("https://www.google.com/url?sa=t&url=https%3A%2F%2Fsoundcloud.com%2Fciaoproduction&rct=j&q=&esrc=s&source=web&cd="), SoundCloud);
4447
}
4548

4649
@Test

0 commit comments

Comments
 (0)