Skip to content

Commit b13c7e1

Browse files
authored
Merge pull request #452 from Stypox/yt-import
Implement YouTube subscription import from Google takeout
2 parents ac50068 + 501ec30 commit b13c7e1

6 files changed

Lines changed: 310 additions & 162 deletions

File tree

Lines changed: 36 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -1,126 +1,71 @@
11
package org.schabi.newpipe.extractor.services.youtube.extractors;
22

3-
import org.jsoup.Jsoup;
4-
import org.jsoup.nodes.Document;
5-
import org.jsoup.nodes.Element;
3+
import com.grack.nanojson.JsonArray;
4+
import com.grack.nanojson.JsonObject;
5+
import com.grack.nanojson.JsonParser;
6+
import com.grack.nanojson.JsonParserException;
7+
68
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
79
import org.schabi.newpipe.extractor.services.youtube.YoutubeService;
810
import org.schabi.newpipe.extractor.subscription.SubscriptionExtractor;
911
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;
10-
import org.schabi.newpipe.extractor.utils.Parser;
1112

12-
import java.io.IOException;
1313
import java.io.InputStream;
1414
import java.util.ArrayList;
1515
import java.util.Collections;
1616
import java.util.List;
1717

18+
import javax.annotation.Nonnull;
19+
1820
import static org.schabi.newpipe.extractor.subscription.SubscriptionExtractor.ContentSource.INPUT_STREAM;
1921

2022
/**
21-
* Extract subscriptions from a YouTube export (OPML format supported)
23+
* Extract subscriptions from a Google takeout export (the user has to get the JSON out of the zip)
2224
*/
2325
public class YoutubeSubscriptionExtractor extends SubscriptionExtractor {
26+
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";
2427

25-
public YoutubeSubscriptionExtractor(YoutubeService service) {
26-
super(service, Collections.singletonList(INPUT_STREAM));
28+
public YoutubeSubscriptionExtractor(final YoutubeService youtubeService) {
29+
super(youtubeService, Collections.singletonList(INPUT_STREAM));
2730
}
2831

2932
@Override
3033
public String getRelatedUrl() {
31-
return "https://www.youtube.com/subscription_manager?action_takeout=1";
34+
return "https://takeout.google.com/takeout/custom/youtube";
3235
}
3336

3437
@Override
35-
public List<SubscriptionItem> fromInputStream(InputStream contentInputStream) throws ExtractionException {
36-
if (contentInputStream == null) throw new InvalidSourceException("input stream is null");
37-
38-
return getItemsFromOPML(contentInputStream);
39-
}
40-
41-
/*//////////////////////////////////////////////////////////////////////////
42-
// OPML implementation
43-
//////////////////////////////////////////////////////////////////////////*/
44-
45-
private static final String ID_PATTERN = "/videos.xml\\?channel_id=([A-Za-z0-9_-]*)";
46-
private static final String BASE_CHANNEL_URL = "https://www.youtube.com/channel/";
47-
48-
private List<SubscriptionItem> getItemsFromOPML(InputStream contentInputStream) throws ExtractionException {
49-
final List<SubscriptionItem> result = new ArrayList<>();
50-
51-
final String contentString = readFromInputStream(contentInputStream);
52-
Document document = Jsoup.parse(contentString, "", org.jsoup.parser.Parser.xmlParser());
53-
54-
if (document.select("opml").isEmpty()) {
55-
throw new InvalidSourceException("document does not have OPML tag");
56-
}
57-
58-
if (document.select("outline").isEmpty()) {
59-
throw new InvalidSourceException("document does not have at least one outline tag");
60-
}
61-
62-
for (Element outline : document.select("outline[type=rss]")) {
63-
String title = outline.attr("title");
64-
String xmlUrl = outline.attr("abs:xmlUrl");
65-
66-
try {
67-
String id = Parser.matchGroup1(ID_PATTERN, xmlUrl);
68-
result.add(new SubscriptionItem(service.getServiceId(), BASE_CHANNEL_URL + id, title));
69-
} catch (Parser.RegexException ignored) { /* ignore invalid subscriptions */ }
70-
}
71-
72-
return result;
73-
}
74-
75-
/*//////////////////////////////////////////////////////////////////////////
76-
// Utils
77-
//////////////////////////////////////////////////////////////////////////*/
78-
79-
/**
80-
* Throws an exception if the string does not have the right tag/string from a valid export.
81-
*/
82-
private void throwIfTagIsNotFound(String content) throws InvalidSourceException {
83-
if (!content.trim().contains("<opml")) {
84-
throw new InvalidSourceException("input stream does not have OPML tag");
85-
}
86-
}
87-
88-
private String readFromInputStream(InputStream inputStream) throws InvalidSourceException {
89-
StringBuilder contentBuilder = new StringBuilder();
90-
boolean hasTag = false;
38+
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
39+
throws ExtractionException {
40+
final JsonArray subscriptions;
9141
try {
92-
byte[] buffer = new byte[16 * 1024];
93-
int read;
94-
while ((read = inputStream.read(buffer)) != -1) {
95-
String currentPartOfContent = new String(buffer, 0, read, "UTF-8");
96-
contentBuilder.append(currentPartOfContent);
42+
subscriptions = JsonParser.array().from(contentInputStream);
43+
} catch (JsonParserException e) {
44+
throw new InvalidSourceException("Invalid json input stream", e);
45+
}
9746

98-
// Fail-fast in case of reading a long unsupported input stream
99-
if (!hasTag && contentBuilder.length() > 128) {
100-
throwIfTagIsNotFound(contentBuilder.toString());
101-
hasTag = true;
102-
}
47+
boolean foundInvalidSubscription = false;
48+
final List<SubscriptionItem> subscriptionItems = new ArrayList<>();
49+
for (final Object subscriptionObject : subscriptions) {
50+
if (!(subscriptionObject instanceof JsonObject)) {
51+
foundInvalidSubscription = true;
52+
continue;
10353
}
104-
} catch (InvalidSourceException e) {
105-
throw e;
106-
} catch (Throwable e) {
107-
throw new InvalidSourceException(e);
108-
} finally {
109-
try {
110-
inputStream.close();
111-
} catch (IOException ignored) {
54+
55+
final JsonObject subscription = ((JsonObject) subscriptionObject).getObject("snippet");
56+
final String id = subscription.getObject("resourceId").getString("channelId", "");
57+
if (id.length() != 24) { // e.g. UCsXVk37bltHxD1rDPwtNM8Q
58+
foundInvalidSubscription = true;
59+
continue;
11260
}
113-
}
11461

115-
final String fileContent = contentBuilder.toString().trim();
116-
if (fileContent.isEmpty()) {
117-
throw new InvalidSourceException("Empty input stream");
62+
subscriptionItems.add(new SubscriptionItem(service.getServiceId(),
63+
BASE_CHANNEL_URL + id, subscription.getString("title", "")));
11864
}
11965

120-
if (!hasTag) {
121-
throwIfTagIsNotFound(fileContent);
66+
if (foundInvalidSubscription && subscriptionItems.isEmpty()) {
67+
throw new InvalidSourceException("Found only invalid channel ids");
12268
}
123-
124-
return fileContent;
69+
return subscriptionItems;
12570
}
12671
}

extractor/src/main/java/org/schabi/newpipe/extractor/subscription/SubscriptionExtractor.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import org.schabi.newpipe.extractor.exceptions.ExtractionException;
55
import org.schabi.newpipe.extractor.exceptions.ParsingException;
66

7+
import javax.annotation.Nonnull;
78
import javax.annotation.Nullable;
89
import java.io.IOException;
910
import java.io.InputStream;
@@ -71,8 +72,9 @@ public List<SubscriptionItem> fromChannelUrl(String channelUrl) throws IOExcepti
7172
*
7273
* @throws InvalidSourceException when the content read from the InputStream is invalid and can not be parsed
7374
*/
74-
@SuppressWarnings("RedundantThrows")
75-
public List<SubscriptionItem> fromInputStream(InputStream contentInputStream) throws IOException, ExtractionException {
76-
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName() + " doesn't support extracting from an InputStream");
75+
public List<SubscriptionItem> fromInputStream(@Nonnull final InputStream contentInputStream)
76+
throws ExtractionException {
77+
throw new UnsupportedOperationException("Service " + service.getServiceInfo().getName()
78+
+ " doesn't support extracting from an InputStream");
7779
}
7880
}

extractor/src/test/java/org/schabi/newpipe/FileUtils.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,21 @@ private static void writeFile(final String filename, final String content) throw
6161
writer.close();
6262
}
6363

64+
/**
65+
* Resolves the test resource file based on its filename. Looks in
66+
* {@code extractor/src/test/resources/} first, then falls back to {@code src/test/resources/}.
67+
* @param filename the resource filename
68+
* @return the resource file
69+
*/
70+
public static File resolveTestResource(final String filename) {
71+
final File file = new File("extractor/src/test/resources/" + filename);
72+
if (file.exists()) {
73+
return file;
74+
} else {
75+
return new File("src/test/resources/" + filename);
76+
}
77+
}
78+
6479
/**
6580
* Convert a JSON object to String
6681
* toString() does not produce a valid JSON string

extractor/src/test/java/org/schabi/newpipe/extractor/services/youtube/YoutubeSubscriptionExtractorTest.java

Lines changed: 43 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,16 @@
1111
import org.schabi.newpipe.extractor.subscription.SubscriptionItem;
1212

1313
import java.io.ByteArrayInputStream;
14-
import java.io.File;
1514
import java.io.FileInputStream;
15+
import java.nio.charset.StandardCharsets;
1616
import java.util.Arrays;
1717
import java.util.List;
1818

19-
import static org.junit.Assert.*;
19+
import static org.junit.Assert.assertEquals;
20+
import static org.junit.Assert.assertNotNull;
21+
import static org.junit.Assert.assertTrue;
22+
import static org.junit.Assert.fail;
23+
import static org.schabi.newpipe.FileUtils.resolveTestResource;
2024

2125
/**
2226
* Test for {@link YoutubeSubscriptionExtractor}
@@ -34,81 +38,75 @@ public static void setupClass() {
3438

3539
@Test
3640
public void testFromInputStream() throws Exception {
37-
File testFile = new File("extractor/src/test/resources/youtube_export_test.xml");
38-
if (!testFile.exists()) testFile = new File("src/test/resources/youtube_export_test.xml");
41+
final List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromInputStream(
42+
new FileInputStream(resolveTestResource("youtube_takeout_import_test.json")));
43+
assertEquals(7, subscriptionItems.size());
3944

40-
List<SubscriptionItem> subscriptionItems = subscriptionExtractor.fromInputStream(new FileInputStream(testFile));
41-
assertTrue("List doesn't have exactly 8 items (had " + subscriptionItems.size() + ")", subscriptionItems.size() == 8);
42-
43-
for (SubscriptionItem item : subscriptionItems) {
45+
for (final SubscriptionItem item : subscriptionItems) {
4446
assertNotNull(item.getName());
4547
assertNotNull(item.getUrl());
4648
assertTrue(urlHandler.acceptUrl(item.getUrl()));
47-
assertFalse(item.getServiceId() == -1);
49+
assertEquals(ServiceList.YouTube.getServiceId(), item.getServiceId());
4850
}
4951
}
5052

5153
@Test
5254
public void testEmptySourceException() throws Exception {
53-
String emptySource = "<opml version=\"1.1\"><body>" +
54-
"<outline text=\"Testing\" title=\"123\" />" +
55-
"</body></opml>";
56-
57-
List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(emptySource.getBytes("UTF-8")));
55+
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
56+
new ByteArrayInputStream("[]".getBytes(StandardCharsets.UTF_8)));
5857
assertTrue(items.isEmpty());
5958
}
6059

6160
@Test
6261
public void testSubscriptionWithEmptyTitleInSource() throws Exception {
63-
String channelId = "AA0AaAa0AaaaAAAAAA0aa0AA";
64-
String source = "<opml version=\"1.1\"><body><outline text=\"YouTube Subscriptions\" title=\"YouTube Subscriptions\">" +
65-
"<outline text=\"\" title=\"\" type=\"rss\" xmlUrl=\"https://www.youtube.com/feeds/videos.xml?channel_id=" + channelId + "\" />" +
66-
"</outline></body></opml>";
67-
68-
List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(source.getBytes("UTF-8")));
69-
assertTrue("List doesn't have exactly 1 item (had " + items.size() + ")", items.size() == 1);
70-
assertTrue("Item does not have an empty title (had \"" + items.get(0).getName() + "\")", items.get(0).getName().isEmpty());
71-
assertTrue("Item does not have the right channel id \"" + channelId + "\" (the whole url is \"" + items.get(0).getUrl() + "\")", items.get(0).getUrl().endsWith(channelId));
62+
final String source = "[{\"snippet\":{\"resourceId\":{\"channelId\":\"UCEOXxzW2vU0P-0THehuIIeg\"}}}]";
63+
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
64+
new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)));
65+
66+
assertEquals(1, items.size());
67+
assertEquals(ServiceList.YouTube.getServiceId(), items.get(0).getServiceId());
68+
assertEquals("https://www.youtube.com/channel/UCEOXxzW2vU0P-0THehuIIeg", items.get(0).getUrl());
69+
assertEquals("", items.get(0).getName());
7270
}
7371

7472
@Test
7573
public void testSubscriptionWithInvalidUrlInSource() throws Exception {
76-
String source = "<opml version=\"1.1\"><body><outline text=\"YouTube Subscriptions\" title=\"YouTube Subscriptions\">" +
77-
"<outline text=\"invalid\" title=\"url\" type=\"rss\" xmlUrl=\"https://www.youtube.com/feeds/videos.xml?channel_not_id=|||||||\"/>" +
78-
"<outline text=\"fail\" title=\"fail\" type=\"rss\" xmlUgrl=\"invalidTag\"/>" +
79-
"<outline text=\"invalid\" title=\"url\" type=\"rss\" xmlUrl=\"\"/>" +
80-
"<outline text=\"\" title=\"\" type=\"rss\" xmlUrl=\"\"/>" +
81-
"</outline></body></opml>";
82-
83-
List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(new ByteArrayInputStream(source.getBytes("UTF-8")));
84-
assertTrue(items.isEmpty());
74+
final String source = "[{\"snippet\":{\"resourceId\":{\"channelId\":\"gibberish\"},\"title\":\"name1\"}}," +
75+
"{\"snippet\":{\"resourceId\":{\"channelId\":\"UCEOXxzW2vU0P-0THehuIIeg\"},\"title\":\"name2\"}}]";
76+
final List<SubscriptionItem> items = subscriptionExtractor.fromInputStream(
77+
new ByteArrayInputStream(source.getBytes(StandardCharsets.UTF_8)));
78+
79+
assertEquals(1, items.size());
80+
assertEquals(ServiceList.YouTube.getServiceId(), items.get(0).getServiceId());
81+
assertEquals("https://www.youtube.com/channel/UCEOXxzW2vU0P-0THehuIIeg", items.get(0).getUrl());
82+
assertEquals("name2", items.get(0).getName());
8583
}
8684

8785
@Test
8886
public void testInvalidSourceException() {
8987
List<String> invalidList = Arrays.asList(
9088
"<xml><notvalid></notvalid></xml>",
9189
"<opml><notvalid></notvalid></opml>",
92-
"<opml><body></body></opml>",
90+
"{\"a\":\"b\"}",
91+
"[{}]",
92+
"[\"\", 5]",
93+
"[{\"snippet\":{\"title\":\"name\"}}]",
94+
"[{\"snippet\":{\"resourceId\":{\"channelId\":\"gibberish\"}}}]",
9395
"",
94-
null,
9596
"\uD83D\uDC28\uD83D\uDC28\uD83D\uDC28",
9697
"gibberish");
9798

9899
for (String invalidContent : invalidList) {
99100
try {
100-
if (invalidContent != null) {
101-
byte[] bytes = invalidContent.getBytes("UTF-8");
102-
subscriptionExtractor.fromInputStream(new ByteArrayInputStream(bytes));
103-
fail("Extracting from \"" + invalidContent + "\" didn't throw an exception");
104-
} else {
105-
subscriptionExtractor.fromInputStream(null);
106-
fail("Extracting from null String didn't throw an exception");
101+
byte[] bytes = invalidContent.getBytes(StandardCharsets.UTF_8);
102+
subscriptionExtractor.fromInputStream(new ByteArrayInputStream(bytes));
103+
fail("Extracting from \"" + invalidContent + "\" didn't throw an exception");
104+
} catch (final Exception e) {
105+
boolean correctType = e instanceof SubscriptionExtractor.InvalidSourceException;
106+
if (!correctType) {
107+
e.printStackTrace();
107108
}
108-
} catch (Exception e) {
109-
// System.out.println(" -> " + e);
110-
boolean isExpectedException = e instanceof SubscriptionExtractor.InvalidSourceException;
111-
assertTrue("\"" + e.getClass().getSimpleName() + "\" is not the expected exception", isExpectedException);
109+
assertTrue(e.getClass().getSimpleName() + " is not InvalidSourceException", correctType);
112110
}
113111
}
114112
}

extractor/src/test/resources/youtube_export_test.xml

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)