From e60b827baa01500c96700a90f85184c72a44c0b3 Mon Sep 17 00:00:00 2001 From: yanqs Date: Tue, 20 Aug 2024 10:49:31 +0800 Subject: [PATCH] =?UTF-8?q?test:=E4=BC=98=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=E5=8D=95=E5=85=83=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 3 +- pom.xml | 6 ++ .../{HtmlParseUtils.java => HtmlUtils.java} | 57 +++++++++++++--- src/main/java/com/bcrjl/rss/job/RssJob.java | 10 +-- src/test/java/HtmlTest.java | 51 -------------- src/test/java/RSSReader.java | 40 ----------- .../bcrjl/rss/common/util/HtmlUtilsTest.java | 67 +++++++++++++++++++ .../bcrjl/rss/common/util/MailUtilsTest.java} | 16 +++-- .../bcrjl/rss/common/util/RssUtilsTest.java | 24 +++++++ 9 files changed, 165 insertions(+), 109 deletions(-) rename src/main/java/com/bcrjl/rss/common/util/{HtmlParseUtils.java => HtmlUtils.java} (53%) delete mode 100644 src/test/java/HtmlTest.java delete mode 100644 src/test/java/RSSReader.java create mode 100644 src/test/java/com/bcrjl/rss/common/util/HtmlUtilsTest.java rename src/test/java/{MailTest.java => com/bcrjl/rss/common/util/MailUtilsTest.java} (51%) create mode 100644 src/test/java/com/bcrjl/rss/common/util/RssUtilsTest.java diff --git a/README.md b/README.md index e125cfa..d12c64f 100644 --- a/README.md +++ b/README.md @@ -37,4 +37,5 @@ bcrjl/rss-reader:latest 1. 微博视频本地保存及上传AList; 2. 图片/视频本地保存性能提升 -3. ... +3. 订阅请求增加代理配置 +4. ... diff --git a/pom.xml b/pom.xml index bf18ea8..19757a7 100644 --- a/pom.xml +++ b/pom.xml @@ -50,6 +50,12 @@ spring-boot-starter-thymeleaf + + org.springframework.boot + spring-boot-starter-test + test + + cn.dev33 sa-token-spring-boot-starter diff --git a/src/main/java/com/bcrjl/rss/common/util/HtmlParseUtils.java b/src/main/java/com/bcrjl/rss/common/util/HtmlUtils.java similarity index 53% rename from src/main/java/com/bcrjl/rss/common/util/HtmlParseUtils.java rename to src/main/java/com/bcrjl/rss/common/util/HtmlUtils.java index 08ab846..82b2409 100644 --- a/src/main/java/com/bcrjl/rss/common/util/HtmlParseUtils.java +++ b/src/main/java/com/bcrjl/rss/common/util/HtmlUtils.java @@ -18,7 +18,7 @@ import static com.bcrjl.rss.common.constant.AppConstant.USER_AGENT; * @author yanqs */ @Slf4j -public class HtmlParseUtils { +public class HtmlUtils { /** * 获取html中的图片 * @@ -26,15 +26,37 @@ public class HtmlParseUtils { * @return */ public static List extractImageUrls(String htmlContent) { - List imageUrls = new ArrayList<>(); String regex = "]*?src\\s*=\\s*['\"]([^'\"]*?)['\"][^>]*?>"; - Pattern pattern = Pattern.compile(regex); - Matcher matcher = pattern.matcher(htmlContent); - while (matcher.find()) { - String imageUrl = matcher.group(1); - imageUrls.add(imageUrl); + return extractUrls(htmlContent, regex); + } + + /** + * 获取html中的视频 + * + * @param htmlContent html内容 + * @return + */ + public static List extractVideoUrls(String htmlContent) { + String regex = "]*?src\\s*=\\s*['\"]([^'\"]*?)['\"][^>]*?>"; + return extractUrls(htmlContent, regex); + } + + /** + * 获取url中的文件名称 + * + * @param url url + * @return 文件名称 + */ + public static String getFileName(String url) { + int lastSlashIndex = url.lastIndexOf('/'); + // 如果找到了斜杠,就从斜杠后面截取字符串 + String fileName = url.substring(lastSlashIndex + 1); + int queryIndex = fileName.indexOf('?'); + if (queryIndex == -1) { + return fileName; + } else { + return fileName.substring(0, queryIndex); } - return imageUrls; } /** @@ -56,4 +78,23 @@ public class HtmlParseUtils { return null; } } + + + /** + * 根据正则获取html中的内容 + * + * @param htmlContent html内容 + * @param regex 正则 + * @return urls + */ + private static List extractUrls(String htmlContent, String regex) { + List urls = new ArrayList<>(); + Pattern pattern = Pattern.compile(regex); + Matcher matcher = pattern.matcher(htmlContent); + while (matcher.find()) { + String url = matcher.group(1); + urls.add(url); + } + return urls; + } } diff --git a/src/main/java/com/bcrjl/rss/job/RssJob.java b/src/main/java/com/bcrjl/rss/job/RssJob.java index e784c6f..a288080 100644 --- a/src/main/java/com/bcrjl/rss/job/RssJob.java +++ b/src/main/java/com/bcrjl/rss/job/RssJob.java @@ -12,7 +12,7 @@ import cn.hutool.json.JSONObject; import cn.hutool.json.JSONUtil; import cn.hutool.setting.Setting; import com.bcrjl.rss.common.util.AListUtils; -import com.bcrjl.rss.common.util.HtmlParseUtils; +import com.bcrjl.rss.common.util.HtmlUtils; import com.bcrjl.rss.common.util.MailUtils; import com.bcrjl.rss.common.util.RssUtils; import com.bcrjl.rss.model.entity.RssEntity; @@ -90,7 +90,7 @@ public class RssJob { if (CollUtil.isNotEmpty(list)) { Setting setting = new Setting(CONFIG_PATH, CharsetUtil.CHARSET_UTF_8, true); Setting emailSetting = setting.getSetting(SET_MAIL); - saveWeiBoImagesOrUpdateAlist(list); + saveWeiBoImagesOrUpdateAList(list); if (emailSetting.getBool(MAIL_CONFIG_ENABLE) && emailSetting.getBool("sendUpdate")) { // 如果邮箱开启且发送更新邮件开启 则推送通知 StringBuffer stringBuffer = new StringBuffer(); @@ -111,7 +111,7 @@ public class RssJob { /** * 保存微博图片到本地且上传AList */ - private void saveWeiBoImagesOrUpdateAlist(List list) { + private void saveWeiBoImagesOrUpdateAList(List list) { Setting setting = new Setting(CONFIG_PATH, CharsetUtil.CHARSET_UTF_8, true); Setting systemSetting = setting.getSetting(SET_SYSTEM); Boolean saveImages = Boolean.valueOf(systemSetting.get(SAVE_WEIBO_IMAGES)); @@ -119,14 +119,14 @@ public class RssJob { if (saveImages) { // 保存图片 list.forEach(obj -> { - List imgList = HtmlParseUtils.extractImageUrls(obj.getDescription()); + List imgList = HtmlUtils.extractImageUrls(obj.getDescription()); imgList.forEach(imgObj -> { if (imgObj.contains("sinaimg") && !imgObj.contains("timeline_card") && !imgObj.contains("qixi2018")) { int lastSlashIndex = imgObj.lastIndexOf('/'); // 如果找到了斜杠,就从斜杠后面截取字符串 String fileName = imgObj.substring(lastSlashIndex + 1); //log.info("微博图片文件名:{}", fileName); - HttpResponse weiBoImagesHttpRequest = HtmlParseUtils.getWeiBoImagesHttpRequest(fileName); + HttpResponse weiBoImagesHttpRequest = HtmlUtils.getWeiBoImagesHttpRequest(fileName); byte[] bytes = weiBoImagesHttpRequest.bodyBytes(); FileUtil.writeBytes(bytes, new File(IMAGES_PATH + fileName)); if (uploadAList) { diff --git a/src/test/java/HtmlTest.java b/src/test/java/HtmlTest.java deleted file mode 100644 index 755ef7b..0000000 --- a/src/test/java/HtmlTest.java +++ /dev/null @@ -1,51 +0,0 @@ -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Html测试 - * - * @author yanqs - * @since 2024-08-10 - */ -public class HtmlTest { - public static void main(String[] args) throws IOException{ - String str="为什么温泉♨️水那么黄?

















"; - List strings = extractImageUrls(str); - strings.forEach(obj->{ - System.out.println(obj); - }); - - } - - - public static String readHtmlFile(String filePath) throws IOException { - StringBuilder content = new StringBuilder(); - BufferedReader reader = new BufferedReader(new FileReader(filePath)); - String line; - while ((line = reader.readLine()) != null) { - content.append(line); - } - reader.close(); - return content.toString(); - } - - public static List extractImageUrls(String htmlContent) { - List imageUrls = new ArrayList<>(); - String regex = "]*?src\\s*=\\s*['\"]([^'\"]*?)['\"][^>]*?>"; - Pattern pattern = Pattern.compile(regex); - Matcher matcher = pattern.matcher(htmlContent); - while (matcher.find()) { - String imageUrl = matcher.group(1); - imageUrls.add(imageUrl); - } - return imageUrls; - } -} - - - diff --git a/src/test/java/RSSReader.java b/src/test/java/RSSReader.java deleted file mode 100644 index 780f653..0000000 --- a/src/test/java/RSSReader.java +++ /dev/null @@ -1,40 +0,0 @@ -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import java.net.URL; - -/** - * RSS获取 测试类 - * - * @author yanqs - */ -public class RSSReader { - public static void main(String[] args) throws Exception { - // RSS feed URL - // URL rssUrl = new URL("https://rsshub.ys.bcrjl.com/weibo/user/6489032761"); - URL rssUrl = new URL("https://blog.yanqingshan.com/feed/"); - - // Create a DocumentBuilder - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - DocumentBuilder builder = factory.newDocumentBuilder(); - - // Parse the RSS file - Document document = builder.parse(rssUrl.openStream()); - - // Get all items - NodeList items = document.getElementsByTagName("item"); - - for (int i = 0; i < items.getLength(); i++) { - Element item = (Element) items.item(i); - Element title = (Element) item.getElementsByTagName("title").item(0); - Element link = (Element) item.getElementsByTagName("link").item(0); - - // Print the title - System.out.println(title.getTextContent()); - System.out.println(link.getTextContent()); - } - } -} diff --git a/src/test/java/com/bcrjl/rss/common/util/HtmlUtilsTest.java b/src/test/java/com/bcrjl/rss/common/util/HtmlUtilsTest.java new file mode 100644 index 0000000..e5e6256 --- /dev/null +++ b/src/test/java/com/bcrjl/rss/common/util/HtmlUtilsTest.java @@ -0,0 +1,67 @@ +package com.bcrjl.rss.common.util; + +import cn.hutool.core.io.FileUtil; +import cn.hutool.http.Header; +import cn.hutool.http.HttpRequest; +import cn.hutool.http.HttpResponse; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.util.List; + +import static com.bcrjl.rss.common.constant.AppConstant.IMAGES_PATH; +import static com.bcrjl.rss.common.constant.AppConstant.USER_AGENT; + +/** + * Html 工具类测试 + * + * @author yanqs + */ +class HtmlUtilsTest { + + /** + * 获取html中的图片 + */ + @Test + void extractImageUrls() { + String str = "为什么温泉♨️水那么黄?

















"; + List strings = HtmlUtils.extractImageUrls(str); + strings.forEach(System.out::println); + } + + /** + * 测试获取文件名 + */ + @Test + void getFileName() { + String imgSrc = "https://h5.sinaimg.cn/m/emoticon/icon/others/ct_kele-4ce616ef95.png"; + String videoSrc = "https://f.video.weibocdn.com/o0/H5AopYlvlx08hlyjma2A010412003vHq0E010.mp4?label=mp4_ld&template=360x480.24.0&ori=0&ps=1CwnkDw1GXwCQx&Expires=1724122135&ssig=QYOiNA1GMH&KID=unistore,video"; + System.out.println(HtmlUtils.getFileName(imgSrc)); + System.out.println(HtmlUtils.getFileName(videoSrc)); + } + + + /** + * 获取微博图片流 + */ + @Test + void getWeiBoImagesHttpRequest() { + + } + + /** + * 获取微博视频流 + */ + @Test + void getWeiBoVideosHttpRequest() { + String url = "https://f.video.weibocdn.com/o0/KjwJ3CYwlx08hlyj0a8001041200ah0h0E010.mp4" + + "?label=mp4_720p&template=720x960.24.0&ori=0&ps=1CwnkDw1GXwCQx&Expires=1724122135&ssig=Gd1gjIz05D&KID=unistore,video"; + HttpRequest request = HttpRequest.get(url) + .header(Header.REFERER, "https://weibo.com/") + .header(Header.USER_AGENT, USER_AGENT) + .timeout(20000); + HttpResponse httpResponse = request.executeAsync(); + byte[] bytes = httpResponse.bodyBytes(); + FileUtil.writeBytes(bytes, new File(IMAGES_PATH + "KjwJ3CYwlx08hlyj0a8001041200ah0h0E010.mp4")); + } +} diff --git a/src/test/java/MailTest.java b/src/test/java/com/bcrjl/rss/common/util/MailUtilsTest.java similarity index 51% rename from src/test/java/MailTest.java rename to src/test/java/com/bcrjl/rss/common/util/MailUtilsTest.java index e3cb3a1..ff7e273 100644 --- a/src/test/java/MailTest.java +++ b/src/test/java/com/bcrjl/rss/common/util/MailUtilsTest.java @@ -1,13 +1,21 @@ +package com.bcrjl.rss.common.util; + import cn.hutool.core.lang.Console; import cn.hutool.extra.mail.MailAccount; -import com.bcrjl.rss.common.util.MailUtils; +import org.junit.jupiter.api.Test; /** + * 邮件测试 + * * @author yanqs - * @since 2024-08-09 */ -public class MailTest { - public static void main(String[] args) { +class MailUtilsTest { + + /** + * 初始化邮件账号 + */ + @Test + void initMailAccount() { MailAccount mailAccount = MailUtils.initMailAccount(); Console.log(mailAccount); } diff --git a/src/test/java/com/bcrjl/rss/common/util/RssUtilsTest.java b/src/test/java/com/bcrjl/rss/common/util/RssUtilsTest.java new file mode 100644 index 0000000..554e78c --- /dev/null +++ b/src/test/java/com/bcrjl/rss/common/util/RssUtilsTest.java @@ -0,0 +1,24 @@ +package com.bcrjl.rss.common.util; + +import cn.hutool.core.lang.Console; +import com.bcrjl.rss.model.entity.RssEntity; +import org.junit.jupiter.api.Test; + +import java.util.List; + +/** + * RSSUtilsTest + * + * @author yanqs + */ +class RssUtilsTest { + + @Test + void getRssList() { + String url = "https://blog.yanqingshan.com/feed/"; + List rssList = RssUtils.getRssList(url); + rssList.forEach(obj -> { + Console.log(obj); + }); + } +}