test:优化项目单元测试

This commit is contained in:
yanqs 2024-08-20 10:49:31 +08:00
parent 64dd211839
commit e60b827baa
9 changed files with 165 additions and 109 deletions

View File

@ -37,4 +37,5 @@ bcrjl/rss-reader:latest
1. 微博视频本地保存及上传AList
2. 图片/视频本地保存性能提升
3. ...
3. 订阅请求增加代理配置
4. ...

View File

@ -50,6 +50,12 @@
<artifactId>spring-boot-starter-thymeleaf</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>cn.dev33</groupId>
<artifactId>sa-token-spring-boot-starter</artifactId>

View File

@ -18,7 +18,7 @@ import static com.bcrjl.rss.common.constant.AppConstant.USER_AGENT;
* @author yanqs
*/
@Slf4j
public class HtmlParseUtils {
public class HtmlUtils {
/**
* 获取html中的图片
*
@ -26,15 +26,37 @@ public class HtmlParseUtils {
* @return
*/
public static List<String> extractImageUrls(String htmlContent) {
List<String> imageUrls = new ArrayList<>();
String regex = "<img\\s+[^>]*?src\\s*=\\s*['\"]([^'\"]*?)['\"][^>]*?>";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(htmlContent);
while (matcher.find()) {
String imageUrl = matcher.group(1);
imageUrls.add(imageUrl);
return extractUrls(htmlContent, regex);
}
/**
 * Collects the {@code src} attribute value of every {@code <source>} tag in the given HTML.
 *
 * @param htmlContent HTML content to scan
 * @return the captured {@code src} values, as returned by {@code extractUrls}
 */
public static List<String> extractVideoUrls(String htmlContent) {
    return extractUrls(htmlContent, "<source\\s+[^>]*?src\\s*=\\s*['\"]([^'\"]*?)['\"][^>]*?>");
}
/**
 * Extracts the file name (the last path segment) from a URL.
 *
 * <p>The query string is removed before the last {@code '/'} is located, so a slash that
 * appears inside a query parameter value is never mistaken for a path separator.
 *
 * @param url URL to inspect; must not be {@code null}
 * @return the text after the last {@code '/'} of the part preceding {@code '?'}; that whole
 *         part when it contains no {@code '/'}
 */
public static String getFileName(String url) {
    // Cut the query string first: ".../a.mp4?next=https://x/b" must yield "a.mp4", not "b".
    int queryIndex = url.indexOf('?');
    String path = queryIndex == -1 ? url : url.substring(0, queryIndex);
    // lastIndexOf returns -1 when there is no slash, so "+ 1" keeps the whole string.
    return path.substring(path.lastIndexOf('/') + 1);
}
/**
@ -56,4 +78,23 @@ public class HtmlParseUtils {
return null;
}
}
/**
 * Collects capture group 1 of every match of {@code regex} in the given HTML.
 *
 * @param htmlContent HTML content to scan; {@code null} is treated as "nothing to scan"
 * @param regex       regular expression whose first capture group holds the URL
 * @return the captured values in order of appearance; an empty list when
 *         {@code htmlContent} is {@code null} or nothing matches
 */
private static List<String> extractUrls(String htmlContent, String regex) {
    List<String> urls = new ArrayList<>();
    if (htmlContent == null) {
        // Pattern.matcher(null) would throw; a missing body simply has no URLs.
        return urls;
    }
    Matcher matcher = Pattern.compile(regex).matcher(htmlContent);
    while (matcher.find()) {
        urls.add(matcher.group(1));
    }
    return urls;
}
}

View File

@ -12,7 +12,7 @@ import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;
import cn.hutool.setting.Setting;
import com.bcrjl.rss.common.util.AListUtils;
import com.bcrjl.rss.common.util.HtmlParseUtils;
import com.bcrjl.rss.common.util.HtmlUtils;
import com.bcrjl.rss.common.util.MailUtils;
import com.bcrjl.rss.common.util.RssUtils;
import com.bcrjl.rss.model.entity.RssEntity;
@ -90,7 +90,7 @@ public class RssJob {
if (CollUtil.isNotEmpty(list)) {
Setting setting = new Setting(CONFIG_PATH, CharsetUtil.CHARSET_UTF_8, true);
Setting emailSetting = setting.getSetting(SET_MAIL);
saveWeiBoImagesOrUpdateAlist(list);
saveWeiBoImagesOrUpdateAList(list);
if (emailSetting.getBool(MAIL_CONFIG_ENABLE) && emailSetting.getBool("sendUpdate")) {
// 如果邮箱开启且发送更新邮件开启 则推送通知
StringBuffer stringBuffer = new StringBuffer();
@ -111,7 +111,7 @@ public class RssJob {
/**
* 保存微博图片到本地且上传AList
*/
private void saveWeiBoImagesOrUpdateAlist(List<RssEntity> list) {
private void saveWeiBoImagesOrUpdateAList(List<RssEntity> list) {
Setting setting = new Setting(CONFIG_PATH, CharsetUtil.CHARSET_UTF_8, true);
Setting systemSetting = setting.getSetting(SET_SYSTEM);
Boolean saveImages = Boolean.valueOf(systemSetting.get(SAVE_WEIBO_IMAGES));
@ -119,14 +119,14 @@ public class RssJob {
if (saveImages) {
// 保存图片
list.forEach(obj -> {
List<String> imgList = HtmlParseUtils.extractImageUrls(obj.getDescription());
List<String> imgList = HtmlUtils.extractImageUrls(obj.getDescription());
imgList.forEach(imgObj -> {
if (imgObj.contains("sinaimg") && !imgObj.contains("timeline_card") && !imgObj.contains("qixi2018")) {
int lastSlashIndex = imgObj.lastIndexOf('/');
// 如果找到了斜杠就从斜杠后面截取字符串
String fileName = imgObj.substring(lastSlashIndex + 1);
//log.info("微博图片文件名:{}", fileName);
HttpResponse weiBoImagesHttpRequest = HtmlParseUtils.getWeiBoImagesHttpRequest(fileName);
HttpResponse weiBoImagesHttpRequest = HtmlUtils.getWeiBoImagesHttpRequest(fileName);
byte[] bytes = weiBoImagesHttpRequest.bodyBytes();
FileUtil.writeBytes(bytes, new File(IMAGES_PATH + fileName));
if (uploadAList) {

View File

@ -1,51 +0,0 @@
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Manual HTML test: extracts the image URLs from a sample Weibo description and prints them.
 *
 * @author yanqs
 * @since 2024-08-10
 */
public class HtmlTest {

    /** Matches an {@code <img>} tag and captures the value of its {@code src} attribute. */
    private static final Pattern IMG_SRC_PATTERN =
            Pattern.compile("<img\\s+[^>]*?src\\s*=\\s*['\"]([^'\"]*?)['\"][^>]*?>");

    /**
     * Prints every image URL found in the embedded sample HTML, one per line.
     *
     * @param args unused
     * @throws IOException declared for compatibility; not thrown by the current body
     */
    public static void main(String[] args) throws IOException {
        String str="为什么温泉♨️水那么黄? <img style=\"\" src=\"https://tvax1.sinaimg.cn/large/00759jQJly1hsg8uwgmavj31401hcdy4.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax4.sinaimg.cn/large/00759jQJly1hsg8uvtqz7j31401hck91.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax2.sinaimg.cn/large/00759jQJly1hsg8ux1psqj31401z44h1.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax3.sinaimg.cn/large/00759jQJly1hsg8uxycmvj31401hc16o.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax2.sinaimg.cn/large/00759jQJly1hsg8uyn21kj31401hc4gl.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax3.sinaimg.cn/large/00759jQJly1hsg8uzeqdzj31401hck84.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax4.sinaimg.cn/large/00759jQJly1hsg8uzyy3yj3140140gz4.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax4.sinaimg.cn/large/00759jQJly1hsg8uv9nptj31401hctma.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax3.sinaimg.cn/large/00759jQJly1hsg8v09z04j31401404by.jpg\" referrerpolicy=\"no-referrer\"><br><br>";
        List<String> strings = extractImageUrls(str);
        strings.forEach(System.out::println);
    }

    /**
     * Reads a text file and returns its lines concatenated without line separators.
     *
     * @param filePath path of the file to read
     * @return the file content with all line terminators removed
     * @throws IOException if the file cannot be opened or read
     */
    public static String readHtmlFile(String filePath) throws IOException {
        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return content.toString();
    }

    /**
     * Collects the {@code src} attribute value of every {@code <img>} tag in the given HTML.
     *
     * @param htmlContent HTML content to scan
     * @return the image URLs in order of appearance; empty when there is no match
     */
    public static List<String> extractImageUrls(String htmlContent) {
        List<String> imageUrls = new ArrayList<>();
        Matcher matcher = IMG_SRC_PATTERN.matcher(htmlContent);
        while (matcher.find()) {
            imageUrls.add(matcher.group(1));
        }
        return imageUrls;
    }
}

View File

@ -1,40 +0,0 @@
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.net.URL;
/**
 * Manual RSS fetch test: downloads a feed and prints the title and link of every item.
 *
 * @author yanqs
 */
public class RSSReader {

    /**
     * Downloads the configured feed, parses it as XML and prints each item's title and link.
     *
     * @param args unused
     * @throws Exception if the feed cannot be downloaded or parsed
     */
    public static void main(String[] args) throws Exception {
        // RSS feed URL
        // URL rssUrl = new URL("https://rsshub.ys.bcrjl.com/weibo/user/6489032761");
        URL rssUrl = new URL("https://blog.yanqingshan.com/feed/");

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        // The feed is remote, untrusted XML: switch off external entity resolution (XXE).
        factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
        factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        factory.setXIncludeAware(false);
        factory.setExpandEntityReferences(false);
        DocumentBuilder builder = factory.newDocumentBuilder();

        // Parse the RSS document; the stream is closed even when parsing fails.
        Document document;
        try (java.io.InputStream in = rssUrl.openStream()) {
            document = builder.parse(in);
        }

        // Print title and link of every <item>.
        NodeList items = document.getElementsByTagName("item");
        for (int i = 0; i < items.getLength(); i++) {
            Element item = (Element) items.item(i);
            System.out.println(textOf(item, "title"));
            System.out.println(textOf(item, "link"));
        }
    }

    /**
     * Returns the text of the first descendant element with the given tag name.
     *
     * @param parent element to search in
     * @param tag    tag name to look up
     * @return the text content, or an empty string when no such element exists
     */
    private static String textOf(Element parent, String tag) {
        Element child = (Element) parent.getElementsByTagName(tag).item(0);
        return child == null ? "" : child.getTextContent();
    }
}

View File

@ -0,0 +1,67 @@
package com.bcrjl.rss.common.util;
import cn.hutool.core.io.FileUtil;
import cn.hutool.http.Header;
import cn.hutool.http.HttpRequest;
import cn.hutool.http.HttpResponse;
import org.junit.jupiter.api.Test;
import java.io.File;
import java.util.List;
import static com.bcrjl.rss.common.constant.AppConstant.IMAGES_PATH;
import static com.bcrjl.rss.common.constant.AppConstant.USER_AGENT;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests for {@link HtmlUtils}.
 *
 * @author yanqs
 */
class HtmlUtilsTest {

    /**
     * Every {@code <img>} tag of the sample Weibo description must yield its {@code src} URL,
     * in document order.
     */
    @Test
    void extractImageUrls() {
        String str = "为什么温泉♨️水那么黄? <img style=\"\" src=\"https://tvax1.sinaimg.cn/large/00759jQJly1hsg8uwgmavj31401hcdy4.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax4.sinaimg.cn/large/00759jQJly1hsg8uvtqz7j31401hck91.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax2.sinaimg.cn/large/00759jQJly1hsg8ux1psqj31401z44h1.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax3.sinaimg.cn/large/00759jQJly1hsg8uxycmvj31401hc16o.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax2.sinaimg.cn/large/00759jQJly1hsg8uyn21kj31401hc4gl.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax3.sinaimg.cn/large/00759jQJly1hsg8uzeqdzj31401hck84.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax4.sinaimg.cn/large/00759jQJly1hsg8uzyy3yj3140140gz4.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax4.sinaimg.cn/large/00759jQJly1hsg8uv9nptj31401hctma.jpg\" referrerpolicy=\"no-referrer\"><br><br><img style=\"\" src=\"https://tvax3.sinaimg.cn/large/00759jQJly1hsg8v09z04j31401404by.jpg\" referrerpolicy=\"no-referrer\"><br><br>";
        List<String> strings = HtmlUtils.extractImageUrls(str);
        // Assert instead of printing, so a regression in the regex actually fails the build.
        assertEquals(9, strings.size());
        assertEquals("https://tvax1.sinaimg.cn/large/00759jQJly1hsg8uwgmavj31401hcdy4.jpg", strings.get(0));
        assertEquals("https://tvax3.sinaimg.cn/large/00759jQJly1hsg8v09z04j31401404by.jpg", strings.get(8));
    }

    /**
     * The file name is the last path segment, with any query string removed.
     */
    @Test
    void getFileName() {
        String imgSrc = "https://h5.sinaimg.cn/m/emoticon/icon/others/ct_kele-4ce616ef95.png";
        String videoSrc = "https://f.video.weibocdn.com/o0/H5AopYlvlx08hlyjma2A010412003vHq0E010.mp4?label=mp4_ld&amp;template=360x480.24.0&amp;ori=0&amp;ps=1CwnkDw1GXwCQx&amp;Expires=1724122135&amp;ssig=QYOiNA1GMH&amp;KID=unistore,video";
        assertEquals("ct_kele-4ce616ef95.png", HtmlUtils.getFileName(imgSrc));
        assertEquals("H5AopYlvlx08hlyjma2A010412003vHq0E010.mp4", HtmlUtils.getFileName(videoSrc));
    }

    /**
     * Fetching the Weibo image stream.
     */
    @Test
    void getWeiBoImagesHttpRequest() {
        // TODO: not implemented yet; this test currently passes without checking anything.
    }

    /**
     * Downloads a Weibo video over HTTP and writes the response body below {@code IMAGES_PATH}.
     */
    @Test
    void getWeiBoVideosHttpRequest() {
        // NOTE(review): this performs a real network request, and the URL carries an
        // "Expires" parameter - presumably a signed, time-limited link; confirm it is still
        // valid before relying on the downloaded file.
        String url = "https://f.video.weibocdn.com/o0/KjwJ3CYwlx08hlyj0a8001041200ah0h0E010.mp4" +
                "?label=mp4_720p&amp;template=720x960.24.0&amp;ori=0&amp;ps=1CwnkDw1GXwCQx&amp;Expires=1724122135&amp;ssig=Gd1gjIz05D&amp;KID=unistore,video";
        HttpRequest request = HttpRequest.get(url)
                .header(Header.REFERER, "https://weibo.com/")
                .header(Header.USER_AGENT, USER_AGENT)
                .timeout(20000);
        HttpResponse httpResponse = request.executeAsync();
        byte[] bytes = httpResponse.bodyBytes();
        FileUtil.writeBytes(bytes, new File(IMAGES_PATH + "KjwJ3CYwlx08hlyj0a8001041200ah0h0E010.mp4"));
    }
}

View File

@ -1,13 +1,21 @@
package com.bcrjl.rss.common.util;
import cn.hutool.core.lang.Console;
import cn.hutool.extra.mail.MailAccount;
import com.bcrjl.rss.common.util.MailUtils;
import org.junit.jupiter.api.Test;
/**
* 邮件测试
*
* @author yanqs
* @since 2024-08-09
*/
public class MailTest {
public static void main(String[] args) {
class MailUtilsTest {
/**
* 初始化邮件账号
*/
@Test
void initMailAccount() {
MailAccount mailAccount = MailUtils.initMailAccount();
Console.log(mailAccount);
}

View File

@ -0,0 +1,24 @@
package com.bcrjl.rss.common.util;
import cn.hutool.core.lang.Console;
import com.bcrjl.rss.model.entity.RssEntity;
import org.junit.jupiter.api.Test;
import java.util.List;
/**
 * Tests for {@link RssUtils}.
 *
 * @author yanqs
 */
class RssUtilsTest {

    /**
     * Loads the entries of the blog feed through {@code RssUtils.getRssList} and logs each
     * one to the console.
     */
    @Test
    void getRssList() {
        List<RssEntity> rssList = RssUtils.getRssList("https://blog.yanqingshan.com/feed/");
        for (RssEntity entry : rssList) {
            Console.log(entry);
        }
    }
}