2023最新Java获取微博cookie,可用于爬取文章(扫码登录)
2023-12-14 17:20:49
目录
文章最下面含有完整main类代码,和完整控制层代码
一、发送请求获取图片和qrid
这里注意一定要加请求头,不然会被微博拦截
// Step 1: request a login QR code. Per the article, the Referer header must be sent
// or Weibo blocks the request.
HttpResponse response = HttpUtil.createGet("https://login.sina.com.cn/sso/qrcode/image?entry=weibo&size=180&callback=STK_17017598656821")
        .header("Referer", "https://weibo.com/")
        .execute();
System.out.println(response);
String body = response.body();

// The qrid is pulled straight out of the raw JSONP text.
String regex = "\"qrid\":\"([^\"]+)\"";
Pattern pattern = Pattern.compile(regex);
Matcher matcher = pattern.matcher(body);
if (!matcher.find()) {
    // Without a qrid the later polling step cannot work, so stop here.
    System.out.println("qrid not found");
    return;
}
String qrid = matcher.group(1);
System.out.println(qrid);

// Strip the JSONP wrapper "callback( ... )" and parse the JSON inside it.
String jsonText = body.substring(body.indexOf("(") + 1, body.lastIndexOf(")"));
JSONObject jsonObject = JSON.parseObject(jsonText);
String image = jsonObject.getJSONObject("data").getString("image");

// The code prefixes the returned image link with the scheme to get a fetchable URL.
String imageUrl = "https:" + image;
System.out.println(imageUrl);
String base64Image = convertImageToBase64(imageUrl);
System.out.println(base64Image);
转成base64,如果要集成网页系统,可以直接将url返回给前端,可以直接显示,微博没有再做拦截
/**
 * Downloads the resource at {@code imageUrl} and returns its bytes Base64-encoded.
 *
 * @param imageUrl absolute URL of the image to download
 * @return the Base64 encoding (standard alphabet) of the downloaded bytes
 * @throws java.io.UncheckedIOException if the URL is malformed or the download fails
 */
public static String convertImageToBase64(String imageUrl) {
    // try-with-resources closes both streams on every path. A failure is reported to the
    // caller instead of falling through to a null buffer or a silently truncated image.
    try (InputStream inputStream = new URL(imageUrl).openStream();
         ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
        byte[] buffer = new byte[4096];
        int bytesRead;
        while ((bytesRead = inputStream.read(buffer)) != -1) {
            outputStream.write(buffer, 0, bytesRead);
        }
        return Base64.getEncoder().encodeToString(outputStream.toByteArray());
    } catch (java.io.IOException e) {
        throw new java.io.UncheckedIOException("图片转换异常: " + imageUrl, e);
    }
}
二、发送请求确认二维码已被正确扫描
发送请求时带上第一步获取的qrid和时间戳,轮询直到扫码确认后返回alt加密参数
// Step 2: poll the QR status endpoint until the scan is confirmed; the confirmed
// response carries the "alt" token needed for the login request.
String alt = "";
while (true) {
    String url = "https://login.sina.com.cn/sso/qrcode/check?entry=sso&qrid=" + qrid + "&callback=STK_" + System.currentTimeMillis();
    HttpResponse response1 = HttpUtil.createGet(url)
            .header("Referer", "https://weibo.com/")
            .execute();
    try {
        TimeUnit.SECONDS.sleep(3);
    } catch (InterruptedException e) {
        System.out.println("睡眠异常");
        // Restore the interrupt flag and stop polling; otherwise an interrupt could
        // never end this loop.
        Thread.currentThread().interrupt();
        return;
    }
    String body1 = response1.body();
    System.out.println(body1);
    // Strip the JSONP wrapper before parsing.
    String jsonData1 = body1.substring(body1.indexOf("(") + 1, body1.lastIndexOf(")"));
    JSONObject jsonObject1 = JSON.parseObject(jsonData1);
    int retcode = jsonObject1.getIntValue("retcode");
    if (retcode == 20000000) {
        JSONObject data1 = jsonObject1.getJSONObject("data");
        if (data1 == null) {
            System.out.println("alt not found");
            return;
        }
        alt = data1.getString("alt");
        System.out.println("alt: " + alt);
        break;
    }
    // NOTE(review): these two codes end the flow; presumably the QR code expired or
    // was cancelled. Confirm against the Weibo API.
    if (retcode == 50114004 || retcode == 50114015) {
        return;
    }
}
三、携带拿到的alt,发送登录请求,获取cookie
首先拼接上alt和时间戳发送请求,返回的是三个链接;其中有两个链接不带action=login参数,请求前一定要手动拼上,否则拿到的cookie无效
// Step 3: exchange the alt token for the cross-domain login links, then visit each link
// and collect the cookies it sets.
String altUrl = "https://login.sina.com.cn/sso/login.php?entry=qrcodesso&returntype=TEXT&crossdomain=1&cdult=3&domain=weibo.com&alt=" + alt + "&savestate=30&callback=STK_" + System.currentTimeMillis();
HttpResponse response1 = HttpUtil.createGet(altUrl)
        .execute();
System.out.println(response1.body());
String jsonData1 = response1.body().substring(response1.body().indexOf("(") + 1, response1.body().lastIndexOf(")"));
JSONObject jsonObject1 = JSON.parseObject(jsonData1);
JSONArray crossDomainUrlList = jsonObject1.getJSONArray("crossDomainUrlList");
System.out.println(crossDomainUrlList);
// The code below indexes entries 0..2, so bail out early on an error payload.
if (crossDomainUrlList == null || crossDomainUrlList.size() < 3) {
    System.out.println("crossDomainUrlList missing or incomplete");
    return;
}
List<HttpCookie> cookies = new ArrayList<>();
Collections.reverse(crossDomainUrlList);
// After the reversal the first link is requested as-is.
HttpResponse response3 = HttpUtil.createGet((String) crossDomainUrlList.get(0))
        .execute();
System.out.println(crossDomainUrlList.get(0) + ":" + response3);
cookies.addAll(response3.getCookies());
// Per the article, the other two links lack action=login and the cookie is invalid without it.
HttpResponse response4 = HttpUtil.createGet(crossDomainUrlList.get(1) + "&action=login")
        .execute();
System.out.println(crossDomainUrlList.get(1) + "&action=login" + ":" + response4);
cookies.addAll(response4.getCookies());
HttpResponse response5 = HttpUtil.createGet(crossDomainUrlList.get(2) + "&action=login")
        .execute();
System.out.println(crossDomainUrlList.get(2) + "&action=login" + ":" + response5);
cookies.addAll(response5.getCookies());
System.out.println(cookies);
String finalCookie = getString(cookies);
// Print the final cookie string.
System.out.println(finalCookie);
cookie拼成string+去重
/**
 * Builds a single {@code name=value; name=value} string from the given cookies.
 * When a name occurs more than once, only the first value seen is kept.
 *
 * @param cookies cookies collected from the login responses
 * @return the joined cookie string, or an empty string when there are no cookies
 */
private static String getString(List<HttpCookie> cookies) {
    // De-duplicate by name: the first value wins, later duplicates are skipped.
    HashMap<String, String> firstValueByName = new HashMap<>();
    for (HttpCookie item : cookies) {
        if (firstValueByName.containsKey(item.getName())) {
            continue;
        }
        firstValueByName.put(item.getName(), item.getValue());
    }
    // Join the pairs, writing the separator only between entries.
    StringBuilder joined = new StringBuilder();
    for (Map.Entry<String, String> pair : firstValueByName.entrySet()) {
        if (joined.length() > 0) {
            joined.append("; ");
        }
        joined.append(pair.getKey()).append("=").append(pair.getValue());
    }
    return joined.toString();
}
四、main类完整方法代码
这里测试的话可以使用控制台输出的base64,用一些网页工具,将base64转成图片,扫码后,就能输出完整cookie
/**
 * Runs the whole QR-code login flow from the console: requests a QR code, prints it as
 * Base64, polls until the scan is confirmed, then visits the returned login links and
 * prints the collected cookie string.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Fetch the QR code id and the QR image link.
    HttpResponse response = HttpUtil.createGet("https://login.sina.com.cn/sso/qrcode/image?entry=weibo&size=180&callback=STK_17017598656821")
            .header("Referer", "https://weibo.com/")
            .execute();
    System.out.println(response);
    String body = response.body();
    String regex = "\"qrid\":\"([^\"]+)\"";
    Matcher matcher = Pattern.compile(regex).matcher(body);
    if (!matcher.find()) {
        // Polling with an empty qrid can never succeed, so stop here.
        System.out.println("qrid not found");
        return;
    }
    String qrid = matcher.group(1);
    System.out.println(qrid);

    // Strip the JSONP wrapper "callback( ... )" and parse the JSON inside it.
    String jsonText = body.substring(body.indexOf("(") + 1, body.lastIndexOf(")"));
    JSONObject jsonObject = JSON.parseObject(jsonText);
    String image = jsonObject.getJSONObject("data").getString("image");
    // The code prefixes the returned image link with the scheme to get a fetchable URL.
    String imageUrl = "https:" + image;
    System.out.println(imageUrl);
    String base64Image = convertImageToBase64(imageUrl);
    System.out.println(base64Image);

    // Poll until the scan is confirmed; the confirmed response carries the alt token.
    String alt = "";
    while (true) {
        String url = "https://login.sina.com.cn/sso/qrcode/check?entry=sso&qrid=" + qrid + "&callback=STK_" + System.currentTimeMillis();
        HttpResponse response1 = HttpUtil.createGet(url)
                .header("Referer", "https://weibo.com/")
                .execute();
        try {
            TimeUnit.SECONDS.sleep(3);
        } catch (InterruptedException e) {
            System.out.println("睡眠异常");
            // Restore the interrupt flag and stop; otherwise an interrupt could never
            // end this loop.
            Thread.currentThread().interrupt();
            return;
        }
        String body1 = response1.body();
        System.out.println(body1);
        String jsonData1 = body1.substring(body1.indexOf("(") + 1, body1.lastIndexOf(")"));
        JSONObject jsonObject1 = JSON.parseObject(jsonData1);
        int retcode = jsonObject1.getIntValue("retcode");
        if (retcode == 20000000) {
            JSONObject data1 = jsonObject1.getJSONObject("data");
            if (data1 == null) {
                System.out.println("alt not found");
                return;
            }
            alt = data1.getString("alt");
            System.out.println("alt: " + alt);
            break;
        }
        // NOTE(review): these two codes end the flow; presumably the QR code expired or
        // was cancelled. Confirm against the Weibo API.
        if (retcode == 50114004 || retcode == 50114015) {
            return;
        }
    }

    // Exchange the alt token for the cross-domain login links.
    String altUrl = "https://login.sina.com.cn/sso/login.php?entry=qrcodesso&returntype=TEXT&crossdomain=1&cdult=3&domain=weibo.com&alt=" + alt + "&savestate=30&callback=STK_" + System.currentTimeMillis();
    HttpResponse response1 = HttpUtil.createGet(altUrl)
            .execute();
    System.out.println(response1.body());
    String jsonData1 = response1.body().substring(response1.body().indexOf("(") + 1, response1.body().lastIndexOf(")"));
    JSONObject jsonObject1 = JSON.parseObject(jsonData1);
    JSONArray crossDomainUrlList = jsonObject1.getJSONArray("crossDomainUrlList");
    System.out.println(crossDomainUrlList);
    // The code below indexes entries 0..2, so bail out early on an error payload.
    if (crossDomainUrlList == null || crossDomainUrlList.size() < 3) {
        System.out.println("crossDomainUrlList missing or incomplete");
        return;
    }

    List<HttpCookie> cookies = new ArrayList<>();
    Collections.reverse(crossDomainUrlList);
    // After the reversal the first link is requested as-is.
    HttpResponse response3 = HttpUtil.createGet((String) crossDomainUrlList.get(0))
            .execute();
    System.out.println(crossDomainUrlList.get(0) + ":" + response3);
    cookies.addAll(response3.getCookies());
    // Per the article, the other two links lack action=login and the cookie is invalid without it.
    HttpResponse response4 = HttpUtil.createGet(crossDomainUrlList.get(1) + "&action=login")
            .execute();
    System.out.println(crossDomainUrlList.get(1) + "&action=login" + ":" + response4);
    cookies.addAll(response4.getCookies());
    HttpResponse response5 = HttpUtil.createGet(crossDomainUrlList.get(2) + "&action=login")
            .execute();
    System.out.println(crossDomainUrlList.get(2) + "&action=login" + ":" + response5);
    cookies.addAll(response5.getCookies());
    System.out.println(cookies);
    String finalCookie = getString(cookies);
    // Print the final cookie string.
    System.out.println(finalCookie);
}
/**
 * Builds a single {@code name=value; name=value} string from the given cookies.
 * When a name occurs more than once, only the first value seen is kept.
 *
 * @param cookies cookies collected from the login responses
 * @return the joined cookie string, or an empty string when there are no cookies
 */
private static String getString(List<HttpCookie> cookies) {
    // De-duplicate by name: the first value wins, later duplicates are skipped.
    HashMap<String, String> firstValueByName = new HashMap<>();
    for (HttpCookie item : cookies) {
        if (firstValueByName.containsKey(item.getName())) {
            continue;
        }
        firstValueByName.put(item.getName(), item.getValue());
    }
    // Join the pairs, writing the separator only between entries.
    StringBuilder joined = new StringBuilder();
    for (Map.Entry<String, String> pair : firstValueByName.entrySet()) {
        if (joined.length() > 0) {
            joined.append("; ");
        }
        joined.append(pair.getKey()).append("=").append(pair.getValue());
    }
    return joined.toString();
}
/**
 * Downloads the resource at {@code imageUrl} and returns its bytes Base64-encoded.
 *
 * @param imageUrl absolute URL of the image to download
 * @return the Base64 encoding (standard alphabet) of the downloaded bytes
 * @throws java.io.UncheckedIOException if the URL is malformed or the download fails
 */
public static String convertImageToBase64(String imageUrl) {
    // try-with-resources closes both streams on every path. A failure is reported to the
    // caller instead of falling through to a null buffer or a silently truncated image.
    try (InputStream inputStream = new URL(imageUrl).openStream();
         ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
        byte[] buffer = new byte[4096];
        int bytesRead;
        while ((bytesRead = inputStream.read(buffer)) != -1) {
            outputStream.write(buffer, 0, bytesRead);
        }
        return Base64.getEncoder().encodeToString(outputStream.toByteArray());
    } catch (java.io.IOException e) {
        throw new java.io.UncheckedIOException("图片转换异常: " + imageUrl, e);
    }
}
五、控制层接口完整代码
返回类
/**
* Response object describing a Weibo login QR code.
*
* @author youzai
* @date 2023/12/06
*/
@Data
public class SpiderWeiboQrVO implements Serializable {
// URL of the QR-code image.
@ApiModelProperty(value = "图片url")
private String imgUrl;
// Id of the QR-code image (the qrid).
@ApiModelProperty(value = "图片id")
private String qRid;
}
控制层,这里的R是自己封装的,可以根据你的系统自己改
控制层这里因为要集成到网页,第一步先把二维码url返回给前端,前端用src渲染;用户扫码完成后,前端需要再次请求后端确认,并把第一步返回的qrid传回后端。后端执行登录逻辑,就可以获取cookie了。
/**
 * REST endpoints for the Weibo QR-code login flow: one hands out a QR code, the other
 * turns a scanned QR code into a cookie string.
 */
@RestController
@RequestMapping("/api/spider/toolbox")
public class SpiderToolBoxController {

    /** Extracts the qrid from the raw JSONP text; compiled once instead of per request. */
    private static final Pattern QRID_PATTERN = Pattern.compile("\"qrid\":\"([^\"]+)\"");

    /** Value of {@code retcode} that the check endpoint returns together with the alt token. */
    private static final int RETCODE_CONFIRMED = 20000000;

    /**
     * Requests a new Weibo login QR code.
     *
     * @return {@link R }<{@link SpiderWeiboQrVO }> holding the image link and the qrid, or a
     *         failure result when the upstream response cannot be understood
     * @author youzai
     * @date 2023/12/06
     */
    @GetMapping("/weiboQrCode")
    public R<SpiderWeiboQrVO> weiboQrCode() {
        HttpResponse response = HttpUtil.createGet("https://login.sina.com.cn/sso/qrcode/image?entry=weibo&size=180&callback=STK_" + System.currentTimeMillis())
                .header("Referer", "https://weibo.com/")
                .execute();
        String body = response.body();
        Matcher matcher = QRID_PATTERN.matcher(body == null ? "" : body);
        if (!matcher.find()) {
            return R.failed("二维码获取失败!");
        }
        String qrid = matcher.group(1);

        JSONObject jsonObject = parseJsonp(body);
        JSONObject data = jsonObject == null ? null : jsonObject.getJSONObject("data");
        if (data == null) {
            // Report a malformed payload as a normal failure instead of throwing.
            return R.failed("二维码获取失败!");
        }
        String imageUrl = data.getString("image");

        SpiderWeiboQrVO spiderWeiboQrVO = new SpiderWeiboQrVO();
        spiderWeiboQrVO.setQRid(qrid);
        spiderWeiboQrVO.setImgUrl(imageUrl);
        return R.ok(spiderWeiboQrVO);
    }

    /**
     * Completes the login for a QR code that has been scanned and returns the cookie string.
     *
     * @param qrid id of the QR code, as returned by {@code weiboQrCode}
     * @return {@link R }<{@link String }> holding the cookie string, or a failure result when
     *         the scan is not confirmed or an upstream response is incomplete
     * @author youzai
     * @date 2023/12/06
     */
    @GetMapping("/weiboLogin/{qrid}")
    public R<String> weiboLogin(@PathVariable("qrid") String qrid) {
        // Ask whether the QR code has been confirmed; a confirmed response carries "alt".
        String url = "https://login.sina.com.cn/sso/qrcode/check?entry=sso&qrid=" + qrid + "&callback=STK_" + System.currentTimeMillis();
        HttpResponse checkResponse = HttpUtil.createGet(url)
                .header("Referer", "https://weibo.com/")
                .execute();
        JSONObject checkResult = parseJsonp(checkResponse.body());
        if (checkResult == null || checkResult.getIntValue("retcode") != RETCODE_CONFIRMED) {
            return R.failed("登录失败");
        }
        JSONObject checkData = checkResult.getJSONObject("data");
        if (checkData == null) {
            return R.failed("登录失败");
        }
        String alt = checkData.getString("alt");

        // Exchange the alt token for the cross-domain login links.
        String altUrl = "https://login.sina.com.cn/sso/login.php?entry=qrcodesso&returntype=TEXT&crossdomain=1&cdult=3&domain=weibo.com&alt=" + alt + "&savestate=30&callback=STK_" + System.currentTimeMillis();
        HttpResponse loginResponse = HttpUtil.createGet(altUrl)
                .execute();
        JSONObject loginResult = parseJsonp(loginResponse.body());
        JSONArray crossDomainUrlList = loginResult == null ? null : loginResult.getJSONArray("crossDomainUrlList");
        // The loop below indexes entries 0..2, so reject an incomplete list up front.
        if (crossDomainUrlList == null || crossDomainUrlList.size() < 3) {
            return R.failed("登录失败");
        }

        List<HttpCookie> cookies = new ArrayList<>();
        Collections.reverse(crossDomainUrlList);
        for (int i = 0; i < 3; i++) {
            // After the reversal the first link is requested as-is; the other two get
            // action=login appended, which the article says the cookie needs to be valid.
            String target = i == 0
                    ? (String) crossDomainUrlList.get(0)
                    : crossDomainUrlList.get(i) + "&action=login";
            HttpResponse linkResponse = HttpUtil.createGet(target)
                    .execute();
            cookies.addAll(linkResponse.getCookies());
        }
        return R.ok(getCookieString(cookies));
    }

    /**
     * Strips a JSONP wrapper of the form {@code callback( ... )} and parses the JSON inside.
     *
     * @param body raw response text, may be {@code null}
     * @return the parsed object, or {@code null} when no parenthesised payload is found
     */
    private static JSONObject parseJsonp(String body) {
        if (body == null) {
            return null;
        }
        int open = body.indexOf('(');
        int close = body.lastIndexOf(')');
        if (open < 0 || close <= open) {
            return null;
        }
        return JSON.parseObject(body.substring(open + 1, close));
    }

    /**
     * Builds a single {@code name=value; name=value} string from the given cookies.
     * When a name occurs more than once, only the first value seen is kept.
     *
     * @param cookies cookies collected from the login responses
     * @return {@link String } the joined cookie string, empty when there are no cookies
     * @author youzai
     * @date 2023/12/06
     */
    private static String getCookieString(List<HttpCookie> cookies) {
        StringBuilder cookieBuilder = new StringBuilder();
        HashMap<String, String> cookieMap = new HashMap<>();
        // De-duplicate by name, keeping the first value.
        for (HttpCookie cookie : cookies) {
            String name = cookie.getName();
            String value = cookie.getValue();
            if (!cookieMap.containsKey(name)) {
                cookieMap.put(name, value);
            }
        }
        // Assemble the name=value pairs.
        for (Map.Entry<String, String> entry : cookieMap.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            String keyValueString = key + "=" + value;
            cookieBuilder.append(keyValueString).append("; ");
        }
        // Drop the trailing separator.
        String finalCookie = cookieBuilder.toString();
        if (finalCookie.endsWith("; ")) {
            finalCookie = finalCookie.substring(0, finalCookie.length() - 2);
        }
        return finalCookie;
    }
}
文章来源:https://blog.csdn.net/weixin_43162044/article/details/134922302
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。 如若内容造成侵权/违法违规/事实不符,请联系我的编程经验分享网邮箱:veading@qq.com进行投诉反馈,一经查实,立即删除!
本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。 如若内容造成侵权/违法违规/事实不符,请联系我的编程经验分享网邮箱:veading@qq.com进行投诉反馈,一经查实,立即删除!