再谈福利视频数据爬虫

最近忽然发现以前爬取的一个视频网站的爬虫失败了,即使更新token之后依旧无法正常爬取数据。为了能够再次爬取数据就需要进一步地分析apk文件,但是apk文件进行了加固,目前只有一个夜神模拟器是root的,但是很不幸,这个apk在模拟器上运行不了,也就无法进行脱壳。

此时可以考虑使用vmos进行脱壳,自带root环境,直接在手机上安装即可。

VMOS 是一款运行在安卓上的模拟器(这里的“模拟器”泛指类似PC端雷电模拟器的软件),相当于是在手机上虚拟化出的另一个系统。

在手机上运行vmos,安装xposed和反射大师进行脱壳即可,脱壳之后的class文件就可以放入jeb进行分析了。解析之后要定位关键代码不难,header中的d生成函数如下:

/**
 * Interceptor body recovered from the unpacked APK: decorates every outgoing
 * request with the app's identification headers before passing it on.
 *
 * Requests whose URL contains "avatar/upload" receive the plain headers
 * (Connection, platform, version, encryptpic, token, app). All other requests
 * receive a single "d" header produced by EncryUtil.encryptAes from a sorted
 * parameter map that carries the same values plus a timestamp "t" and a
 * signature "s".
 *
 * @param arg15 the interceptor chain supplying the request and used to proceed
 * @return the response returned by the rest of the chain
 * @throws IOException propagated from {@code arg15.proceed}
 */
static Response a(Interceptor.Chain arg15) throws IOException {
        Request request = arg15.request();
        Request.Builder builder = request.newBuilder();

        if (request.url().toString().contains("avatar/upload")) {
            // Upload endpoint: headers are sent in clear, no signature involved.
            builder.tag(request.url());
            builder.addHeader("Connection", "close");
            builder.addHeader("platform", String.valueOf(Platform.ANDROID.getValue()));
            builder.addHeader("version", "2.1.3");
            builder.addHeader("encryptpic", "1");
            builder.addHeader("token", SPHelper.getString(App.h(), "token", ""));
            builder.addHeader("app", "3");
        } else {
            // TreeMap keeps its keys in natural String order, so whatever
            // EncryUtil serializes is ordered by key, not by insertion.
            TreeMap params = new TreeMap();
            params.put("token", SPHelper.getString(App.h(), "token", ""));
            params.put("app", "3");
            params.put("version", "2.1.3");
            params.put("platform", String.valueOf(Platform.ANDROID.getValue()));
            params.put("Connection", "close");
            params.put("encryptpic", "1");
            // "k" is only present while the signature is computed ...
            params.put("k", "hyfr6iyPZMZfJFEc");
            params.put("t", String.valueOf(System.nanoTime()));
            params.put("s", EncryUtil.encryptMd5(params));
            // ... and is dropped again before the map is encrypted and sent.
            params.remove("k");

            builder.tag(request.url());
            builder.addHeader("d", EncryUtil.encryptAes(params));
        }

        return arg15.proceed(builder.build());
    }

在python中进行计算的时候发现计算的数值与apk生成的数值不一样,python代码如下:

def get_new_token():
    """Request a new token from the ``/api/v2/bootstrap`` endpoint.

    Hand-assembles two signed JSON payloads (one for the ``d`` request header,
    one for the ``d`` form field), posts them, decrypts the ``d`` field of the
    JSON reply and returns the ``data.token`` value found inside it.

    NOTE: the JSON literals below list ``"app"`` before ``"Connection"`` and
    place ``"s"`` right after ``"imei"``. The Java ``TreeMap`` output shown
    later in this article orders the keys differently (``"Connection"`` first,
    ``"s"`` between ``"resolution"`` and ``"serial"``), which is why the
    digests computed here differ from the ones the app produces.

    NOTE(review): the ``"k"`` values used here are not the one that appears in
    the decompiled interceptor -- confirm whether that is intentional.

    Returns:
        The token taken from the decrypted response.
    """
    import uuid

    android_id = random_android_id()

    # --- payload for the "d" request header -------------------------------
    header_unsigned = (
        '{"app":"3","Connection":"close","encryptpic":"1","k":"hyPZMZfJFEc",'
        + '"platform":"1","t":"237727916417263","token":"","version":"2.1.3"}'
    )
    # The signed variant drops "k" and carries the digest of the unsigned one.
    header_signed = (
        '{"app":"3","Connection":"close","encryptpic":"1","platform":"1","s":"'
        + aes.md5(header_unsigned)
        + '","t":"237727916417263","token":"","version":"2.1.3"}'
    )
    header_d = aes.encrypt(header_signed)

    device_uuid = str(uuid.uuid4())
    print('duuid:', device_uuid)

    # --- payload for the form body ----------------------------------------
    # Both body variants share everything except the entry after "imei".
    body_head = (
        '{"android_id":"' + android_id
        + '","brand":"google","channel_id":"1","code":"logourl","device_uuid":"'
        + device_uuid
        + '","deviceid":"' + android_id
        + '","imei":"",'
    )
    body_tail = (
        '"mac_address":"10:bf:48:d0:bf:3f","model":"Nexus 7",'
        + '"resolution":"800x1205","serial":"' + android_id
        + '","t":"137727989875263"}'
    )
    body_unsigned = body_head + '"k":"hyFEc",' + body_tail
    body_signed = body_head + '"s":"' + aes.md5(body_unsigned) + '",' + body_tail
    form_body = 'd=' + quote_plus(aes.encrypt(body_signed))

    request_headers = {
        'content-type': 'application/x-www-form-urlencoded',
        'd': header_d,
        'Host': 'jk.3igdfi.com',
        'user-agent': 'okhttp/3.10.0',
    }
    response = requests.post(
        'https://jk.3igdfi.com/api/v2/bootstrap',
        data=form_body,
        headers=request_headers,
    )
    print('response text', response.text)

    # The reply is JSON whose "d" field holds the encrypted inner JSON document.
    envelope = json.loads(response.text)
    decrypted = aes.decrypt(envelope['d'])
    token = json.loads(decrypted)['data']['token']
    print('token=', token)
    return token

最终使用java重写了计算部分,发现生成的字符串中键的排序,第一个是Connection~~,这尼玛,一直以为是按照字母顺序(不区分大小写)排序。实际上TreeMap是按字符串的Unicode码点来比较键的,大写字母排在小写字母之前,所以Connection排在app前面。

    /**
     * Rebuilds the two parameter maps used for signing (request header and
     * request body) and prints, for each of them, the JSON serialization, the
     * MD5 of that serialization and the map's own toString() form, so the key
     * order can be inspected.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        TreeMap<String, String> headerParams = new TreeMap<>();
        headerParams.put("token", "LzF2SHp6NldrVmVBaHFobmVMVys3R09TYjJrUmh0amJmTnJtejJ3QnNDMDh2bmxVRU9SODA3bTZzYmlIVzB2RmhOdTZMaVpmb0JBMG9EcEZMd0EvN2Q1c1dlK3QvRUo5cFlYOUlWMTFBMlBiS0V2NmRjdmkrVG9UOVZ1TDZGV0JiQ2J0a3h0MFl5eitKYm5Ib2E3bUhPdllnMkU5ZUxFOTUvZGVmaFNmQm90aFRjakJYQWFoYmdNY21VNDJDTVkv");
        headerParams.put("app", "3");
        headerParams.put("version", "2.1.3");
        headerParams.put("platform", "1");
        headerParams.put("Connection", "close");
        headerParams.put("encryptpic", "1");
        headerParams.put("k", "hyfr6iyPZMZfJFEc");
        headerParams.put("t", "237727916417262");
        printSignatureInputs(headerParams);

        // Reference body payload in its signed form ("s" present, "k" absent):
        // {"android_id":"111849acc1dd0b21","brand":"google","channel_id":"1","code":"logourl","device_uuid":"664e154b-4a5c-31dc-b69a-e08a68565ffd","deviceid":"111849acc1dd0b21","imei":"","mac_address":"10:bf:48:d0:bf:3f","model":"Nexus 7","resolution":"800x1205","s":"eab1fd0614be1da3c7837f2d96fa9b38","serial":"015d256875181e11","t":"237727989875261"}
        TreeMap<String, String> bodyParams = new TreeMap<>();
        bodyParams.put("android_id", "111849acc1dd0b21");
        bodyParams.put("brand", "google");
        bodyParams.put("channel_id", "1");
        bodyParams.put("code", "logourl");
        bodyParams.put("device_uuid", "664e154b-4a5c-31dc-b69a-e08a68565ffd");
        bodyParams.put("deviceid", "111849acc1dd0b21");
        bodyParams.put("imei", "");
        bodyParams.put("mac_address", "10:bf:48:d0:bf:3f");
        bodyParams.put("model", "Nexus 7");
        bodyParams.put("resolution", "800x1205");
        bodyParams.put("serial", "015d256875181e11");
        bodyParams.put("t", "237727989875261");
        bodyParams.put("k", "hyfr6iyPZMZfJFEc");
        printSignatureInputs(bodyParams);
    }

    /** Prints the map's JSON form, the MD5 of that JSON, then the map itself. */
    private static void printSignatureInputs(TreeMap<String, String> params) {
        System.out.println(JSON.toJSONString(params));
        System.out.println(stringToMD5(JSON.toJSONString(params)));

        System.out.println(params);
    }
"E:\Program Files\Java\jdk1.8.0_121\bin\java.exe" ":{"Connection":"close","app":"3","encryptpic":"1","k":"hyfr6iyPZMZfJFEc","platform":"1","t":"237727916417262","token":"LzF2SHp6NldrVmVBaHFobmVMVys3R09TYjJrUmh0amJmTnJtejJ3QnNDMDh2bmxVRU9SODA3bTZzYmlIVzB2RmhOdTZMaVpmb0JBMG9EcEZMd0EvN2Q1c1dlK3QvRUo5cFlYOUlWMTFBMlBiS0V2NmRjdmkrVG9UOVZ1TDZGV0JiQ2J0a3h0MFl5eitKYm5Ib2E3bUhPdllnMkU5ZUxFOTUvZGVmaFNmQm90aFRjakJYQWFoYmdNY21VNDJDTVkv","version":"2.1.3"}
Md5:86a30a65d4f0a94ae069fae2e598878c
{Connection=close, app=3, encryptpic=1, k=hyfr6iyPZMZfJFEc, platform=1, t=237727916417262, token=LzF2SHp6NldrVmVBaHFobmVMVys3R09TYjJrUmh0amJmTnJtejJ3QnNDMDh2bmxVRU9SODA3bTZzYmlIVzB2RmhOdTZMaVpmb0JBMG9EcEZMd0EvN2Q1c1dlK3QvRUo5cFlYOUlWMTFBMlBiS0V2NmRjdmkrVG9UOVZ1TDZGV0JiQ2J0a3h0MFl5eitKYm5Ib2E3bUhPdllnMkU5ZUxFOTUvZGVmaFNmQm90aFRjakJYQWFoYmdNY21VNDJDTVkv, version=2.1.3}
source string:{"android_id":"111849acc1dd0b21","brand":"google","channel_id":"1","code":"logourl","device_uuid":"664e154b-4a5c-31dc-b69a-e08a68565ffd","deviceid":"111849acc1dd0b21","imei":"","k":"hyfr6iyPZMZfJFEc","mac_address":"10:bf:48:d0:bf:3f","model":"Nexus 7","resolution":"800x1205","serial":"015d256875181e11","t":"237727989875261"}
Md5:eab1fd0614be1da3c7837f2d96fa9b38
{android_id=111849acc1dd0b21, brand=google, channel_id=1, code=logourl, device_uuid=664e154b-4a5c-31dc-b69a-e08a68565ffd, deviceid=111849acc1dd0b21, imei=, k=hyfr6iyPZMZfJFEc, mac_address=10:bf:48:d0:bf:3f, model=Nexus 7, resolution=800x1205, serial=015d256875181e11, t=237727989875261}

调整排序之后的代码就可以正常请求token了。

☆版权☆

* 网站名称:obaby@mars
* 网址:https://oba.by/
* 个性:https://oba.by/
* 本文标题: 《再谈福利视频数据爬虫》
* 本文链接:https://zhongxiaojie.cn/2020/09/7393
* 短链接:https://oba.by/?p=7393
* 转载文章请标明文章来源,原文标题以及原文链接。请遵从 《署名-非商业性使用-相同方式共享 2.5 中国大陆 (CC BY-NC-SA 2.5 CN) 》许可协议。


You may also like

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注