JsCrawler
JsCrawler copied to clipboard
A library for dynamically updating crawler scripts in Android apps
JsCrawler lets an Android app update its crawler scripts dynamically
How to setup
Step 1. Add the JitPack repository to your build file
// Adds the JitPack Maven repository for all projects in the build.
// The `...` line stands for whatever repositories the build file already declares.
allprojects {
repositories {
...
maven { url 'https://jitpack.io' }
}
}
Step 2. Add the dependency
// Pulls JsCrawler 1.0.0 from JitPack.
// NOTE(review): the `compile` configuration is deprecated in newer Gradle versions;
// builds using them should declare this with `implementation` instead — confirm against your Gradle version.
dependencies {
compile 'com.github.YuanKJ-:JsCrawler:1.0.0'
}
Usage
Initialize JsCrawler in your Application:
/**
 * Application subclass that sets up JsCrawler in {@code onCreate()} and
 * releases it in {@code onTerminate()}.
 */
public class MyApplication extends Application {

    @Override
    public void onCreate() {
        super.onCreate();
        // Initialize JsCrawler with this Application first, then fetch the instance.
        JsCrawler.initialize(this);
        final JsCrawler crawler = JsCrawler.getInstance();
        // turn on JQuery support for scripts
        crawler.setJQueryEnabled(true);
    }

    @Override
    public void onTerminate() {
        // NOTE(review): per the Android documentation, onTerminate() is only used in
        // emulated process environments and is not invoked on production devices —
        // confirm nothing depends on release() running there.
        super.onTerminate();
        JsCrawler.release();
    }
}
Load script and call function getBlogList
Note: make sure to call the callFunction method on the UI thread.
/**
 * Sample activity that loads a crawler script from external storage and calls
 * its {@code getBlogList} function through JsCrawler.
 */
public class MainActivity extends Activity {
    /** Log tag used by every log statement in this activity. */
    private static final String TAG = "MainActivity";

    private JsCrawler jsCrawler;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        // get JsCrawler instance
        jsCrawler = JsCrawler.getInstance();
        final String js = loadJs();
        if (js == null) {
            // loadJs() has already logged the cause; there is no script to run.
            Log.d(TAG, "onCreate: crawler script could not be loaded");
            return;
        }
        jsCrawler.callFunction(js, new JsCallback() {
            @Override
            public void onResult(String result) {
                Log.d(TAG, "onResult: " + result);
                // use json to communicate between js and java
                Gson gson = new Gson();
                MyModel model = gson.fromJson(result, MyModel.class);
                // do something
            }

            @Override
            public void onError(String errorMessage) {
                Log.d(TAG, "onError: " + errorMessage);
            }
        }, "getBlogList");
    }

    /**
     * Reads {@code /Download/crawler.js} from external storage as UTF-8 text.
     *
     * @return the whole file content, an empty string when the file is empty,
     *         or {@code null} when the file cannot be opened or closed
     */
    public String loadJs() {
        String path = Environment.getExternalStorageDirectory()
                .getAbsolutePath() + "/Download/crawler.js";
        // try-with-resources closes the scanner and the stream on every path,
        // including when reading fails part-way through.
        try (InputStream inputStream = new FileInputStream(new File(path));
             Scanner scanner = new Scanner(inputStream, "UTF-8")) {
            // "\\A" only matches the start of input, so a single token is the whole file.
            scanner.useDelimiter("\\A");
            // An empty file has no token; next() would throw NoSuchElementException.
            return scanner.hasNext() ? scanner.next() : "";
        } catch (final IOException e) {
            Log.e(TAG, "loadJs: failed to read " + path, e);
        }
        return null;
    }
}
Call js function with parameters:
// The script declares four distinct parameters (a, b, c, d), one for each of the
// four arguments passed after the function name. A repeated parameter name would
// make the last argument shadow the first.
jsCrawler.callFunction("function myFunction(a, b, c, d) { return 'result'; }",
        new JsCallback() {
            @Override
            public void onResult(String result) {
                // handle result
            }

            @Override
            public void onError(String errorMessage) {
                // handle error
            }
        }, "myFunction", "parameter 1", "parameter 2", 912, 101.3);
JavaScript sample
You can easily make HTTP requests in JavaScript and use JQuery to parse the response body.
/**
 * Fetches the blog index page and returns the articles found on it as a JSON
 * string (array of {title, url, describe}). Returns the plain string
 * "response error" when the response code is not 200.
 */
function getBlogList() {
    var url = "http://droidyue.com";

    // assemble a GET request with a 10000 ms timeout
    var builder = new RequestBuilder();
    builder.url(url);
    builder.method("GET");
    builder.timeout(10000);
    var request = builder.build();

    // executeByRequest() returns a JSON string, for example:
    //   {"code":"200", "message":"OK", "body":"content"}
    //   {"code":"404", "message":"NOT FOUND", "body":"content"}
    //   {"code":"-1", "message":"Request Exception", "body":""}
    var rawResponse = RequestEngine.executeByRequest(request);

    // NOTE(review): eval() executes whatever the response string contains;
    // JSON.parse would be safer if the engine always emits strict JSON — confirm.
    var response = eval("("+rawResponse+")");

    // anything other than 200 is treated as a failure
    if (response.code != 200) {
        return "response error";
    }

    // walk the article nodes of the page body with JQuery
    var articles = [];
    var articleNodes = $(response.body).find(".blog-index article");
    $.each(articleNodes, function(i, node) {
        var item = $(node);
        var link = item.find(".entry-title a").first();
        articles.push({
            title: link.text(),
            url: url + link.attr("href"),
            describe: item.find(".entry-content").text().trim()
        });
    });

    // serialize the collected articles
    return JSON.stringify(articles);
}
RequestBuilder API
You can set the method, headers, cookies, form data, body, and timeout of a request.
// Walkthrough of the RequestBuilder setters; each step below configures the same builder.
// create the builder; its setters support chained calls
var builder = new RequestBuilder();
// set the url
builder.url("http://api.kejie.tk");
// set the method; only POST and GET are supported
builder.method("POST");
builder.method("GET");
// set headers with addHeader or setHeaders
builder.addHeader("User-Agent", "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N)")
.addHeader("Referer", "http://api.kejie.tk");
var headers = {
"User-Agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N)",
"Referer": "http://api.kejie.tk"
}
builder.setHeaders(headers);
// set cookies with addCookie or setCookies
builder.addCookie("uid", "1170120F8E53899BC88B236FA6A731FC");
var cookies = {
"uid": "1170120F8E53899BC88B236FA6A731FC",
"type": "1"
}
builder.setCookies(cookies);
// set data with addData or setData
// for GET, the data is encoded as a query string appended to the url
// for POST, the data is encoded as form-data appended to the body
builder.addData("wd", "testData");
var data = {
"wd": "testData",
"qid": "59"
}
builder.setData(data);
// set the body from a string
// Note: setData will fail if the body is set
// Content-type automatically changes to application/json
builder.body('{"username":"kejie","pwd":"d8j3kduui461p"}');
// set the timeout, in milliseconds
builder.timeout(10000);
// build the request object
var request = builder.build();
Execute request
Get the HTTP response with RequestEngine.executeByRequest(request). The response is a JSON string containing the HTTP code, message, and body. The code is -1 if a request exception has occurred.
// Execute the request; the result is a JSON string, for example:
var response = RequestEngine.executeByRequest(request);
// {"code":"200", "message":"OK", "body":"content"}
// {"code":"404", "message":"NOT FOUND", "body":"content"}
// {"code":"-1", "message":"Request Exception", "body":""}
// turn the JSON string into a js object
// NOTE(review): eval() executes whatever the string contains; JSON.parse may be a safer choice — confirm.
response = eval("("+response+")");
// console.log prints a string to the Android log
console.log(response.code);
console.log(response.message);
console.log(response.body);
Change default request engine
JsCrawler has two request engines, Jsoup and OkHttp. Jsoup is used by default; you can switch to OkHttp.
// use the OkHttp engine for requests
jsCrawler.setRequestEngine(new OkHttpEngine());
Extended Request Engine
Extend JsoupEngine or OkHttpEngine to support specific HTTP settings, or extend RequestEngine to create a new engine.
An example of adding proxy for JsoupEngine.
-
- Extend RequestModel and add a proxy variable.
/**
 * RequestModel extended with one extra value: the proxy to use for the request.
 */
public class MyRequestModel extends RequestModel {

    // presumably filled from the "proxy" key of the request JSON — keep the field name in sync
    private String proxy;

    /** Stores the proxy setting. */
    public void setProxy(String proxy) {
        this.proxy = proxy;
    }

    /** Returns the stored proxy setting; {@code null} when none was set. */
    public String getProxy() {
        return this.proxy;
    }
}
-
- Extend JsoupEngine: override process(), add processProxy(), and override jsonToModel() to convert the JSON to MyRequestModel.
/**
 * JsoupEngine variant that applies the proxy carried by a MyRequestModel to
 * the jsoup connection.
 */
public class MyJsoupEngine extends JsoupEngine {

    /**
     * Applies the model's proxy to the connection. The value is split on ':'
     * into host and port; nothing happens when no proxy is set or the value
     * yields fewer than two parts.
     */
    protected void processProxy(MyRequestModel model) {
        final String proxySetting = model.getProxy();
        if (proxySetting == null) {
            return;
        }
        final String[] hostAndPort = proxySetting.split(":");
        if (hostAndPort.length <= 1) {
            return;
        }
        final int port = Integer.parseInt(hostAndPort[1]);
        // connection is an object of jsoup
        connection.proxy(hostAndPort[0], port);
    }

    /** Runs the inherited processing first, then applies the proxy. */
    @Override
    protected void process(RequestModel model) {
        super.process(model);
        final MyRequestModel proxyModel = (MyRequestModel) model;
        processProxy(proxyModel);
    }

    /** Deserializes the request JSON into a MyRequestModel. */
    @Override
    protected RequestModel jsonToModel(String request) {
        final MyRequestModel parsed = gson.fromJson(request, MyRequestModel.class);
        return parsed;
    }
}
-
- In the script file, extend RequestBuilder with a proxy function and override the build function.
// Adds a chainable proxy(host) setter to RequestBuilder; the value is kept in mProxy.
RequestBuilder.prototype.proxy = function(host) {
    this.mProxy = host;
    return this;
};

// Replaces build() so that the serialized request also carries the proxy value.
RequestBuilder.prototype.build = function() {
    var built = new Request(this);
    built.proxy = this.mProxy;
    return JSON.stringify(built);
};
// Skeleton showing where the proxy(...) call fits into the request chain.
// NOTE(review): `url` is not defined in this excerpt; it is expected to come from the elided code.
function getBlogList() {
// your js code ...
var request = new RequestBuilder()
.url(url).method("GET").proxy("127.0.0.1:8088")
.timeout(10000).build();
var response = RequestEngine.executeByRequest(request);
// your js code ...
}
-
- Register the new engine with JsCrawler's setRequestEngine.
/**
 * Application subclass that sets up JsCrawler and installs MyJsoupEngine as
 * its request engine.
 */
public class MyApplication extends Application {

    @Override
    public void onCreate() {
        super.onCreate();
        JsCrawler.initialize(this);
        // get the JsCrawler instance
        final JsCrawler crawler = JsCrawler.getInstance();
        // set whether JQuery is enabled
        crawler.setJQueryEnabled(true);
        // change the JsCrawler request engine
        crawler.setRequestEngine(new MyJsoupEngine());
    }

    @Override
    public void onTerminate() {
        super.onTerminate();
        JsCrawler.release();
    }
}
For additional information, see the sample module.
License
MIT License
Copyright (c) 2017 kejie
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.