node-geoip-native
node-geoip-native copied to clipboard
Faster but less accurate?
Unless I'm missing something, I think I should be able to look up an IP taken from the CSV and get the same country back. For example, these lines are pulled from the middle of the CSV:
data = [ { start:"125.255.0.0",end:"125.255.255.255",lower:"2113863680",upper:"2113929215",code:"AU",country:"Australia" },
{ start:"126.0.0.0",end:"126.255.255.255",lower:"2113929216",upper:"2130706431",code:"JP",country:"Japan" },
{ start:"128.0.0.0",end:"128.0.7.255",lower:"2147483648",upper:"2147485695",code:"RO",country:"Romania" },
{ start:"128.0.8.0",end:"128.0.15.255",lower:"2147485696",upper:"2147487743",code:"DK",country:"Denmark" },
{ start:"128.0.16.0",end:"128.0.23.255",lower:"2147487744",upper:"2147489791",code:"NO",country:"Norway" },
{ start:"128.0.24.0",end:"128.0.31.255",lower:"2147489792",upper:"2147491839",code:"RU",country:"Russian Federation" } ]
If I run them through a test script I get:
% node test-2.js
starting test: geoip-native
Looking up IP 125.255.0.0 (AU)
native: FAIL: CN != AU - {"ipstart":2113830912,"code":"CN","name":"China"}
Looking up IP 126.0.0.0 (JP)
native: FAIL: AU != JP - {"ipstart":2113863680,"code":"AU","name":"Australia"}
Looking up IP 128.0.0.0 (RO)
native: FAIL: JP != RO - {"ipstart":2113929216,"code":"JP","name":"Japan"}
Looking up IP 128.0.8.0 (DK)
native: FAIL: EU != DK - {"ipstart":2147483648,"code":"EU","name":"Europe"}
Looking up IP 128.0.16.0 (NO)
native: FAIL: EU != NO - {"ipstart":2147483648,"code":"EU","name":"Europe"}
Looking up IP 128.0.24.0 (RU)
native: FAIL: EU != RU - {"ipstart":2147483648,"code":"EU","name":"Europe"}
native: 6 failed; 0 correct
starting test: geoip-lite
Looking up IP 125.255.0.0 (AU)
lite: CORRECT: AU == AU - {"range":[2113863680,2113863933],"country":"AU","region":"02","city":"Newcastle","ll":[-32.9278,151.7845]}
Looking up IP 126.0.0.0 (JP)
lite: CORRECT: JP == JP - {"range":[2113929216,2114191359],"country":"JP","region":"","city":"","ll":[36,138]}
Looking up IP 128.0.0.0 (RO)
lite: CORRECT: RO == RO - {"range":[2147483648,2147485695],"country":"RO","region":"","city":"","ll":[46,25]}
Looking up IP 128.0.8.0 (DK)
lite: CORRECT: DK == DK - {"range":[2147485696,2147487743],"country":"DK","region":"","city":"","ll":[56,10]}
Looking up IP 128.0.16.0 (NO)
lite: CORRECT: NO == NO - {"range":[2147487744,2147489791],"country":"NO","region":"","city":"","ll":[62,10]}
Looking up IP 128.0.24.0 (RU)
lite: CORRECT: RU == RU - {"range":[2147489792,2147491839],"country":"RU","region":"","city":"","ll":[60,100]}
lite: 0 failed; 6 correct
Running the entire CSV through the test I get:
native: 73876 failed; 6577 correct
lite: 7195 failed; 70384 correct
I attribute the failures in GeoIP's "lite" implementation to some administrative accuracy differences in the datasets used (many places show up as "EU" rather than the actual country in the binary datasets for some reason).
I'm still zeroing in on the actual bug but at least this test case makes it easy to spot:
var geoip = require("./geoip.js");
//var geoip = require("geoip-lite");
/**
 * Reads GeoIPCountryWhois-new.csv from this script's directory and returns
 * one record per data row, sorted ascending by the numeric range start.
 *
 * Columns used (0-based): 0 = first IP of the range, 2 = that IP as an
 * integer, 4 = country code, 5 = country name.
 *
 * @returns {Array<{start: string, ipstart: number, code: string, name: string}>}
 *     the parsed rows, ordered by `ipstart`.
 * @throws {Error} whatever `fs.readFileSync` throws if the file cannot be read.
 */
function readCountries()
{
    var fs = require("fs");
    var buffer = fs.readFileSync(__dirname + "/GeoIPCountryWhois-new.csv", { encoding: "utf8" });
    var countries = [];

    // Quotes are dropped wholesale, so a quoted name that contains a comma
    // ("Korea, Republic of") ends up spread over several fields; it is
    // stitched back together below.
    buffer = buffer.replace(/"/g, "");

    // Accept both LF and CRLF line endings.
    var entries = buffer.split(/\r?\n/);
    for (var i = 0; i < entries.length; i++) {
        var line = entries[i].trim();
        if (line === "") {
            // The newline that ends the file (or a stray blank line) is not a
            // record; pushing it would add a row whose ipstart is NaN.
            continue;
        }
        var entry = line.split(",");
        countries.push({
            start: entry[0],
            ipstart: parseInt(entry[2], 10),
            code: entry[4],
            // Everything from column 5 onwards belongs to the country name.
            name: entry.slice(5).join(",")
        });
    }

    countries.sort(function(a, b) {
        return a.ipstart - b.ipstart;
    });
    return countries;
}
// Selects which library the current pass of test() exercises:
// true  -> the module loaded from ./geoip.js (reported as "native"),
// false -> geoip-lite (reported as "lite").
// test() sets this to false itself before running its second pass.
var test1 = true;
function test() {
var total = 0;
var numtests = 20;
var numiterations = 1000000;
console.log("starting test: " + (test1 ? "geoip-native" : "geoip-lite"));
data = [ { start:"125.255.0.0",end:"125.255.255.255",lower:"2113863680",upper:"2113929215",code:"AU",country:"Australia" },
{ start:"126.0.0.0",end:"126.255.255.255",lower:"2113929216",upper:"2130706431",code:"JP",country:"Japan" },
{ start:"128.0.0.0",end:"128.0.7.255",lower:"2147483648",upper:"2147485695",code:"RO",country:"Romania" },
{ start:"128.0.8.0",end:"128.0.15.255",lower:"2147485696",upper:"2147487743",code:"DK",country:"Denmark" },
{ start:"128.0.16.0",end:"128.0.23.255",lower:"2147487744",upper:"2147489791",code:"NO",country:"Norway" },
{ start:"128.0.24.0",end:"128.0.31.255",lower:"2147489792",upper:"2147491839",code:"RU",country:"Russian Federation" } ]
failed = 0;
correct = 0;
testname = (test1 ? "native" : "lite");
countries = data; // readCountries();
for (i = 0; i < countries.length; i++)
{
console.log("Looking up IP " + countries[i].start + " ("+ countries[i].code + ")");
lookedup = geoip.lookup(countries[i].start);
if (lookedup)
{
lookedup_code = (test1 ? lookedup.code : lookedup.country)
if (lookedup_code != countries[i].code)
{
++failed;
console.log("\t" + testname + ": FAIL: " + lookedup_code + " != " + countries[i].code + " - " + JSON.stringify(lookedup))
}
else
{
++correct;
console.log("\t" + testname + ": CORRECT: " + lookedup_code + " == " + countries[i].code + " - " + JSON.stringify(lookedup))
}
}
}
console.log(testname + ": " + failed + " failed; " + correct + " correct");
if(!test1) {
return;
}
geoip = require("geoip-lite");
test1 = false;
test();
}
// Start the comparison one second after the script loads.
// NOTE(review): presumably the delay gives ./geoip.js time to finish loading
// its data before the first lookup — confirm against that module.
setTimeout(test, 1000);
Or has looking at this for a couple of hours blinded me to something obvious?