Porphyry icon indicating copy to clipboard operation
Porphyry copied to clipboard

Another index structure could be used for co-occurrences computation

Open benel opened this issue 5 years ago • 3 comments

An unpublished paper by Frédéric Merle, Aurélien Bénel and Yann Barloy (written in 2013-2014) compared the efficiency of 3 (or even 4) index structures usable to speed up the multidimensional browsing algorithm. One of them appeared to be far more efficient than those that were tested in earlier and current versions of Porphyry.

Please note, however, that as with any index, building and updating it takes time.

benel avatar Mar 04 '20 13:03 benel

Co-occurrences computation appears to be negligible in comparison with data downloading:

Time Task
0,1 s – 0,2 s getView(user)
1,3 s – 2,0 s getView(corpora, viewpoints)
1,3 s – 2,0 s getView(corpora)
0,4 s – 0,6 s getView(viewpoints)
0,001 s restructuring viewpoints
0,002 s restructuring items
0,019 s co-occurrences

Tested on the stained-glasses portfolio (1771 items, 608 topics). @garnier5

benel avatar May 20 '20 17:05 benel

Pour info, voici le code (indépendant de Porphyry mais qui s'en inspire grandement) que j'avais utilisé pour faire mes mesures :

const Hypertopic = require('hypertopic');
// Identifier of the user whose view is requested first (`/user/${USER}`)
// and whose entry is then read from the response (`x[USER]`).
const USER = 'vitraux';
// Service URLs handed to the Hypertopic constructor.
const SERVICES = [
  'http://argos2.hypertopic.org',
  'http://steatite.hypertopic.org'
];


// Timestamp (in milliseconds) of the previous measurement point.
let start = Date.now();

/**
 * Logs the number of milliseconds elapsed since the previous call (or since
 * script start for the first call) followed by `x`, then restarts the timer.
 * @param {*} x - label or value printed next to the elapsed time
 */
const logWithTime = (x) => {
  const now = Date.now();
  const elapsed = now - start;
  console.log(elapsed, x);
  start = now;
};

// Module-level state filled in by the successive stages of the promise chain.
// User entry taken from the first getView response.
let user = {};
// Viewpoint objects, each tagged with its own id; scanned by _getTopic.
let viewpoints = [];
// Named items of every corpus, each tagged with its id and corpus id.
let items = [];

// Benchmark pipeline. Each stage reports, through logWithTime, the time
// elapsed since the previous stage.
const hypertopic = new Hypertopic(SERVICES);
hypertopic.getView(`/user/${USER}`)
  .then((x) => {
    user = x[USER];
    logWithTime(user);
    // Viewpoint and corpus paths are concatenated so that the next stage
    // fetches all of them with a single getView call.
    return user.viewpoint.map(y => `/viewpoint/${y.id}`)
      .concat(user.corpus.map(y => `/corpus/${y.id}`));
  })
  // Call getView on the instance instead of passing the detached method,
  // so the call does not rely on how the library binds `this`.
  .then((paths) => hypertopic.getView(paths))
  .then((x) => {
    logWithTime('GOT corpora and viewpoints');
    return x;
  })
  .then((data) => {
    // Restructure viewpoints: tag each one with its id and collect it.
    for (let v of user.viewpoint) {
      let viewpoint = data[v.id];
      viewpoint.id = v.id;
      viewpoints.push(viewpoint);
    }
    logWithTime({viewpoints: viewpoints.length});
    return data;
  })
  .then((data) => {
    // Restructure items: flatten every corpus into the `items` array.
    for (let corpus of user.corpus) {
      for (let itemId in data[corpus.id]) {
        // These keys are skipped (presumably corpus-level attributes rather
        // than items; confirm against the Hypertopic data format).
        if (['id','name','user'].includes(itemId)) continue;
        let item = data[corpus.id][itemId];
        // Entries without a non-empty name are ignored.
        if (item.name && item.name.length) {
          item.id = itemId;
          item.corpus = corpus.id;
          items.push(item);
        }
      }
    }
    logWithTime({items: items.length});
    return data;
  })
  .then(() => {
    // Co-occurrences: map every topic (including the broader topics on its
    // path) to the set of ids of the selected items attached to it.
    let selectedItems = items; //worst case
    let topicsItems = new Map();
    for (let e of selectedItems) {
      for (let t of _getRecursiveItemTopics(e)) {
        push(topicsItems, t, e.id);
      }
    }
    logWithTime({topics: topicsItems.size});
  })
  .catch((error) => {
    // Report any network or restructuring failure instead of leaving the
    // rejection unhandled, and signal the failure through the exit code.
    console.error(error);
    process.exitCode = 1;
  });

  /**
   * Looks a topic up by id in the module-level `viewpoints` array.
   * @param {string} id - key to look for in each viewpoint object
   * @returns {*} the value stored under `id` in the first viewpoint where
   *   that value is truthy, or null when no viewpoint has one
   */
  function _getTopic(id) {
    const owner = viewpoints.find((candidate) => candidate[id]);
    return owner ? owner[id] : null;
  }

  /**
   * Adds `itemId` to the set stored under `topicId` in `map`, creating the
   * set on first use.
   * @param {Map} map - index from topic id to a Set of item ids (mutated)
   * @param {*} topicId - key of the set to extend
   * @param {*} itemId - value to add to that set
   */
  function push(map, topicId, itemId) {
    const members = map.get(topicId);
    if (!members) {
      map.set(topicId, new Set([itemId]));
      return;
    }
    members.add(itemId);
    map.set(topicId, members);
  }

  /**
   * Builds the list of topic ids leading to `topicId`, outermost first,
   * by following the first `broader` entry of each topic.
   * @param {string} topicId - id of the topic the path ends with
   * @returns {string[]} ids of the broader topics followed by `topicId`;
   *   just `[topicId]` when the topic is unknown or has no `broader`
   */
  function _getTopicPath(topicId) {
    const topic = _getTopic(topicId);
    if (!topic || !topic.broader) {
      return [topicId];
    }
    // Only the first broader topic is followed.
    const ancestors = _getTopicPath(topic.broader[0].id);
    ancestors.push(topicId);
    return ancestors;
  }

  /**
   * Computes one topic path per topic attached to `item`.
   * @param {Object} item - object whose optional `topic` array holds
   *   entries with an `id`
   * @returns {Array} the result of _getTopicPath for each entry, in order;
   *   empty when `item.topic` is missing
   */
  function _getItemTopicsPaths(item) {
    const paths = [];
    (item.topic || []).forEach((entry) => {
      paths.push(_getTopicPath(entry.id));
    });
    return paths;
  }

  /**
   * Flattens the topic paths of `item` into a single list of topic ids.
   * Ids shared by several paths appear once per path.
   * @param {Object} item - item passed on to _getItemTopicsPaths
   * @returns {Array} concatenation of all the paths, in order
   */
  function _getRecursiveItemTopics(item) {
    return _getItemTopicsPaths(item)
      .reduce((all, path) => all.concat(path), []);
  }

benel avatar Jun 05 '20 13:06 benel

Tested on the stained-glasses portfolio (always the same item, 14 topics).

10 000 items

Time Task
0,05s – 0,1s getView(user)
7,0 s – 8,0 s getView(corpora, viewpoints)
0,000 s restructuring viewpoints
0,009 s restructuring items
0,090 s co-occurrences

50 000 items

Time Task
0,05s – 0,1s getView(user)
30s – 35s getView(corpora, viewpoints)
0,002 s restructuring viewpoints
0,030 s restructuring items
0,376 s co-occurrences

100 000 items

Time Task
0,1s – 0,2s getView(user)
65s – 75s getView(corpora, viewpoints)
0,007 s restructuring viewpoints
0,050 s restructuring items
0,815 s co-occurrences

garnier5 avatar Jun 16 '20 14:06 garnier5