Commit 38317440 authored by sim

WIP tficf

parent 5b150ca3
from math import log
from gargantext.models import Nodes
def tficf(term, node, context):
    """Return the TF-ICF weight of ``term`` for ``node`` within ``context``.

    The weight is ``tf * log(N / n)`` where ``tf`` is ``node.frequency(term)``,
    ``N`` is the number of nodes returned by ``Nodes.within(context)`` and
    ``n`` is the number of those nodes matching ``term``.

    The score is the *product* of the term frequency and the log inverse
    context frequency; dividing by the logarithm would fail with
    ``ZeroDivisionError`` whenever the term occurs in every node
    (``log(1) == 0``).

    Args:
        term: the term (ngram) to weight.
        node: object exposing ``frequency(term)``.
        context: value passed to ``Nodes.within`` to select the set of nodes
            used for the inverse context frequency.

    Returns:
        The TF-ICF weight as a float; ``0.0`` when no node of the context
        matches ``term`` (the inverse frequency is undefined in that case).
    """
    # TF: in NodeNgram; where is it computed? during ngram extraction?
    # ICF: in NodeNodeNgram
    all_nodes = Nodes.within(context)
    tf = node.frequency(term)
    total = all_nodes.count()
    matching = all_nodes.matching(term).count()
    if not matching:
        # No node contains the term: avoid dividing by zero.
        # NOTE(review): 0.0 is chosen as the neutral weight -- confirm.
        return 0.0
    return tf * log(total / matching)
ResourceType
1. "API"
get_resource_by_name(name) # Mal utilisé dans graph/views.py (=> BUG), utilisé dans moissonneurs/pubmed.py
CorpusNode.resources()[0]['type'] # metric_tfidf.py
<query>.filter(CorpusNode.hyperdata['resources'][0]['type'].astext == ...) # metric_tfidf.py
CorpusNode.resources()[0] # ngrams_extraction.py
get_resource(resource["type"])
...
2. Localisation des appels
* Crawler abstract class in gargantext/util/crawlers/_Crawler.py
* Some Crawler implementations in gargantext/util/crawlers/*.py
* compute_ti_ranking func in gargantext/util/toolchain/metric_tfidf.py
* parse func in gargantext/util/toolchain/parsing.py
...
NODETYPES
./gargantext/models/nodes_constants.py *
./gargantext/util/toolchain/ngram_coocs.py
./gargantext/models/nodes.py
./gargantext/views/api/api.py
./gargantext/views/api/nodes.py
./unittests/tests_090_toolchain.py
./unittests/tests_070_routes.py
LISTTYPES
./gargantext/models/nodes_constants.py *
./gargantext/util/toolchain/list_stop.py
./gargantext/constants.py
./gargantext/models/nodes.py
INDEXED_HYPERDATA
./gargantext/models/nodes_constants.py *
./gargantext/constants.py
./gargantext/util/toolchain/hyperdata_indexing.py
./gargantext/util/toolchain/ngram_coocs.py
./gargantext/models/hyperdata.py
./gargantext/models/nodes.py
./gargantext/views/api/analytics.py
RESOURCETYPES
./gargantext/models/nodes_constants.py *
./gargantext/constants.py
./gargantext/util/crawlers/__init__.py
./gargantext/util/parsers/__init__.py
./gargantext/util/files.py
./gargantext/views/pages/projects.py
./gargantext/views/api/api.py
./docs/tools/resource.md
./docs/tools/overview/parser.md
./docs/resource.md
./docs/overview/parser.md
./site/tools/overview/parser/index.html
./site/tools/resource/index.html
./site/overview/parser/index.html
./site/resource/index.html
./site/mkdocs/search_index.json
./templates/pages/projects/old_project.html
./unittests/tests_090_toolchain.py
get_resource(sourcetype)
./gargantext/constants.py *
./gargantext/util/crawlers/_Crawler.py
./gargantext/util/toolchain/parsing.py
./gargantext/util/toolchain/ngrams_extraction.py
./gargantext/views/pages/terms.py
./gargantext/views/pages/projects.py
./gargantext/views/pages/corpora.py
./gargantext/views/api/api.py
./gargantext/views/api/projects.py
./unittests/tests_090_toolchain.py
./moissonneurs/multivac.py
./moissonneurs/isidore.py
./moissonneurs/cern.py
./moissonneurs/istex.py
./moissonneurs/hal.py
get_resource_by_name(sourcename)
./gargantext/constants.py *
./graph/views.py
./moissonneurs/pubmed.py
load_parser
./gargantext/constants.py *
./gargantext/util/toolchain/parsing.py
load_crawler
./gargantext/constants.py *
./moissonneurs/multivac.py
./moissonneurs/isidore.py
./moissonneurs/cern.py
./moissonneurs/hal.py
resources
./gargantext/models/nodes.py *
./gargantext/util/toolchain/metric_tfidf.py
./gargantext/util/toolchain/parsing.py
./gargantext/util/toolchain/ngrams_extraction.py
./gargantext/views/pages/terms.py
./gargantext/views/pages/projects.py
./gargantext/views/pages/corpora.py
./unittests/tests_090_toolchain.py
add_resource
./gargantext/models/nodes.py *
./gargantext/util/crawlers/PUBMED.py
./gargantext/util/crawlers/_Crawler.py
./gargantext/views/pages/projects.py
./unittests/tests_090_toolchain.py
./moissonneurs/pubmed.py
./moissonneurs/multivac.py
./moissonneurs/isidore.py
./moissonneurs/cern.py
./moissonneurs/istex.py
./moissonneurs/hal.py
-- Raw single-line form of the query, with %(name)s bind parameters: per counted_ngform, sum of nodes_ngrams.weight and count of nodes_ngrams.node_id.
SELECT CASE WHEN (anon_1.ngram1_id IS NOT NULL) THEN anon_1.ngram1_id WHEN (anon_1.ngram1_id IS NULL) THEN nodes_ngrams.ngram_id END AS counted_ngform, sum(nodes_ngrams.weight) AS sum_1, count(nodes_ngrams.node_id) AS count_1 FROM nodes_ngrams LEFT OUTER JOIN (SELECT nodes_ngrams_ngrams.ngram1_id AS ngram1_id, nodes_ngrams_ngrams.ngram2_id AS ngram2_id FROM nodes_ngrams_ngrams WHERE nodes_ngrams_ngrams.node_id = %(node_id_1)s) AS anon_1 ON anon_1.ngram2_id = nodes_ngrams.ngram_id JOIN (SELECT nodes.id AS id FROM nodes JOIN nodes AS nodes_1 ON nodes_1.id = nodes.parent_id WHERE nodes.typename = %(typename_1)s AND nodes_1.typename = %(typename_2)s AND ((((nodes_1.hyperdata -> %(hyperdata_1)s) -> %(param_1)s)) ->> %(param_2)s) = %(param_3)s) AS anon_2 ON anon_2.id = nodes_ngrams.node_id JOIN (SELECT DISTINCT nodes_ngrams.ngram_id AS uniq_ngid FROM nodes_ngrams JOIN nodes ON nodes.id = nodes_ngrams.node_id WHERE nodes.typename = %(typename_3)s AND nodes.parent_id = %(parent_id_1)s) AS anon_3 ON anon_3.uniq_ngid = nodes_ngrams.ngram_id GROUP BY counted_ngform
-- Formatted, bind-parameter form of the query.
-- For each grouping key "counted_ngform" it returns the sum of
-- nodes_ngrams.weight and the number of nodes_ngrams rows (node_id count).
--
-- Grouping key: anon_1.ngram1_id when the LEFT JOIN below found a pair for
-- the row's ngram, otherwise the row's own ngram_id.
SELECT CASE
WHEN (anon_1.ngram1_id IS NOT NULL) THEN anon_1.ngram1_id
WHEN (anon_1.ngram1_id IS NULL) THEN nodes_ngrams.ngram_id
END AS counted_ngform,
sum(nodes_ngrams.weight) AS sum_1,
count(nodes_ngrams.node_id) AS count_1
FROM nodes_ngrams
-- anon_1: (ngram1_id, ngram2_id) pairs stored in nodes_ngrams_ngrams under
-- node %(node_id_1)s; matched on ngram2_id so ngram2 is counted as ngram1.
-- NOTE(review): presumably a synonym/group list mapping -- confirm.
LEFT OUTER JOIN
(SELECT nodes_ngrams_ngrams.ngram1_id AS ngram1_id,
nodes_ngrams_ngrams.ngram2_id AS ngram2_id
FROM nodes_ngrams_ngrams
WHERE nodes_ngrams_ngrams.node_id = %(node_id_1)s) AS anon_1 ON anon_1.ngram2_id = nodes_ngrams.ngram_id
-- anon_2: restricts counted rows to nodes of type %(typename_1)s whose parent
-- is of type %(typename_2)s and whose parent hyperdata value at
-- hyperdata_1 -> param_1 ->> param_2 equals %(param_3)s.
JOIN
(SELECT nodes.id AS id
FROM nodes
JOIN nodes AS nodes_1 ON nodes_1.id = nodes.parent_id
WHERE nodes.typename = %(typename_1)s
AND nodes_1.typename = %(typename_2)s
AND ((((nodes_1.hyperdata -> %(hyperdata_1)s) -> %(param_1)s)) ->> %(param_2)s) = %(param_3)s) AS anon_2 ON anon_2.id = nodes_ngrams.node_id
-- anon_3: restricts counted ngrams to the distinct ngram ids attached to
-- nodes of type %(typename_3)s whose parent is %(parent_id_1)s.
JOIN
(SELECT DISTINCT nodes_ngrams.ngram_id AS uniq_ngid
FROM nodes_ngrams
JOIN nodes ON nodes.id = nodes_ngrams.node_id
WHERE nodes.typename = %(typename_3)s
AND nodes.parent_id = %(parent_id_1)s) AS anon_3 ON anon_3.uniq_ngid = nodes_ngrams.ngram_id
GROUP BY counted_ngform
-- Raw single-line form of the query with literal values: pair-list node 55980, node typename 4, parent typename 3, hyperdata 'resources' -> 0 ->> 'type' = '3', parent_id 6015.
SELECT CASE WHEN (anon_1.ngram1_id IS NOT NULL) THEN anon_1.ngram1_id WHEN (anon_1.ngram1_id IS NULL) THEN nodes_ngrams.ngram_id END AS counted_ngform, sum(nodes_ngrams.weight) AS sum_1, count(nodes_ngrams.node_id) AS count_1 FROM nodes_ngrams LEFT OUTER JOIN (SELECT nodes_ngrams_ngrams.ngram1_id AS ngram1_id, nodes_ngrams_ngrams.ngram2_id AS ngram2_id FROM nodes_ngrams_ngrams WHERE nodes_ngrams_ngrams.node_id = 55980) AS anon_1 ON anon_1.ngram2_id = nodes_ngrams.ngram_id JOIN (SELECT nodes.id AS id FROM nodes JOIN nodes AS nodes_1 ON nodes_1.id = nodes.parent_id WHERE nodes.typename = 4 AND nodes_1.typename = 3 AND ((((nodes_1.hyperdata -> 'resources') -> 0)) ->> 'type') = '3') AS anon_2 ON anon_2.id = nodes_ngrams.node_id JOIN (SELECT DISTINCT nodes_ngrams.ngram_id AS uniq_ngid FROM nodes_ngrams JOIN nodes ON nodes.id = nodes_ngrams.node_id WHERE nodes.typename = 4 AND nodes.parent_id = 6015) AS anon_3 ON anon_3.uniq_ngid = nodes_ngrams.ngram_id GROUP BY counted_ngform;
-- Formatted form of the query with literal values substituted.
-- For each grouping key "counted_ngform" it returns the sum of
-- nodes_ngrams.weight and the number of nodes_ngrams rows (node_id count).
--
-- Grouping key: anon_1.ngram1_id when the LEFT JOIN below found a pair for
-- the row's ngram, otherwise the row's own ngram_id.
SELECT CASE
WHEN (anon_1.ngram1_id IS NOT NULL) THEN anon_1.ngram1_id
WHEN (anon_1.ngram1_id IS NULL) THEN nodes_ngrams.ngram_id
END AS counted_ngform,
sum(nodes_ngrams.weight) AS sum_1,
count(nodes_ngrams.node_id) AS count_1
FROM nodes_ngrams
-- anon_1: (ngram1_id, ngram2_id) pairs stored in nodes_ngrams_ngrams under
-- node 55980; matched on ngram2_id so ngram2 is counted as ngram1.
-- NOTE(review): presumably a synonym/group list mapping -- confirm.
LEFT OUTER JOIN
(SELECT nodes_ngrams_ngrams.ngram1_id AS ngram1_id,
nodes_ngrams_ngrams.ngram2_id AS ngram2_id
FROM nodes_ngrams_ngrams
WHERE nodes_ngrams_ngrams.node_id = 55980) AS anon_1 ON anon_1.ngram2_id = nodes_ngrams.ngram_id
-- anon_2: restricts counted rows to nodes with typename 4 whose parent has
-- typename 3 and whose parent hyperdata 'resources'[0]'s 'type' is '3'.
-- NOTE(review): typenames 4 / 3 are presumably document / corpus -- confirm.
JOIN
(SELECT nodes.id AS id
FROM nodes
JOIN nodes AS nodes_1 ON nodes_1.id = nodes.parent_id
WHERE nodes.typename = 4
AND nodes_1.typename = 3
AND ((((nodes_1.hyperdata -> 'resources') -> 0)) ->> 'type') = '3') AS anon_2 ON anon_2.id = nodes_ngrams.node_id
-- anon_3: restricts counted ngrams to the distinct ngram ids attached to
-- nodes with typename 4 whose parent is node 6015.
JOIN
(SELECT DISTINCT nodes_ngrams.ngram_id AS uniq_ngid
FROM nodes_ngrams
JOIN nodes ON nodes.id = nodes_ngrams.node_id
WHERE nodes.typename = 4
AND nodes.parent_id = 6015) AS anon_3 ON anon_3.uniq_ngid = nodes_ngrams.ngram_id
GROUP BY counted_ngform
-- Raw single-line form of the query with a second set of literal ids: pair-list node 118166 and parent_id 118117 (typenames 4 / 3 and resource 'type' = '3').
SELECT CASE WHEN (anon_1.ngram1_id IS NOT NULL) THEN anon_1.ngram1_id WHEN (anon_1.ngram1_id IS NULL) THEN nodes_ngrams.ngram_id END AS counted_ngform, sum(nodes_ngrams.weight) AS sum_1, count(nodes_ngrams.node_id) AS count_1 FROM nodes_ngrams LEFT OUTER JOIN (SELECT nodes_ngrams_ngrams.ngram1_id AS ngram1_id, nodes_ngrams_ngrams.ngram2_id AS ngram2_id FROM nodes_ngrams_ngrams WHERE nodes_ngrams_ngrams.node_id = 118166) AS anon_1 ON anon_1.ngram2_id = nodes_ngrams.ngram_id JOIN (SELECT nodes.id AS id FROM nodes JOIN nodes AS nodes_1 ON nodes_1.id = nodes.parent_id WHERE nodes.typename = 4 AND nodes_1.typename = 3 AND ((((nodes_1.hyperdata -> 'resources') -> 0)) ->> 'type') = '3') AS anon_2 ON anon_2.id = nodes_ngrams.node_id JOIN (SELECT DISTINCT nodes_ngrams.ngram_id AS uniq_ngid FROM nodes_ngrams JOIN nodes ON nodes.id = nodes_ngrams.node_id WHERE nodes.typename = 4 AND nodes.parent_id = 118117) AS anon_3 ON anon_3.uniq_ngid = nodes_ngrams.ngram_id GROUP BY counted_ngform;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment