Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
38317440
Commit
38317440
authored
Sep 01, 2017
by
sim
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
WIP tficf
parent
5b150ca3
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
201 additions
and
0 deletions
+201
-0
metric_tficf.py
gargantext/util/toolchain/metric_tficf.py
+14
-0
refactor.txt
refactor.txt
+128
-0
tfidf.sql
tfidf.sql
+30
-0
tfidf_concrete.sql
tfidf_concrete.sql
+28
-0
tfidf_concrete2.sql
tfidf_concrete2.sql
+1
-0
No files found.
gargantext/util/toolchain/metric_tficf.py
0 → 100644
View file @
38317440
from
math
import
log
from
gargantext.models
import
Nodes
def tficf(term, node, context):
    """Return the TF-ICF weight of ``term`` for ``node`` within ``context``.

    The weight is ``tf * log(N / n_t)`` where ``tf`` is
    ``node.frequency(term)``, ``N`` is the number of nodes returned by
    ``Nodes.within(context)`` and ``n_t`` is how many of those nodes match
    ``term``.

    Args:
        term: the term to weight; passed as-is to ``node.frequency`` and
            ``matching``.
        node: object exposing ``frequency(term)``.
        context: passed as-is to ``Nodes.within``.
            NOTE(review): presumably a corpus-like node -- confirm.

    Returns:
        The TF-ICF weight. ``0.0`` when no node of the context matches the
        term (the inverse context frequency is undefined in that case), and
        ``0`` when every node matches it (``log(1) == 0``).
    """
    # TF: in NodeNgram; where is the computation? ngram extraction?
    # ICF: in NodeNodeNgram
    all_nodes = Nodes.within(context)

    tf = node.frequency(term)

    total = all_nodes.count()
    matching = all_nodes.matching(term).count()
    if not matching:
        # A term absent from the whole context has no defined ICF; give it a
        # null weight instead of raising ZeroDivisionError.
        return 0.0

    # TF-ICF is a product: dividing by log(icf) would favour ubiquitous terms
    # and blow up when the term occurs in every node (log(1) == 0).
    return tf * log(total / matching)
refactor.txt
0 → 100644
View file @
38317440
ResourceType
1. "API"
get_resource_by_name(name) # Mal utilisé dans graph/views.py (=> BUG), utilisé dans moissonneurs/pubmed.py
CorpusNode.resources()[0]['type'] # metric_tfidf.py
<query>.filter(CorpusNode.hyperdata['resources'][0]['type'].astext == ...) # metric_tfidf.py
CorpusNode.resources()[0] # ngrams_extraction.py
get_resource(resource["type"])
...
2. Localisation des appels
* Crawler abstract class in gargantext/u/c/_Crawler.py
* Some Crawler implementations in gargantext/u/c/*.py
* compute_ti_ranking func in gargantext/u/t/metric_tfidf.py
* parse func in gargantext/u/t/parsing.py
...
NODETYPES
./gargantext/models/nodes_constants.py *
./gargantext/util/toolchain/ngram_coocs.py
./gargantext/models/nodes.py
./gargantext/views/api/api.py
./gargantext/views/api/nodes.py
./unittests/tests_090_toolchain.py
./unittests/tests_070_routes.py
LISTTYPES
./gargantext/models/nodes_constants.py *
./gargantext/util/toolchain/list_stop.py
./gargantext/constants.py
./gargantext/models/nodes.py
INDEXED_HYPERDATA
./gargantext/models/nodes_constants.py *
./gargantext/constants.py
./gargantext/util/toolchain/hyperdata_indexing.py
./gargantext/util/toolchain/ngram_coocs.py
./gargantext/models/hyperdata.py
./gargantext/models/nodes.py
./gargantext/views/api/analytics.py
RESOURCETYPES
./gargantext/models/nodes_constants.py *
./gargantext/constants.py
./gargantext/util/crawlers/__init__.py
./gargantext/util/parsers/__init__.py
./gargantext/util/files.py
./gargantext/views/pages/projects.py
./gargantext/views/api/api.py
./docs/tools/resource.md
./docs/tools/overview/parser.md
./docs/resource.md
./docs/overview/parser.md
./site/tools/overview/parser/index.html
./site/tools/resource/index.html
./site/overview/parser/index.html
./site/resource/index.html
./site/mkdocs/search_index.json
./templates/pages/projects/old_project.html
./unittests/tests_090_toolchain.py
get_resource(sourcetype)
./gargantext/constants.py *
./gargantext/util/crawlers/_Crawler.py
./gargantext/util/toolchain/parsing.py
./gargantext/util/toolchain/ngrams_extraction.py
./gargantext/views/pages/terms.py
./gargantext/views/pages/projects.py
./gargantext/views/pages/corpora.py
./gargantext/views/api/api.py
./gargantext/views/api/projects.py
./unittests/tests_090_toolchain.py
./moissonneurs/multivac.py
./moissonneurs/isidore.py
./moissonneurs/cern.py
./moissonneurs/istex.py
./moissonneurs/hal.py
get_resource_by_name(sourcename)
./gargantext/constants.py *
./graph/views.py
./moissonneurs/pubmed.py
load_parser
./gargantext/constants.py *
./gargantext/util/toolchain/parsing.py
load_crawler
./gargantext/constants.py *
./moissonneurs/multivac.py
./moissonneurs/isidore.py
./moissonneurs/cern.py
./moissonneurs/hal.py
resources
./gargantext/models/nodes.py *
./gargantext/util/toolchain/metric_tfidf.py
./gargantext/util/toolchain/parsing.py
./gargantext/util/toolchain/ngrams_extraction.py
./gargantext/views/pages/terms.py
./gargantext/views/pages/projects.py
./gargantext/views/pages/corpora.py
./unittests/tests_090_toolchain.py
add_resource
./gargantext/models/nodes.py *
./gargantext/util/crawlers/PUBMED.py
./gargantext/util/crawlers/_Crawler.py
./gargantext/views/pages/projects.py
./unittests/tests_090_toolchain.py
./moissonneurs/pubmed.py
./moissonneurs/multivac.py
./moissonneurs/isidore.py
./moissonneurs/cern.py
./moissonneurs/istex.py
./moissonneurs/hal.py
tfidf.sql
0 → 100644
View file @
38317440
-- For every ngram, sum nodes_ngrams.weight (sum_1) and count the
-- nodes_ngrams rows (count_1), grouped by counted_ngform.
-- counted_ngform is ngram1_id when the ngram appears as ngram2_id in the
-- nodes_ngrams_ngrams rows of node_id_1, and the ngram's own id otherwise.
--
-- Bind parameters (pyformat names, supplied by the caller):
--   node_id_1                      node_id filter on nodes_ngrams_ngrams
--   typename_1                     typename required of the counted nodes
--   typename_2                     typename required of their parent node
--   hyperdata_1, param_1, param_2  successive JSON path steps into the
--                                  parent's hyperdata
--   param_3                        text the value at that path must equal
--   typename_3, parent_id_1        typename and parent_id of the nodes whose
--                                  ngrams are eligible for counting
SELECT
    COALESCE(ngram_groups.ngram1_id, occ.ngram_id) AS counted_ngform,
    sum(occ.weight) AS sum_1,
    count(occ.node_id) AS count_1
FROM nodes_ngrams AS occ
-- Optional ngram2_id -> ngram1_id mapping; unmatched ngrams keep their own id.
LEFT JOIN (
    SELECT
        nnn.ngram1_id,
        nnn.ngram2_id
    FROM nodes_ngrams_ngrams AS nnn
    WHERE nnn.node_id = %(node_id_1)s
) AS ngram_groups
    ON ngram_groups.ngram2_id = occ.ngram_id
-- Nodes of the wanted type whose parent has the wanted type and hyperdata value.
INNER JOIN (
    SELECT child_node.id
    FROM nodes AS child_node
    INNER JOIN nodes AS parent_node
        ON parent_node.id = child_node.parent_id
    WHERE child_node.typename = %(typename_1)s
      AND parent_node.typename = %(typename_2)s
      AND (((parent_node.hyperdata -> %(hyperdata_1)s) -> %(param_1)s) ->> %(param_2)s) = %(param_3)s
) AS counted_nodes
    ON counted_nodes.id = occ.node_id
-- Restrict to ngrams attached to the children of parent_id_1.
INNER JOIN (
    SELECT DISTINCT nn.ngram_id AS uniq_ngid
    FROM nodes_ngrams AS nn
    INNER JOIN nodes AS n
        ON n.id = nn.node_id
    WHERE n.typename = %(typename_3)s
      AND n.parent_id = %(parent_id_1)s
) AS eligible_ngrams
    ON eligible_ngrams.uniq_ngid = occ.ngram_id
GROUP BY counted_ngform;
-- Per-ngram weight sum (sum_1) and row count (count_1) over nodes_ngrams,
-- grouped by counted_ngform. Bind parameters use pyformat names and are
-- supplied by the caller.
WITH ngram_groups AS (
    -- ngram2_id -> ngram1_id pairs recorded for node_id_1
    SELECT
        nnn.ngram1_id,
        nnn.ngram2_id
    FROM nodes_ngrams_ngrams AS nnn
    WHERE nnn.node_id = %(node_id_1)s
),
counted_nodes AS (
    -- nodes of typename_1 whose parent is of typename_2 and whose parent's
    -- hyperdata holds param_3 at the given JSON path
    SELECT child_node.id
    FROM nodes AS child_node
    INNER JOIN nodes AS parent_node
        ON parent_node.id = child_node.parent_id
    WHERE child_node.typename = %(typename_1)s
      AND parent_node.typename = %(typename_2)s
      AND (((parent_node.hyperdata -> %(hyperdata_1)s) -> %(param_1)s) ->> %(param_2)s) = %(param_3)s
),
eligible_ngrams AS (
    -- distinct ngrams attached to the typename_3 children of parent_id_1
    SELECT DISTINCT nn.ngram_id AS uniq_ngid
    FROM nodes_ngrams AS nn
    INNER JOIN nodes AS n
        ON n.id = nn.node_id
    WHERE n.typename = %(typename_3)s
      AND n.parent_id = %(parent_id_1)s
)
SELECT
    -- ngram1_id when a mapping exists, the ngram's own id otherwise
    COALESCE(ngram_groups.ngram1_id, occ.ngram_id) AS counted_ngform,
    sum(occ.weight) AS sum_1,
    count(occ.node_id) AS count_1
FROM nodes_ngrams AS occ
LEFT JOIN ngram_groups
    ON ngram_groups.ngram2_id = occ.ngram_id
INNER JOIN counted_nodes
    ON counted_nodes.id = occ.node_id
INNER JOIN eligible_ngrams
    ON eligible_ngrams.uniq_ngid = occ.ngram_id
GROUP BY counted_ngform
tfidf_concrete.sql
0 → 100644
View file @
38317440
-- Per-ngram weight sum (sum_1) and row count (count_1) over nodes_ngrams,
-- grouped by counted_ngform, with hard-coded ids and type codes
-- (mapping node 55980, parent node 6015, typenames 4 and 3).
SELECT
    -- ngram1_id when a mapping exists for node 55980, else the ngram's own id
    COALESCE(ngram_groups.ngram1_id, occ.ngram_id) AS counted_ngform,
    sum(occ.weight) AS sum_1,
    count(occ.node_id) AS count_1
FROM nodes_ngrams AS occ
LEFT JOIN (
    SELECT
        nnn.ngram1_id,
        nnn.ngram2_id
    FROM nodes_ngrams_ngrams AS nnn
    WHERE nnn.node_id = 55980
) AS ngram_groups
    ON ngram_groups.ngram2_id = occ.ngram_id
-- typename-4 nodes whose typename-3 parent has hyperdata.resources[0].type = '3'
INNER JOIN (
    SELECT child_node.id
    FROM nodes AS child_node
    INNER JOIN nodes AS parent_node
        ON parent_node.id = child_node.parent_id
    WHERE child_node.typename = 4
      AND parent_node.typename = 3
      AND (((parent_node.hyperdata -> 'resources') -> 0) ->> 'type') = '3'
) AS counted_nodes
    ON counted_nodes.id = occ.node_id
-- only ngrams attached to the typename-4 children of node 6015
INNER JOIN (
    SELECT DISTINCT nn.ngram_id AS uniq_ngid
    FROM nodes_ngrams AS nn
    INNER JOIN nodes AS n
        ON n.id = nn.node_id
    WHERE n.typename = 4
      AND n.parent_id = 6015
) AS eligible_ngrams
    ON eligible_ngrams.uniq_ngid = occ.ngram_id
GROUP BY counted_ngform
;
-- Per-ngram weight sum (sum_1) and row count (count_1) over nodes_ngrams,
-- grouped by counted_ngform, with hard-coded ids and type codes
-- (mapping node 55980, parent node 6015, typenames 4 and 3).
WITH ngram_groups AS (
    -- ngram2_id -> ngram1_id pairs recorded for node 55980
    SELECT
        nnn.ngram1_id,
        nnn.ngram2_id
    FROM nodes_ngrams_ngrams AS nnn
    WHERE nnn.node_id = 55980
),
counted_nodes AS (
    -- typename-4 nodes whose typename-3 parent has
    -- hyperdata.resources[0].type = '3'
    SELECT child_node.id
    FROM nodes AS child_node
    INNER JOIN nodes AS parent_node
        ON parent_node.id = child_node.parent_id
    WHERE child_node.typename = 4
      AND parent_node.typename = 3
      AND (((parent_node.hyperdata -> 'resources') -> 0) ->> 'type') = '3'
),
eligible_ngrams AS (
    -- distinct ngrams attached to the typename-4 children of node 6015
    SELECT DISTINCT nn.ngram_id AS uniq_ngid
    FROM nodes_ngrams AS nn
    INNER JOIN nodes AS n
        ON n.id = nn.node_id
    WHERE n.typename = 4
      AND n.parent_id = 6015
)
SELECT
    COALESCE(ngram_groups.ngram1_id, occ.ngram_id) AS counted_ngform,
    sum(occ.weight) AS sum_1,
    count(occ.node_id) AS count_1
FROM nodes_ngrams AS occ
LEFT JOIN ngram_groups
    ON ngram_groups.ngram2_id = occ.ngram_id
INNER JOIN counted_nodes
    ON counted_nodes.id = occ.node_id
INNER JOIN eligible_ngrams
    ON eligible_ngrams.uniq_ngid = occ.ngram_id
GROUP BY counted_ngform
tfidf_concrete2.sql
0 → 100644
View file @
38317440
-- Per-ngram weight sum (sum_1) and row count (count_1) over nodes_ngrams,
-- grouped by counted_ngform, with hard-coded ids and type codes
-- (mapping node 118166, parent node 118117, typenames 4 and 3).
SELECT
    -- ngram1_id when a mapping exists for node 118166, else the ngram's own id
    COALESCE(ngram_groups.ngram1_id, occ.ngram_id) AS counted_ngform,
    sum(occ.weight) AS sum_1,
    count(occ.node_id) AS count_1
FROM nodes_ngrams AS occ
LEFT JOIN (
    SELECT
        nnn.ngram1_id,
        nnn.ngram2_id
    FROM nodes_ngrams_ngrams AS nnn
    WHERE nnn.node_id = 118166
) AS ngram_groups
    ON ngram_groups.ngram2_id = occ.ngram_id
-- typename-4 nodes whose typename-3 parent has hyperdata.resources[0].type = '3'
INNER JOIN (
    SELECT child_node.id
    FROM nodes AS child_node
    INNER JOIN nodes AS parent_node
        ON parent_node.id = child_node.parent_id
    WHERE child_node.typename = 4
      AND parent_node.typename = 3
      AND (((parent_node.hyperdata -> 'resources') -> 0) ->> 'type') = '3'
) AS counted_nodes
    ON counted_nodes.id = occ.node_id
-- only ngrams attached to the typename-4 children of node 118117
INNER JOIN (
    SELECT DISTINCT nn.ngram_id AS uniq_ngid
    FROM nodes_ngrams AS nn
    INNER JOIN nodes AS n
        ON n.id = nn.node_id
    WHERE n.typename = 4
      AND n.parent_id = 118117
) AS eligible_ngrams
    ON eligible_ngrams.uniq_ngid = occ.ngram_id
GROUP BY counted_ngform
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment