Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
gargantext
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
humanities
gargantext
Commits
78eb26d4
Commit
78eb26d4
authored
Sep 21, 2016
by
delanoe
Browse files
Options
Browse Files
Download
Plain Diff
Merge remote-tracking branch 'origin/romain-testing' into testing-merge
parents
4bccaa96
5a2f7efb
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
205 additions
and
76 deletions
+205
-76
app.css
annotations/static/annotations/app.css
+6
-3
app.js
annotations/static/annotations/app.js
+17
-2
highlight.js
annotations/static/annotations/highlight.js
+95
-41
http.js
annotations/static/annotations/http.js
+6
-3
ngramlist.js
annotations/static/annotations/ngramlist.js
+7
-7
ngrams_addition.py
gargantext/util/toolchain/ngrams_addition.py
+34
-13
ngrams.py
gargantext/views/api/ngrams.py
+40
-7
No files found.
annotations/static/annotations/app.css
View file @
78eb26d4
...
@@ -151,12 +151,15 @@
...
@@ -151,12 +151,15 @@
transition
:
all
0.25s
linear
;
transition
:
all
0.25s
linear
;
}
}
.selection
{
/* this was used for the p or div that *contained* a selection */
/*.selection {
color: #aaa;
color: #aaa;
}
}*/
/* this is used for the selected text itself */
::selection
{
::selection
{
color
:
black
;
color
:
black
;
background-color
:
rgba
(
0
,
0
,
0
,
0.4
)
;
background-color
:
#aaa
;
}
}
.noselection
{
.noselection
{
...
...
annotations/static/annotations/app.js
View file @
78eb26d4
...
@@ -97,6 +97,21 @@
...
@@ -97,6 +97,21 @@
// +propToRead+" ("+cache[propToRead]+")")
// +propToRead+" ("+cache[propToRead]+")")
params
[
key
]
=
cache
[
propToRead
]
params
[
key
]
=
cache
[
propToRead
]
}
}
else
if
(
typeof
val
==
"object"
&&
val
[
"fromCacheIfElse"
])
{
var
propToReadIf
=
val
[
"fromCacheIfElse"
][
0
]
var
propToReadElse
=
val
[
"fromCacheIfElse"
][
1
]
// console.log("reading from cache: response data property " +
// "if:"+propToReadIf+" ("+cache[propToReadIf]+")"+
// " else:"+propToReadElse+" ("+cache[propToReadElse]+")")
var
valueIf
=
cache
[
propToReadIf
]
var
valueElse
=
cache
[
propToReadElse
]
if
(
valueIf
&&
valueIf
!=
'null'
&&
valueIf
!=
''
)
{
params
[
key
]
=
valueIf
}
else
{
params
[
key
]
=
valueElse
}
}
}
}
// Now we run the call
// Now we run the call
...
@@ -149,8 +164,8 @@
...
@@ -149,8 +164,8 @@
// -------------------------------------------------------------------------
// -------------------------------------------------------------------------
// debug
// debug
//
console.log("==> $rootScope <==")
console
.
log
(
"==> $rootScope <=="
)
//
console.log($rootScope)
console
.
log
(
$rootScope
)
});
});
})(
window
);
})(
window
);
annotations/static/annotations/highlight.js
View file @
78eb26d4
...
@@ -50,6 +50,9 @@
...
@@ -50,6 +50,9 @@
/*
/*
* Universal text selection
* Universal text selection
*
* "universal" <=> (Chrome, Firefox, IE, Safari, Opera...)
* cf. quirksmode.org/dom/range_intro.html
*/
*/
function
getSelected
()
{
function
getSelected
()
{
if
(
window
.
getSelection
)
{
if
(
window
.
getSelection
)
{
...
@@ -67,19 +70,15 @@
...
@@ -67,19 +70,15 @@
}
}
return
false
;
return
false
;
}
}
// we only need one singleton at a time
var
selection
=
getSelected
();
/*
// £TODO extend "double click selection" on hyphen words
* When mouse selection is started, we highlight it
// and reduce it on apostrophe ones (except firefox)
*/
// cf. stackoverflow.com/a/39005881/2489184
function
toggleSelectionHighlight
(
text
)
{
// jsfiddle.net/avvhsruu/
if
(
text
.
trim
()
!==
""
&&
!
$element
.
hasClass
(
'menu-is-opened'
))
{
$
(
".text-panel"
).
addClass
(
"selection"
);
// we only need one singleton at a time
}
else
{
// (<=> is only created once per doc, but value of annotation changes)
$
(
".text-panel"
).
removeClass
(
"selection"
);
var
selectionObj
=
getSelected
();
}
}
/*
/*
* Dynamically construct the selection menu scope
* Dynamically construct the selection menu scope
...
@@ -107,10 +106,11 @@
...
@@ -107,10 +106,11 @@
$scope
.
selection_text
=
angular
.
copy
(
annotation
);
$scope
.
selection_text
=
angular
.
copy
(
annotation
);
// debug
// debug
// console.log("toggleMenu with context:", context) ;
// console.log("toggleMenu with annotation: '" + JSON.stringify(annotation) +"'") ;
// console.log("toggleMenu with \$scope.selection_text: '" + JSON.stringify($scope.selection_text) +"'") ;
// console.log("toggleMenu with \$scope.selection_text: '" + JSON.stringify($scope.selection_text) +"'") ;
if
(
angular
.
isObject
(
annotation
)
&&
!
$element
.
hasClass
(
'menu-is-opened'
))
{
if
(
angular
.
isObject
(
annotation
)
&&
!
$element
.
hasClass
(
'menu-is-opened'
))
{
// existing ngram
// existing ngram
var
ngramId
=
annotation
.
uuid
var
ngramId
=
annotation
.
uuid
var
mainformId
=
annotation
.
group
var
mainformId
=
annotation
.
group
...
@@ -210,7 +210,7 @@
...
@@ -210,7 +210,7 @@
}
}
// "add" actions for non-existing ngram
// "add" actions for non-existing ngram
else
if
(
annotation
.
trim
()
!==
""
&&
!
$element
.
hasClass
(
'menu-is-opened'
)
)
{
else
if
(
annotation
.
trim
()
!==
""
&&
!
context
)
{
var
newNgramText
=
annotation
.
trim
()
var
newNgramText
=
annotation
.
trim
()
// new ngram (first call creates then like previous case for list)
// new ngram (first call creates then like previous case for list)
$scope
.
menuItems
.
push
({
$scope
.
menuItems
.
push
({
...
@@ -219,9 +219,9 @@
...
@@ -219,9 +219,9 @@
'crudCalls'
:[
'crudCalls'
:[
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'ngramStr'
:
newNgramText
,
corpusId
:
$rootScope
.
corpusId
},
'params'
:
{
'ngramStr'
:
newNgramText
,
corpusId
:
$rootScope
.
corpusId
},
'dataPropertiesToCache'
:
[
'id'
]
},
'dataPropertiesToCache'
:
[
'id'
,
'group'
]
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
stoplist_id
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
}
'params'
:
{
'listId'
:
stoplist_id
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
}
]
]
})
;
})
;
$scope
.
menuItems
.
push
({
$scope
.
menuItems
.
push
({
...
@@ -230,9 +230,9 @@
...
@@ -230,9 +230,9 @@
'crudCalls'
:[
'crudCalls'
:[
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'ngramStr'
:
newNgramText
,
corpusId
:
$rootScope
.
corpusId
},
'params'
:
{
'ngramStr'
:
newNgramText
,
corpusId
:
$rootScope
.
corpusId
},
'dataPropertiesToCache'
:
[
'id'
]
},
'dataPropertiesToCache'
:
[
'id'
,
'group'
]
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
mainlist_id
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
}
'params'
:
{
'listId'
:
mainlist_id
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
}
]
]
})
;
})
;
$scope
.
menuItems
.
push
({
$scope
.
menuItems
.
push
({
...
@@ -241,23 +241,27 @@
...
@@ -241,23 +241,27 @@
'crudCalls'
:[
'crudCalls'
:[
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'ngramStr'
:
newNgramText
,
corpusId
:
$rootScope
.
corpusId
},
'params'
:
{
'ngramStr'
:
newNgramText
,
corpusId
:
$rootScope
.
corpusId
},
'dataPropertiesToCache'
:
[
'id'
]
},
'dataPropertiesToCache'
:
[
'id'
,
'group'
]
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
mainlist_id
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
},
'params'
:
{
'listId'
:
mainlist_id
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
maplist_id
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
}
'params'
:
{
'listId'
:
maplist_id
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
}
]
]
})
;
})
;
// show the menu
// show the menu
$element
.
fadeIn
(
50
);
$element
.
fadeIn
(
50
);
$element
.
addClass
(
'menu-is-opened'
);
$element
.
addClass
(
'menu-is-opened'
);
// console.warn("FADE IN menu", $element)
}
}
else
{
else
{
// console.warn("=> else")
// close the menu
// close the menu
$scope
.
menuItems
=
[];
$scope
.
menuItems
=
[];
$element
.
fadeOut
(
50
);
$element
.
fadeOut
(
50
);
$element
.
removeClass
(
'menu-is-opened'
);
$element
.
removeClass
(
'menu-is-opened'
);
// console.warn("FADE OUT menu", $element)
}
}
});
});
});
});
...
@@ -283,26 +287,15 @@
...
@@ -283,26 +287,15 @@
});
});
/*
/*
*
Finish positioning the menu then display the menu
*
Toggle the menu when clicking on an existing ngram or a free selection
*/
*/
$
(
".text-container"
).
mouseup
(
function
(
e
){
$
(
".text-container"
).
mouseup
(
function
(
e
){
$
(
".text-container"
).
unbind
(
"mousemove"
,
positionMenu
);
$
(
".text-container"
).
unbind
(
"mousemove"
,
positionMenu
);
$rootScope
.
$emit
(
"positionAnnotationMenu"
,
e
.
pageX
,
e
.
pageY
);
$rootScope
.
$emit
(
"positionAnnotationMenu"
,
e
.
pageX
,
e
.
pageY
);
toggleSelectionHighlight
(
selection
.
toString
().
trim
());
toggleMenu
(
null
,
selection
.
toString
().
trim
());
});
/*
* Toggle the menu when clicking on an existing ngram keyword
*
* £TODO test: apparently this is never used ?
* (superseded by TextSelectionController.onClick)
*/
$
(
".text-container"
).
delegate
(
':not("#selection")'
,
"click"
,
function
(
e
)
{
// if ($(e.target).hasClass("keyword-inline")) return;
positionMenu
(
e
);
positionMenu
(
e
);
toggleSelectionHighlight
(
selection
.
toString
().
trim
());
// console.warn("calling toggleMenu from *mouseup*")
toggleMenu
(
null
,
selection
.
toString
().
trim
());
toggleMenu
(
null
,
selection
Obj
.
toString
().
trim
());
});
});
$rootScope
.
$on
(
"positionAnnotationMenu"
,
positionElement
);
$rootScope
.
$on
(
"positionAnnotationMenu"
,
positionElement
);
...
@@ -322,9 +315,11 @@
...
@@ -322,9 +315,11 @@
$rootScope
.
makeChainedCalls
(
0
,
todoCrudCalls
,
$rootScope
.
refresh
)
$rootScope
.
makeChainedCalls
(
0
,
todoCrudCalls
,
$rootScope
.
refresh
)
// syntax: (step_to_run_first, list_of_steps, lastCallback)
// syntax: (step_to_run_first, list_of_steps, lastCallback)
// hide the highlighted text and the menu element
// hide the menu element
$
(
".text-panel"
).
removeClass
(
"selection"
);
$element
.
fadeOut
(
100
);
$element
.
fadeOut
(
100
);
// the highlighted text hides itself when deselected
// (thx to browser and css ::selection)
};
};
}
}
]);
]);
...
@@ -407,10 +402,68 @@
...
@@ -407,10 +402,68 @@
var
template
=
templateBegin
+
templateEnd
;
var
template
=
templateBegin
+
templateEnd
;
var
templateBeginRegexp
=
"<span ng-controller='TextSelectionController' ng-click='onClick
\
(
\
$event
\
)' class='keyword-inline'>"
;
var
templateBeginRegexp
=
"<span ng-controller='TextSelectionController' ng-click='onClick
\
(
\
$event
\
)' class='keyword-inline'>"
;
var
startPattern
=
"
\\
b
((?:"
+
templateBeginRegexp
+
")*"
;
var
startPattern
=
"
(
\\
W|^)
((?:"
+
templateBeginRegexp
+
")*"
;
var
middlePattern
=
"(?:<
\
/span>)*
\\
s(?:"
+
templateBeginRegexp
+
")*"
;
var
middlePattern
=
"(?:<
\
/span>)*
\\
s(?:"
+
templateBeginRegexp
+
")*"
;
var
middlePattern
=
" "
;
var
middlePattern
=
" "
;
var
endPattern
=
"(?:<
\
/span>)*)
\\
b"
;
var
endPattern
=
"(?:<
\
/span>)*)(?=
\\
W|$)"
;
// --------------------------------------------------------------------------------
// Remarks about /\b/ and /(\W|^)/ and /(?=\W|$)/ etc.
//
// -----------------
// 1) we need to match entire words only
//
// ex: "the manifestation manifest".match(/manifest/g)
//
// => not good because it would hilight the substr
// inside 2nd word "the manifestation manifest"
// ^^^^^^^^ ^^^^^^^^
//
// so in this situation one usually uses \b (boundary)
//
// ex: "the manifestation manifest".match(/\bmanifest\b/g)
//
// ok: now only 3rd word is highlighted:
// "the manifestation manifest"
// ^^^^^^^^
// -----------------
//
// 2) but we can't really use boundary \b when we have accented chars
// ex:
// no accent: "la moitié".match(/la/) => ["la"]
// "la moitié".match(/\bla\b/) => ["la"]
//
// but "la moitié".match(/moitié/) => ["moitié"]
// "la moitié".match(/\bmoitié\b/) => [] <~~~ problem !
//
// cf. stackoverflow.com/questions/23458872/javascript-regex-word-boundary-b-issue
// stackoverflow.com/questions/2881445/utf-8-word-boundary-regex-in-javascript
// -----------------
//
// 3) normally the typical replacement for \b would be:
// - at start of string: /(?<=\W|^)/ (lookbehind boundary)
// - at end of string: /(?=\W|$)/ (lookahead boundary)
//
// ...
// but lookbehind not supported in js !! (sept 2016)
// cf. stackoverflow.com/questions/30118815
// -----------------
//
// 4) so in conclusion we will use this strategy:
//
// - at start of string: /(\W|^)/ (boundary, may capture ' ' or '' into $1)
// - for the html+word: /<aa>bla</aa>/ (same pattern as before)
// - at end of string: /(?=\W|$)/ (lookahead boundary)
// - in replacement: $1+anchor
//
// => This way if $1 was ' ' (or other non word char),
// then we re-add the char that we are replacing,
// and if $1 was '' (beginning of str)
// then we re-add nothing ;) )
//
// ex: "la moitié".replace(/(\s|^)moitié(?=\s|$)/, '$1hello') => "la hello"
// "moitié la".replace(/(\s|^)moitié(?=\s|$)/, '$1hello') => "hello la"
// ---------------------------------------------------------------------------------
// hash of flags filled in first pass loop : (== did annotation i match ?)
// hash of flags filled in first pass loop : (== did annotation i match ?)
var
isDisplayedIntraText
=
{};
var
isDisplayedIntraText
=
{};
...
@@ -453,8 +506,8 @@
...
@@ -453,8 +506,8 @@
// var myPattern = new RegExp("\\b"+escapeRegExp(annotation.text)+"\\b", 'igm');
// var myPattern = new RegExp("\\b"+escapeRegExp(annotation.text)+"\\b", 'igm');
// previously:
// previously:
var
words
=
annotation
.
text
.
split
(
" "
).
map
(
escapeRegExp
);
var
words
=
annotation
.
text
.
split
(
" "
).
map
(
escapeRegExp
);
var
myPattern
=
new
RegExp
(
startPattern
+
words
.
join
(
middlePattern
)
+
endPattern
,
'gmi'
);
var
myPattern
=
new
RegExp
(
startPattern
+
words
.
join
(
middlePattern
)
+
endPattern
,
'gmi'
);
// -------------------------------------------
// -------------------------------------------
// replace in text: matched annots by anchors
// replace in text: matched annots by anchors
...
@@ -472,6 +525,7 @@
...
@@ -472,6 +525,7 @@
// £dbgcount here unnecessary nbMatches (can go straight to ICI)
// £dbgcount here unnecessary nbMatches (can go straight to ICI)
var
matches
=
eltLongtext
.
match
(
myPattern
)
var
matches
=
eltLongtext
.
match
(
myPattern
)
var
nbMatches
=
matches
?
eltLongtext
.
match
(
myPattern
).
length
:
0
var
nbMatches
=
matches
?
eltLongtext
.
match
(
myPattern
).
length
:
0
if
(
nbMatches
>
0
)
{
if
(
nbMatches
>
0
)
{
k
+=
nbMatches
;
k
+=
nbMatches
;
...
@@ -480,7 +534,7 @@
...
@@ -480,7 +534,7 @@
l
++
;
l
++
;
// ------------------------------------------------------------
// ------------------------------------------------------------
// ICI we update each time
// ICI we update each time
textMapping
[
eltId
]
=
eltLongtext
.
replace
(
myPattern
,
myAnchor
);
textMapping
[
eltId
]
=
eltLongtext
.
replace
(
myPattern
,
"$1"
+
myAnchor
);
// ex longtext -- "Background Few previous studies have
// ex longtext -- "Background Few previous studies have
// examined non-wealth-based inequalities etc"
// examined non-wealth-based inequalities etc"
...
...
annotations/static/annotations/http.js
View file @
78eb26d4
...
@@ -90,17 +90,20 @@
...
@@ -90,17 +90,20 @@
* MainApiAddNgramHttpService: Create and index a new ngram
* MainApiAddNgramHttpService: Create and index a new ngram
* ===========================
* ===========================
* route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
* route: PUT api/ngrams?text=mynewngramstring&corpus=corpus_id
* ------
*
* NB it also checks if ngram exists (returns the preexisting id)
* and if it has a mainform/group (via 'testgroup' option)
* (useful if we add it to a list afterwards)
*
*
*/
*/
http
.
factory
(
'MainApiAddNgramHttpService'
,
function
(
$resource
)
{
http
.
factory
(
'MainApiAddNgramHttpService'
,
function
(
$resource
)
{
return
$resource
(
return
$resource
(
// adding explicit "http://" b/c this a cross origin request
// adding explicit "http://" b/c this a cross origin request
'http://'
+
window
.
GARG_ROOT_URL
'http://'
+
window
.
GARG_ROOT_URL
+
"/api/ngrams?text=:ngramStr&corpus=:corpusId"
,
+
"/api/ngrams?text=:ngramStr&corpus=:corpusId
&testgroup
"
,
{
{
ngramStr
:
'@ngramStr'
,
ngramStr
:
'@ngramStr'
,
corpusId
:
'@corpusId'
corpusId
:
'@corpusId'
,
},
},
{
{
put
:
{
put
:
{
...
...
annotations/static/annotations/ngramlist.js
View file @
78eb26d4
...
@@ -141,9 +141,9 @@
...
@@ -141,9 +141,9 @@
crudCallsToMake
=
[
crudCallsToMake
=
[
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'ngramStr'
:
value
,
corpusId
:
$rootScope
.
corpusId
},
'params'
:
{
'ngramStr'
:
value
,
corpusId
:
$rootScope
.
corpusId
},
'dataPropertiesToCache'
:
[
'id'
]
},
'dataPropertiesToCache'
:
[
'id'
,
'group'
]
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
tgtListId
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
}
'params'
:
{
'listId'
:
tgtListId
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
}
];
];
break
;
break
;
...
@@ -151,9 +151,9 @@
...
@@ -151,9 +151,9 @@
crudCallsToMake
=
[
crudCallsToMake
=
[
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'ngramStr'
:
value
,
corpusId
:
$rootScope
.
corpusId
},
'params'
:
{
'ngramStr'
:
value
,
corpusId
:
$rootScope
.
corpusId
},
'dataPropertiesToCache'
:
[
'id'
]
},
'dataPropertiesToCache'
:
[
'id'
,
'group'
]
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
tgtListId
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
}
'params'
:
{
'listId'
:
tgtListId
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
}
];
];
break
;
break
;
...
@@ -161,11 +161,11 @@
...
@@ -161,11 +161,11 @@
crudCallsToMake
=
[
crudCallsToMake
=
[
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiAddNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'ngramStr'
:
value
,
corpusId
:
$rootScope
.
corpusId
},
'params'
:
{
'ngramStr'
:
value
,
corpusId
:
$rootScope
.
corpusId
},
'dataPropertiesToCache'
:
[
'id'
]
},
'dataPropertiesToCache'
:
[
'id'
,
'group'
]
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
$rootScope
.
listIds
.
MAINLIST
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
},
'params'
:
{
'listId'
:
$rootScope
.
listIds
.
MAINLIST
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
},
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
{
'service'
:
MainApiChangeNgramHttpService
,
'action'
:
'put'
,
'params'
:
{
'listId'
:
tgtListId
,
'ngramIdList'
:
{
'fromCache
'
:
'id'
}
}
}
'params'
:
{
'listId'
:
tgtListId
,
'ngramIdList'
:
{
'fromCache
IfElse'
:
[
'group'
,
'id'
]
}
}
}
];
];
break
;
break
;
}
}
...
...
gargantext/util/toolchain/ngrams_addition.py
View file @
78eb26d4
...
@@ -19,6 +19,7 @@ procedure:
...
@@ -19,6 +19,7 @@ procedure:
from
gargantext.models
import
Ngram
,
Node
,
NodeNgram
from
gargantext.models
import
Ngram
,
Node
,
NodeNgram
from
gargantext.util.db
import
session
,
bulk_insert
from
gargantext.util.db
import
session
,
bulk_insert
from
gargantext.util.db
import
bulk_insert_ifnotexists
# £TODO debug
from
sqlalchemy
import
distinct
from
sqlalchemy
import
distinct
from
re
import
findall
,
IGNORECASE
from
re
import
findall
,
IGNORECASE
...
@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
...
@@ -41,20 +42,13 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
@param keys: the hyperdata fields to index
@param keys: the hyperdata fields to index
"""
"""
# check the ngrams we won't process (those that were already indexed)
# retrieve *all* the ngrams from our list
indexed_ngrams_subquery
=
(
session
# (even if some relations may be already indexed
.
query
(
distinct
(
NodeNgram
.
ngram_id
))
# b/c they were perhaps not extracted in all docs
.
join
(
Node
,
Node
.
id
==
NodeNgram
.
node_id
)
# => we'll use already_indexed later)
.
filter
(
Node
.
parent_id
==
corpus
.
id
)
.
filter
(
Node
.
typename
==
'DOCUMENT'
)
.
subquery
()
)
# retrieve the ngrams from our list, filtering out the already indexed ones
todo_ngrams
=
(
session
todo_ngrams
=
(
session
.
query
(
Ngram
)
.
query
(
Ngram
)
.
filter
(
Ngram
.
id
.
in_
(
ngram_ids
))
.
filter
(
Ngram
.
id
.
in_
(
ngram_ids
))
.
filter
(
~
Ngram
.
id
.
in_
(
indexed_ngrams_subquery
))
.
all
()
.
all
()
)
)
...
@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
...
@@ -90,22 +84,49 @@ def index_new_ngrams(ngram_ids, corpus, keys=('title', 'abstract', )):
else
:
else
:
node_ngram_to_write
[
doc
.
id
][
ngram
.
id
]
+=
n_occs
node_ngram_to_write
[
doc
.
id
][
ngram
.
id
]
+=
n_occs
# debug
# print("new node_ngrams before filter:", node_ngram_to_write)
# check the relations we won't insert (those that were already indexed)
# NB costly but currently impossible with bulk_insert_ifnotexists
# b/c double uniquekey
already_indexed
=
(
session
.
query
(
NodeNgram
.
node_id
,
NodeNgram
.
ngram_id
)
.
join
(
Node
,
Node
.
id
==
NodeNgram
.
node_id
)
.
filter
(
Node
.
parent_id
==
corpus
.
id
)
.
filter
(
Node
.
typename
==
'DOCUMENT'
)
.
all
()
)
filter_out
=
{(
nd_id
,
ng_id
)
for
(
nd_id
,
ng_id
)
in
already_indexed
}
# POSSIBLE update those that are filtered out if wei_previous != wei
# integrate all at the end
# integrate all at the end
my_new_rows
=
[]
my_new_rows
=
[]
add_new_row
=
my_new_rows
.
append
add_new_row
=
my_new_rows
.
append
for
doc_id
in
node_ngram_to_write
:
for
doc_id
in
node_ngram_to_write
:
for
ngram_id
in
node_ngram_to_write
[
doc_id
]:
for
ngram_id
in
node_ngram_to_write
[
doc_id
]:
wei
=
node_ngram_to_write
[
doc_id
][
ngram_id
]
if
(
doc_id
,
ngram_id
)
not
in
filter_out
:
add_new_row
([
doc_id
,
ngram_id
,
wei
])
wei
=
node_ngram_to_write
[
doc_id
][
ngram_id
]
add_new_row
([
doc_id
,
ngram_id
,
wei
])
del
node_ngram_to_write
del
node_ngram_to_write
# debug
# print("new node_ngrams after filter:", my_new_rows)
bulk_insert
(
bulk_insert
(
table
=
NodeNgram
,
table
=
NodeNgram
,
fields
=
(
'node_id'
,
'ngram_id'
,
'weight'
),
fields
=
(
'node_id'
,
'ngram_id'
,
'weight'
),
data
=
my_new_rows
data
=
my_new_rows
)
)
# bulk_insert_ifnotexists(
# model = NodeNgram,
# uniquekey = ('node_id','ngram_id'), <= currently impossible
# fields = ('node_id', 'ngram_id', 'weight'),
# data = my_new_rows
# )
n_added
=
len
(
my_new_rows
)
n_added
=
len
(
my_new_rows
)
print
(
"index_new_ngrams: added
%
i new NodeNgram rows"
%
n_added
)
print
(
"index_new_ngrams: added
%
i new NodeNgram rows"
%
n_added
)
...
...
gargantext/views/api/ngrams.py
View file @
78eb26d4
...
@@ -2,8 +2,8 @@ from gargantext.util.http import ValidationException, APIView \
...
@@ -2,8 +2,8 @@ from gargantext.util.http import ValidationException, APIView \
,
get_parameters
,
JsonHttpResponse
\
,
get_parameters
,
JsonHttpResponse
\
,
HttpResponse
,
HttpResponse
from
gargantext.util.db
import
session
,
func
from
gargantext.util.db
import
session
,
func
from
gargantext.util.db_cache
import
cache
from
gargantext.util.db_cache
import
cache
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
from
gargantext.models
import
Node
,
Ngram
,
NodeNgram
,
NodeNgramNgram
from
sqlalchemy.orm
import
aliased
from
sqlalchemy.orm
import
aliased
from
re
import
findall
from
re
import
findall
...
@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
...
@@ -21,7 +21,7 @@ class ApiNgrams(APIView):
"""
"""
Used for analytics
Used for analytics
------------------
------------------
Get ngram listing + counts in a given scope
Get ngram listing + counts in a given scope
"""
"""
# parameters retrieval and validation
# parameters retrieval and validation
...
@@ -83,24 +83,30 @@ class ApiNgrams(APIView):
...
@@ -83,24 +83,30 @@ class ApiNgrams(APIView):
1 - checks user authentication before any changes
1 - checks user authentication before any changes
2 - adds the ngram to Ngram table in DB
2 - checks if ngram to Ngram table in DB
if yes returns ngram_id and optionally mainform_id
otherwise continues
3 - adds the ngram to Ngram table in DB
3
- (if corpus param is present)
4
- (if corpus param is present)
adds the ngram doc counts to NodeNgram table in DB
adds the ngram doc counts to NodeNgram table in DB
(aka "index the ngram" throught the docs of the corpus)
(aka "index the ngram" throught the docs of the corpus)
4
- returns json with:
5
- returns json with:
'msg' => a success msg
'msg' => a success msg
'text' => the initial text content
'text' => the initial text content
'term' => the normalized text content
'term' => the normalized text content
'id' => the new ngram_id
'id' => the new ngram_id
'count' => the number of docs with the ngram in the corpus
'count' => the number of docs with the ngram in the corpus
(if corpus param is present)
(if corpus param is present)
'group' => the mainform_id if applicable
possible inline parameters
possible inline parameters
--------------------------
--------------------------
@param text=<ngram_string> [required]
@param text=<ngram_string> [required]
@param corpus=<CORPUS_ID> [optional]
@param corpus=<CORPUS_ID> [optional]
@param testgroup (true if present) [optional, requires corpus]
"""
"""
# 1 - check user authentication
# 1 - check user authentication
...
@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
...
@@ -122,6 +128,9 @@ class ApiNgrams(APIView):
It requires a "text" parameter,
\
It requires a "text" parameter,
\
for instance /api/ngrams?text=hydrometallurgy'
)
for instance /api/ngrams?text=hydrometallurgy'
)
if
(
'testgroup'
in
params
)
and
(
not
(
'corpus'
in
params
)):
raise
ValidationException
(
"'testgroup' param requires 'corpus' param"
)
# if we have a 'corpus' param (to do the indexing)...
# if we have a 'corpus' param (to do the indexing)...
do_indexation
=
False
do_indexation
=
False
if
'corpus'
in
params
:
if
'corpus'
in
params
:
...
@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
...
@@ -143,10 +152,33 @@ class ApiNgrams(APIView):
try
:
try
:
log_msg
=
""
log_msg
=
""
ngram_id
=
None
ngram_id
=
None
mainform_id
=
None
preexisting
=
session
.
query
(
Ngram
)
.
filter
(
Ngram
.
terms
==
ngram_str
)
.
first
()
preexisting
=
session
.
query
(
Ngram
)
.
filter
(
Ngram
.
terms
==
ngram_str
)
.
first
()
if
preexisting
is
not
None
:
if
preexisting
is
not
None
:
ngram_id
=
preexisting
.
id
ngram_id
=
preexisting
.
id
log_msg
+=
"ngram already existed (id
%
i)
\n
"
%
ngram_id
log_msg
+=
"ngram already existed (id
%
i)
\n
"
%
ngram_id
# in the context of a corpus we can also check if has mainform
# (useful for)
if
'testgroup'
in
params
:
groupings_id
=
(
session
.
query
(
Node
.
id
)
.
filter
(
Node
.
parent_id
==
corpus_id
)
.
filter
(
Node
.
typename
==
'GROUPLIST'
)
.
first
()
)
had_mainform
=
(
session
.
query
(
NodeNgramNgram
.
ngram1_id
)
.
filter
(
NodeNgramNgram
.
node_id
==
groupings_id
)
.
filter
(
NodeNgramNgram
.
ngram2_id
==
preexisting
.
id
)
.
first
()
)
if
had_mainform
:
mainform_id
=
had_mainform
[
0
]
log_msg
+=
"ngram had mainform (id
%
i) in this corpus"
%
mainform_id
else
:
log_msg
+=
"ngram was not in any group for this corpus"
else
:
else
:
# 2 - insert into Ngrams
# 2 - insert into Ngrams
new_ngram
=
Ngram
(
terms
=
ngram_str
,
n
=
ngram_size
)
new_ngram
=
Ngram
(
terms
=
ngram_str
,
n
=
ngram_size
)
...
@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
...
@@ -165,6 +197,7 @@ class ApiNgrams(APIView):
'text'
:
original_text
,
'text'
:
original_text
,
'term'
:
ngram_str
,
'term'
:
ngram_str
,
'id'
:
ngram_id
,
'id'
:
ngram_id
,
'group'
:
mainform_id
,
'count'
:
n_added
if
do_indexation
else
'no corpus provided for indexation'
'count'
:
n_added
if
do_indexation
else
'no corpus provided for indexation'
},
200
)
},
200
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment