Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
G
GarganTexternal tools
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Anne-Laure Thomas Derepas
GarganTexternal tools
Commits
333eb348
Commit
333eb348
authored
Aug 16, 2023
by
Loïc Chapron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add page from conversion
parent
4463cc66
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
262 additions
and
2 deletions
+262
-2
risCorpusToTsv.py
Conversion/ToTSV/risCorpusToTSV/risCorpusToTsv.py
+1
-1
text_GarganTextJsonToTSV.csv
Streamlit/lang/text_GarganTextJsonToTSV.csv
+1
-1
text_PubMedToGarganText.csv
Streamlit/lang/text_PubMedToGarganText.csv
+15
-0
text_RisToGarganText.csv
Streamlit/lang/text_RisToGarganText.csv
+15
-0
Pubmed_To_GarganText.py
Streamlit/pages/Pubmed_To_GarganText.py
+120
-0
Ris_To_GarganText.py
Streamlit/pages/Ris_To_GarganText.py
+110
-0
No files found.
Conversion/ToTSV/risCorpusToTSV/risCorpusToTsv.py
View file @
333eb348
...
@@ -66,7 +66,7 @@ with open(path, 'r') as corpus :
...
@@ -66,7 +66,7 @@ with open(path, 'r') as corpus :
day
=
tmp
[
2
]
day
=
tmp
[
2
]
else
:
else
:
if
doc
.
__contains__
(
'year'
):
if
doc
.
__contains__
(
'year'
):
year
=
doc
[
'year'
]
year
=
doc
[
'year'
]
.
replace
(
'/'
,
''
)
.
replace
(
'.'
,
''
)
else
:
else
:
year
=
str
(
date
.
today
()
.
year
)
year
=
str
(
date
.
today
()
.
year
)
...
...
Streamlit/lang/text_GarganTextJsonToTSV.csv
View file @
333eb348
...
@@ -3,7 +3,7 @@ fr,title,"# Json Vers TSV"
...
@@ -3,7 +3,7 @@ fr,title,"# Json Vers TSV"
en,title,"# Json To TSV"
en,title,"# Json To TSV"
fr,text,"Transforme un corpus Json venant de Gargantext en TSV pour GarganText"
fr,text,"Transforme un corpus Json venant de Gargantext en TSV pour GarganText"
en,text,"Transform a Json corpus fro
n
GarganText to a TSV file for GarganText"
en,text,"Transform a Json corpus fro
m
GarganText to a TSV file for GarganText"
fr,file,"Choisir un fichier"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
en,file,"Choose a file"
...
...
Streamlit/lang/text_PubMedToGarganText.csv
0 → 100644
View file @
333eb348
locale,key,value
fr,title,"# PubMed Vers GarganText"
en,title,"# PubMed To GarganText"
fr,text,"Transforme un corpus PubMed en TSV pour GarganText"
en,text,"Transform a PubMed corpus to a TSV file for GarganText"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
fr,new_file,"Télécharge ton fichier TSV :"
en,new_file,"Download your TSV file:"
fr,error,"Erreur : le fichier n'est pas valide"
en,error,"Error: the file isn't valid"
\ No newline at end of file
Streamlit/lang/text_RisToGarganText.csv
0 → 100644
View file @
333eb348
locale,key,value
fr,title,"# RIS Vers GarganText"
en,title,"# RIS To GarganText"
fr,text,"Transforme un corpus RIS en TSV pour GarganText"
en,text,"Transform a RIS corpus to a TSV file for GarganText"
fr,file,"Choisir un fichier"
en,file,"Choose a file"
fr,new_file,"Télécharge ton fichier TSV :"
en,new_file,"Download your TSV file:"
fr,error,"Erreur : le fichier n'est pas valide"
en,error,"Error: the file isn't valid"
\ No newline at end of file
Streamlit/pages/Pubmed_To_GarganText.py
0 → 100644
View file @
333eb348
"""
Streamlit Application
Loïc Chapron
"""
import
streamlit
as
st
import
pandas
as
pd
import
nbib
import
re
import
calendar
# Banner image displayed at the top of the page.
st.image('img/gargantool_banner.jpg')

# CSS override for Streamlit's main container padding. It is injected through
# st.markdown, which only emits the raw <style> tag when unsafe_allow_html
# is enabled.
page_css = """
<style>
.block-container {
padding-top: 2rem;
padding-bottom: 0rem;
padding-left: 1rem;
padding-right: 1rem;
}
</style>
"""
st.markdown(page_css, unsafe_allow_html=True)
def load_bundle(lang):
    """Return this page's UI strings for one locale.

    Reads ``lang/text_PubMedToGarganText.csv`` (columns ``locale``, ``key``
    and ``value``), resolved against the current working directory, and
    keeps the rows whose ``locale`` equals *lang*.

    Args:
        lang: Locale code as written in the ``locale`` column; this page
            passes ``'fr'`` or ``'en'``.

    Returns:
        A dict mapping every ``key`` to its ``value`` for that locale. The
        dict is empty when no row matches; when a key is repeated, the last
        row wins.
    """
    df = pd.read_csv("lang/text_PubMedToGarganText.csv")
    # Filter with a boolean mask rather than an f-string query: the locale is
    # compared as a value, so a quote inside it cannot break the expression.
    rows = df[df["locale"] == lang]
    # Pair the two columns in a single pass instead of rebuilding both column
    # lists on every loop iteration.
    return dict(zip(rows["key"], rows["value"]))
def update_lang():
    """Reload the page strings for the language currently selected.

    Looks up the locale code of ``general_language`` in ``general_lang_dict``
    and stores the matching bundle in ``general_text_dict``; all three live
    in ``st.session_state``.
    """
    state = st.session_state
    locale = state.general_lang_dict[state.general_language]
    state.general_text_dict = load_bundle(locale)
# Session bootstrap. The 'general_*' entries of st.session_state hold the
# language table, the selected language, the loaded strings and the name of
# the page that last ran.
if 'general_session_page' not in st.session_state:
    # No page has run yet in this session: start in French.
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_language = 'Français'
    st.session_state.general_session_page = 'PubMedToGarganText'
elif st.session_state.general_session_page != 'PubMedToGarganText':
    # Another page ran last: load this page's strings in the language
    # already selected and record the page change.
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )
    st.session_state.general_session_page = 'PubMedToGarganText'
    # Drop every entry that is not a shared 'general_' one. Iterate over a
    # snapshot of the keys so that deleting entries cannot disturb the
    # iteration.
    # NOTE(review): the nesting of this loop under the elif was reconstructed
    # from a flattened listing - confirm against the repository.
    for key in list(st.session_state.keys()):
        if 'general_' not in key:
            del st.session_state[key]

# Language selector: it writes to 'general_language' and reloads the strings
# through update_lang whenever the choice changes.
language_names = list(st.session_state.general_lang_dict.keys())
st.selectbox(
    'Langue',
    language_names,
    language_names.index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)
# Characters that would break a tab-separated row: double quotes and tabs are
# removed, line breaks are turned into spaces.
_TSV_CELL_TABLE = str.maketrans({'"': None, '\t': None, '\n': ' ', '\r': ' '})


def _tsv_cell(value):
    """Return *value* as text that fits in one tab-separated cell."""
    return str(value).translate(_TSV_CELL_TABLE)


def read_file(file):
    """Convert an uploaded PubMed export into GarganText TSV text.

    The upload is decoded as UTF-8 and handed to ``nbib.read``. Each parsed
    document that has ``title``, ``publication_date`` and ``authors`` becomes
    one row; documents missing any of the three are skipped.

    Args:
        file: Binary file-like object whose ``read()`` returns the raw bytes
            of the export (the page passes the Streamlit upload).

    Returns:
        The TSV as a single string: a header line followed by one line per
        kept document, with columns title, source, publication_year,
        publication_month, publication_day, abstract, authors and weight
        (always ``1``).

    Raises:
        UnicodeDecodeError: If the upload is not valid UTF-8.
        Exception: Whatever ``nbib.read`` raises for input it cannot parse.
    """
    docs = nbib.read(file.read().decode('utf-8'))

    # Month abbreviation -> month number, built once. The empty placeholder at
    # index 0 of calendar.month_abbr is left out so that an empty token cannot
    # resolve to month 0. The abbreviations follow the process locale.
    month_numbers = {
        abbr: number
        for number, abbr in enumerate(calendar.month_abbr)
        if abbr
    }

    rows = ["title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"]
    for doc in docs:
        if not all(field in doc for field in ('title', 'publication_date', 'authors')):
            continue

        # The journal name fills the "source" column; it used to be read and
        # then replaced by the fixed text "scopus".
        source = doc['journal'] if 'journal' in doc else ""
        abstract = doc['abstract'] if 'abstract' in doc else ""

        # publication_date is split on spaces into year, month abbreviation
        # and day; anything missing or unrecognised falls back to 1.
        date_parts = doc['publication_date'].split(' ')
        year = date_parts[0]
        month = month_numbers.get(date_parts[1], 1) if len(date_parts) > 1 else 1
        day = date_parts[2] if len(date_parts) > 2 and date_parts[2] else '1'

        # Commas inside a name are removed because the comma separates authors.
        authors = ','.join(
            author['author'].replace(',', '') for author in doc['authors']
        )

        cells = (doc['title'], source, year, month, day, abstract, authors, 1)
        rows.append('\t'.join(_tsv_cell(cell) for cell in cells) + '\n')

    return ''.join(rows)
# Page title and description in the selected language.
st.write(st.session_state.general_text_dict['title'])
st.write(st.session_state.general_text_dict['text'])

file = st.file_uploader(
    st.session_state.general_text_dict['file'], type=["txt"], key='file'
)

if file:
    try:
        # Replace only the last extension, so "my.corpus.txt" becomes
        # "my.corpus.csv" instead of "my.csv".
        name = file.name.rsplit('.', 1)[0] + '.csv'
        # Convert before writing anything, so that a failed conversion shows
        # only the error message and not the download caption as well.
        content = read_file(file)
        st.write(st.session_state.general_text_dict['new_file'])
        st.download_button(name, content, name)
    except Exception as e:
        # Page-level boundary: show the generic message to the user and keep
        # the actual cause in the server output.
        st.write(st.session_state.general_text_dict['error'])
        print(e)
    finally:
        file.close()
Streamlit/pages/Ris_To_GarganText.py
0 → 100644
View file @
333eb348
"""
Streamlit Application
Loïc Chapron
"""
import
streamlit
as
st
import
pandas
as
pd
import
rispy
from
datetime
import
date
# Banner image displayed at the top of the page.
st.image('img/gargantool_banner.jpg')

# CSS override for Streamlit's main container padding. It is injected through
# st.markdown, which only emits the raw <style> tag when unsafe_allow_html
# is enabled.
page_css = """
<style>
.block-container {
padding-top: 2rem;
padding-bottom: 0rem;
padding-left: 1rem;
padding-right: 1rem;
}
</style>
"""
st.markdown(page_css, unsafe_allow_html=True)
def load_bundle(lang):
    """Return this page's UI strings for one locale.

    Reads ``lang/text_RisToGarganText.csv`` (columns ``locale``, ``key`` and
    ``value``), resolved against the current working directory, and keeps
    the rows whose ``locale`` equals *lang*.

    Args:
        lang: Locale code as written in the ``locale`` column; this page
            passes ``'fr'`` or ``'en'``.

    Returns:
        A dict mapping every ``key`` to its ``value`` for that locale. The
        dict is empty when no row matches; when a key is repeated, the last
        row wins.
    """
    df = pd.read_csv("lang/text_RisToGarganText.csv")
    # Filter with a boolean mask rather than an f-string query: the locale is
    # compared as a value, so a quote inside it cannot break the expression.
    rows = df[df["locale"] == lang]
    # Pair the two columns in a single pass instead of rebuilding both column
    # lists on every loop iteration.
    return dict(zip(rows["key"], rows["value"]))
def update_lang():
    """Reload the page strings for the language currently selected.

    Looks up the locale code of ``general_language`` in ``general_lang_dict``
    and stores the matching bundle in ``general_text_dict``; all three live
    in ``st.session_state``.
    """
    state = st.session_state
    locale = state.general_lang_dict[state.general_language]
    state.general_text_dict = load_bundle(locale)
# Session bootstrap. The 'general_*' entries of st.session_state hold the
# language table, the selected language, the loaded strings and the name of
# the page that last ran.
if 'general_session_page' not in st.session_state:
    # No page has run yet in this session: start in French.
    st.session_state.general_lang_dict = {'Français': 'fr', 'English': 'en'}
    st.session_state.general_text_dict = load_bundle('fr')
    st.session_state.general_language = 'Français'
    st.session_state.general_session_page = 'RisToGarganText'
elif st.session_state.general_session_page != 'RisToGarganText':
    # Another page ran last: load this page's strings in the language
    # already selected and record the page change.
    st.session_state.general_text_dict = load_bundle(
        st.session_state.general_lang_dict[st.session_state.general_language]
    )
    st.session_state.general_session_page = 'RisToGarganText'
    # Drop every entry that is not a shared 'general_' one. Iterate over a
    # snapshot of the keys so that deleting entries cannot disturb the
    # iteration.
    # NOTE(review): the nesting of this loop under the elif was reconstructed
    # from a flattened listing - confirm against the repository.
    for key in list(st.session_state.keys()):
        if 'general_' not in key:
            del st.session_state[key]

# Language selector: it writes to 'general_language' and reloads the strings
# through update_lang whenever the choice changes.
language_names = list(st.session_state.general_lang_dict.keys())
st.selectbox(
    'Langue',
    language_names,
    language_names.index(st.session_state.general_language),
    key='general_language',
    on_change=update_lang,
)
# Characters that would break a tab-separated row: double quotes and tabs are
# removed, line breaks are turned into spaces.
_TSV_CELL_TABLE = str.maketrans({'"': None, '\t': None, '\n': ' ', '\r': ' '})


def _tsv_cell(value):
    """Return *value* as text that fits in one tab-separated cell."""
    return str(value).translate(_TSV_CELL_TABLE)


def _publication_date(doc):
    """Return ``(year, month, day)`` strings for one parsed RIS record."""
    month = '1'
    day = '1'
    # 'date' is split on "/" into year, month and day. Parts that are absent
    # or empty keep their default instead of raising IndexError.
    parts = doc['date'].split("/") if 'date' in doc else []
    year = parts[0] if parts else ''
    if len(parts) > 1 and parts[1]:
        month = parts[1]
    if len(parts) > 2 and parts[2]:
        day = parts[2]
    if not year:
        if 'year' in doc:
            year = doc['year'].replace('/', '').replace('.', '')
        else:
            # No usable date information at all: use the current year.
            year = str(date.today().year)
    return year, month, day


def read_file(file):
    """Convert an uploaded RIS export into GarganText TSV text.

    The upload is decoded as UTF-8 and handed to ``rispy.loads``; every
    parsed record becomes one row.

    Args:
        file: Binary file-like object whose ``read()`` returns the raw bytes
            of the export (the page passes the Streamlit upload).

    Returns:
        The TSV as a single string: a header line followed by one line per
        record, with columns title, source, publication_year,
        publication_month, publication_day, abstract, authors and weight
        (always ``1``).

    Raises:
        UnicodeDecodeError: If the upload is not valid UTF-8.
        Exception: Whatever ``rispy.loads`` raises for input it cannot parse.
    """
    rows = ["title\tsource\tpublication_year\tpublication_month\tpublication_day\tabstract\tauthors\tweight\n"]
    docs = rispy.loads(file.read().decode('utf-8'))

    for doc in docs:
        source = doc['secondary_title'] if 'secondary_title' in doc else ""
        abstract = doc['abstract'] if 'abstract' in doc else ""

        # A record without 'title' no longer aborts the whole conversion.
        # NOTE(review): 'primary_title' is presumably the key rispy uses for
        # the T1 tag - confirm against the rispy tag mapping.
        if 'title' in doc:
            title = doc['title']
        elif 'primary_title' in doc:
            title = doc['primary_title']
        else:
            title = ""

        year, month, day = _publication_date(doc)
        authors = ';'.join(doc['authors']) if 'authors' in doc else ""

        # Every cell is cleaned so that a tab, quote or line break inside a
        # field cannot shift or split the row.
        cells = (title, source, year, month, day, abstract, authors, 1)
        rows.append('\t'.join(_tsv_cell(cell) for cell in cells) + '\n')

    return ''.join(rows)
# Page title and description in the selected language.
st.write(st.session_state.general_text_dict['title'])
st.write(st.session_state.general_text_dict['text'])

file = st.file_uploader(
    st.session_state.general_text_dict['file'], type=["ris"], key='file'
)

if file:
    try:
        # Replace only the last extension, so "my.corpus.ris" becomes
        # "my.corpus.csv" instead of "my.csv".
        name = file.name.rsplit('.', 1)[0] + '.csv'
        # Convert before writing anything, so that a failed conversion shows
        # only the error message and not the download caption as well.
        content = read_file(file)
        st.write(st.session_state.general_text_dict['new_file'])
        st.download_button(name, content, name)
    except Exception as e:
        # Page-level boundary: show the generic message to the user and keep
        # the actual cause in the server output.
        st.write(st.session_state.general_text_dict['error'])
        print(e)
    finally:
        file.close()
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment