Commit dff1a7c0 authored by qlobbe's avatar qlobbe

change the section preprocessing

parent 9b6e357f
This diff is collapsed.
This diff is collapsed.
# Newborn
('EUCTR2021-002613-34-NL','NCT04276896','NCT04299724','NCT04568811','NCT04796896','NCT04816643','NCT04992260','EUCTR2020-005442-42-PL','EUCTR2021-003277-55-AT','JPRN-UMIN000045492','NCT05193279')
# Infants (29 days to 1 year)
('EUCTR2021-002613-34-NL','NCT04276896','NCT04299724','NCT04568811','NCT04796896','NCT04816643','NCT04969601','NCT04992260','EUCTR2020-005442-42-PL','EUCTR2021-003277-55-AT','JPRN-UMIN000045492','NCT05193279')
# Children (1 to 9 years)
('ChiCTR2000031809','ChiCTR2100048439','EUCTR2021-002613-34-NL','ISRCTN15638344','NCT04276896','NCT04299724','NCT04551547','NCT04566770','NCT04568811','NCT04796896','NCT04816643','NCT04863638','NCT04869592','NCT04884685','NCT04895982','NCT04916886','NCT04917523','NCT04961359','NCT04969601','RPCEC00000374','RPCEC00000381','NCT04992260','EUCTR2020-005442-42-PL','NCT05003466','NCT05003479','NCT05013983','EUCTR2021-003277-55-AT','CTRI/2021/05/033752','RPCEC00000390','RPCEC00000391','RPCEC00000384','JPRN-UMIN000045492','NCT04918797','NCT05109598','NCT05107557','NCT05112913','NCT05137418','NCT05168709','NCT05169008','NCT05193279','NCT05198336','NCT05225285','NCT05231590','TCTR20220125002')
# Adolescents (10 to 17 years)
('ChiCTR2000031809','ChiCTR2100048439','CTRI/2020/09/027674','CTRI/2021/01/030416','EUCTR2021-002613-34-NL','EUCTR2021-003388-90-NL','ISRCTN15638344','JPRN-UMIN000044519','NCT04276896','NCT04299724','NCT04368728','NCT04471519','NCT04551547','NCT04566770','NCT04568811','NCT04611802','NCT04649151','NCT04683484','NCT04713553','NCT04761822','NCT04773067','NCT04796896','NCT04800133','NCT04816643','NCT04863638','NCT04864561','NCT04869592','NCT04884685','NCT04895982','NCT04916886','NCT04917523','NCT04951388','NCT04954092','NCT04955626','NCT04956224','NCT04961359','NCT04969601','NL9553','RPCEC00000374','RPCEC00000381','CTRI/2021/04/032942','NCT04992260','EUCTR2020-005442-42-PL','NCT05003466','NCT05003479','NCT05013983','EUCTR2021-003277-55-AT','CTRI/2021/05/033752','ISRCTN12348322','TCTR20210917004','TCTR20210920005','TCTR20210920006','TCTR20210923012','NCT05028361','RPCEC00000390','RPCEC00000391','RPCEC00000384','TCTR20210830002','ISRCTN15779782','EUCTR2021-005043-71-NL','JPRN-UMIN000045492','NCT04918797','NCT05109598','NCT05112913','IRCT20171122037571N3','NCT05007080','NCT05119855','NCT05137418','NCT05168709','NCT05169008','IRCT20150303021315N27','NCT05158855','NCT05193279','NCT05198336','NCT05225285','EUCTR2022-000074-25-ES','NCT05230940','NCT05231590','TCTR20220125002','IRCT20201214049709N5','NCT05249829','NCT05246137','NCT05239806','NCT05239923','NCT05239975','EUCTR2021-005197-25-DE')
# Immunosuppressed
('ACTRN12621000532808','ACTRN12621000661875','EUCTR2021-000175-37-SE','EUCTR2021-000291-11-AT','EUCTR2021-000412-28-BE','EUCTR2021-000461-33-BE','EUCTR2021-000515-24-NL','EUCTR2021-000905-26-SE','EUCTR2021-000930-32-BE','EUCTR2021-001414-10-NL','EUCTR2021-002014-14-CZ','EUCTR2021-002613-34-NL','EUCTR2021-002693-10-AT','EUCTR2021-003618-37-NO','NCT04754698','NCT04775069','NCT04780659','NCT04794946','NCT04805125','NCT04806113','NCT04839315','NCT04844489','NCT04847050','NCT04860258','NCT04860297','NCT04862806','NCT04869358','NCT04878211','NCT04885907','NCT04895982','NCT04904549','NCT04930770','NCT04954469','NCT04961229','NCT04969263','NCT04969601','NCT04977024','PACTR202105817814362','RBR-9ksh5f4','NCT05000216','NCT05022329','EUCTR2021-003277-55-AT','NCT05030974','NCT05047640','NCT05057897','ChiCTR2100049657','NCT05028374','NCT04801667','TCTR20210830002','NCT05075057','NCT05075083','NCT05075499','NCT05075538','EUCTR2021-001459-15-AT','NCT05081271','NCT05077254','NCT05085145','ACTRN12621001412820','ACTRN12621001465842','NCT04792567','TCTR20211102003','NCT05104333','NCT05104437','NCT05104216','NCT05112848','NCT05168813','NCT05170399','EUCTR2021-002927-39-AT','IRCT20140818018842N22','TCTR20211220004','TCTR20211228003','DRKS00027372','NCT05220397')
# Pregnant women
('EUCTR2021-000440-22-BE','EUCTR2021-000893-27-BE','EUCTR2021-002327-38-NL','NCT04754594','NCT04765384','TCTR20210923013','TCTR20210717002','ISRCTN15279830')
\ No newline at end of file
......@@ -27,37 +27,37 @@
\citation{delanoe_mining_2021}
\citation{chavalarias_phylomemetic_2013,chavalarias_draw_2021}
\citation{delanoe_mining_2021}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The four operators of the phylomemy reconstruction process: 1. terms indexation, 2. similarity measures, 3. fields detection, 4. inter-temporal matching}}{2}{figure.1}\protected@file@percent }
\newlabel{fig:reconstruction}{{1}{2}{The four operators of the phylomemy reconstruction process: 1. terms indexation, 2. similarity measures, 3. fields detection, 4. inter-temporal matching}{figure.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Pre-processing the database}{2}{subsection.2.2}\protected@file@percent }
\newlabel{pre-processing}{{2.2}{2}{Pre-processing the database}{subsection.2.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}The phylomemy reconstruction process}{2}{subsection.2.3}\protected@file@percent }
\citation{dias2008mapping}
\citation{uno2004lcm}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}\leavevmode {\color {red}\textbf {Collaboratively pre-processing the COVID-NMA database}}}{2}{subsection.2.2}\protected@file@percent }
\newlabel{pre-processing}{{2.2}{2}{\todo {Collaboratively pre-processing the COVID-NMA database}}{subsection.2.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}The phylomemy reconstruction process}{2}{subsection.2.3}\protected@file@percent }
\citation{chavalarias_draw_2021}
\citation{lobbe_exploring_2021}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The four operators of the phylomemy reconstruction process: 1. terms indexation, 2. similarity measures, 3. fields detection, 4. inter-temporal matching}}{3}{figure.1}\protected@file@percent }
\newlabel{fig:reconstruction}{{1}{3}{The four operators of the phylomemy reconstruction process: 1. terms indexation, 2. similarity measures, 3. fields detection, 4. inter-temporal matching}{figure.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Visualizing phylomemies}{3}{subsection.2.4}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Description of the resulting phylomemy}{3}{section.3}\protected@file@percent }
\citation{chumakove_old_2021}
\citation{chumakove_old_2021}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Phylomemy of 1,794 COVID-19 vaccines trials recorded between February 2020 and October 2021 in the COVID-NMA database. Online and interactive version available at \href {http://maps.gargantext.org/unpublished_phylo/vaccines_publications_10_2021/}{maps.gargantext.org/publications}}}{4}{figure.2}\protected@file@percent }
\newlabel{fig:phylomemy-randomized-unrandomized}{{2}{4}{Phylomemy of 1,794 COVID-19 vaccines trials recorded between February 2020 and October 2021 in the COVID-NMA database. Online and interactive version available at \href {http://maps.gargantext.org/unpublished_phylo/vaccines_publications_10_2021/}{maps.gargantext.org/publications}}{figure.2}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Following the worldwide tracks of COVID-19 vaccines}{4}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}General observations}{4}{subsection.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Repurposing non-COVID vaccines}{4}{subsection.4.2}\protected@file@percent }
\citation{krause_considerations_2021}
\citation{chumakove_old_2021}
\citation{chumakove_old_2021}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces A focus of \autoref {fig:phylomemy-randomized-unrandomized}. In red are highlighted all the trials evaluating heterologous primary vaccination and heterologous booster. We circle the first heterologous trials involving different platforms.}}{5}{figure.3}\protected@file@percent }
\newlabel{fig:heterologous}{{3}{5}{A focus of \autoref {fig:phylomemy-randomized-unrandomized}. In red are highlighted all the trials evaluating heterologous primary vaccination and heterologous booster. We circle the first heterologous trials involving different platforms}{figure.3}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Heterologous vaccination}{5}{subsection.4.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Boosters}{5}{subsection.4.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Filters and upcoming research questions}{5}{subsection.4.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Following the worldwide tracks of COVID-19 vaccines}{5}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}General observations}{5}{subsection.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Repurposing non-COVID vaccines}{5}{subsection.4.2}\protected@file@percent }
\citation{krause_considerations_2021}
\citation{nguyen_research_2021}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Phylomemy of the randomized only COVID-19 vaccines trials. In blue, we highlight all the trials with an associated publication (i.e., preprint or peer-reviewed articles).}}{6}{figure.4}\protected@file@percent }
\newlabel{fig:phylomemy-randomized-publications}{{4}{6}{Phylomemy of the randomized only COVID-19 vaccines trials. In blue, we highlight all the trials with an associated publication (i.e., preprint or peer-reviewed articles)}{figure.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Heterologous vaccination}{6}{subsection.4.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Boosters}{6}{subsection.4.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Filters and upcoming research questions}{6}{subsection.4.5}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Perspectives and insights for COVID-19 research}{6}{section.5}\protected@file@percent }
\bibstyle{cas-model2-names}
\bibdata{references}
\bibcite{boutron_nma_2020}{{1}{2020}{{Boutron et~al.}}{{Boutron, Chaimani, Meerpohl, Hróbjartsson, Devane, Rada, Tovey, Grasselli, Ravaud, Alawadhi, Amer-Yahia, Arienti, Auber, Ávila, Bafeta, Baldassarre, Banzi, Barnier, Baudry, Bergman, Bollig, Bonnet, Bouet, Boughanem, Boutron, Buckley, Cabanac, Chaiman, Charpy, Chavalarias, Chen, Chevance, Cohen-Boulakia, Cogo, Conil, Coquery, Davidson, De~Nale, Devane, Diard, Dkaki, Doreau, El~Asri, Evrenoglou, Fabbri, Featherstone, Feron, Ferrand, Fezeu, Fouet, Ghanawi, Ghosn El~Chall, Graña, Grasselli, Grolleau, Groz, Hacid, Hamel, Hansen, Henschke, Hohlfeld, Hróbjartsson, Julia, Mavridis, Meerpohl, Meyer, Minozzi, Moreno, Naidoo, Nguyen, Oikonomidi, Page, Petkovic, Pienaar, Pierre, Probyn, Quirke, Rada, Ravaud, Ripoll, Riveros, Rivière, Sauvant, Savovic, Schmucker, Sguassero, Sterne, Toumani, Tovey, Villanueva, Vuillemot, Xia, Yu, Zoletic and Zweigenbaum}}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Phylomemy of the randomized only COVID-19 vaccines trials. In blue, we highlight all the trials with an associated publication (i.e., preprint or peer-reviewed articles).}}{7}{figure.4}\protected@file@percent }
\newlabel{fig:phylomemy-randomized-publications}{{4}{7}{Phylomemy of the randomized only COVID-19 vaccines trials. In blue, we highlight all the trials with an associated publication (i.e., preprint or peer-reviewed articles)}{figure.4}{}}
\bibcite{chavalarias_phylomemetic_2013}{{2}{2013}{{Chavalarias and Cointet}}{{}}}
\bibcite{chavalarias_draw_2021}{{3}{2021}{{Chavalarias et~al.}}{{Chavalarias, Lobb{\'{e}} and Delanoë}}}
\bibcite{chumakove_old_2021}{{4}{2021}{{Chumakov et~al.}}{{Chumakov, Avidan, Benn, Bertozzi, Blatt, Chang, Jamison, Khader, Kottilil, Netea, Sparrow and Gallo}}}
......
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.3.24) 1 FEB 2022 16:01
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.3.24) 21 MAR 2022 18:07
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
......@@ -876,7 +876,7 @@ Overfull \hbox (117.0831pt too wide) detected at line 139
(./toward.abs)
LaTeX Font Info: Trying to load font information for T1+cmss on input line 1
82.
84.
(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmss.fd
File: t1cmss.fd 2019/12/16 v2.5j Standard LaTeX font definitions
)
......@@ -890,7 +890,7 @@ erted-to.pdf>
(epstopdf) size: 150831 bytes
(epstopdf) Command: <repstopdf --outfile=../figures/reconstruction_
process-eps-converted-to.pdf ../figures/reconstruction_process.eps>
(epstopdf) \includegraphics on input line 196.
(epstopdf) \includegraphics on input line 202.
Package epstopdf Info: Output file is already uptodate.
<../figures/reconstruction_process-eps-converted-to.pdf, id=42, 511.9125pt x 42
6.59375pt>
......@@ -898,84 +898,81 @@ File: ../figures/reconstruction_process-eps-converted-to.pdf Graphic file (type
pdf)
<use ../figures/reconstruction_process-eps-converted-to.pdf>
Package pdftex.def Info: ../figures/reconstruction_process-eps-converted-to.pdf
used on input line 196.
used on input line 202.
(pdftex.def) Requested size: 281.00229pt x 234.16512pt.
[2pdfTeX warning (ext4): destination with the same identifier (name{figure.1})
has been already used, duplicate ignored
[2] [3pdfTeX warning (ext4): destination with the same identifier (name{figure
.1}) has been already used, duplicate ignored
\AtBegShi@Output ...ipout \box \AtBeginShipoutBox
\fi \fi
l.207 T
he corpus is then sliced into periods of interest $\mathcal{T^*} = \...
<../figures/reconstruction_process-eps-converted-to.pdf>]
<../figures/phylomemy-randomized-unrandomized.pdf, id=102, 796.67719pt x 448.13
l.236
<../figures/reconstruction_process-eps-converted-to.pdf>]
<../figures/phylomemy-randomized-unrandomized.pdf, id=114, 796.67719pt x 448.13
09pt>
File: ../figures/phylomemy-randomized-unrandomized.pdf Graphic file (type pdf)
<use ../figures/phylomemy-randomized-unrandomized.pdf>
Package pdftex.def Info: ../figures/phylomemy-randomized-unrandomized.pdf used
on input line 233.
on input line 239.
(pdftex.def) Requested size: 468.3324pt x 263.43762pt.
[3] [4pdfTeX warning (ext4): destination with the same identifier (name{figure
.2}) has been already used, duplicate ignored
[4pdfTeX warning (ext4): destination with the same identifier (name{figure.2})
has been already used, duplicate ignored
\AtBegShi@Output ...ipout \box \AtBeginShipoutBox
\fi \fi
l.249
<../figures/phylomemy-randomized-unrandomized.pdf>]
<../figures/heterologous.pdf, id=178, 796.67719pt x 369.88583pt>
l.244 \newpage
<../figures/phylomemy-randomized-unrandomized.pdf>]
<../figures/heterologous.pdf, id=174, 796.67719pt x 369.88583pt>
File: ../figures/heterologous.pdf Graphic file (type pdf)
<use ../figures/heterologous.pdf>
Package pdftex.def Info: ../figures/heterologous.pdf used on input line 252.
Package pdftex.def Info: ../figures/heterologous.pdf used on input line 258.
(pdftex.def) Requested size: 468.3324pt x 217.44058pt.
[5pdfTeX warning (ext4): destination with the same identifier (name{figure.3})
has been already used, duplicate ignored
\AtBegShi@Output ...ipout \box \AtBeginShipoutBox
\fi \fi
l.271 W
e can also explore the visualization to better understand how differe...
<../figures/heterologous.pdf>]
<../figures/phylomemy-randomized-publications.pdf, id=219, 796.67719pt x 412.56
l.266
<../figures/heterologous.pdf>]
<../figures/phylomemy-randomized-publications.pdf, id=213, 796.67719pt x 412.56
496pt>
File: ../figures/phylomemy-randomized-publications.pdf Graphic file (type pdf)
<use ../figures/phylomemy-randomized-publications.pdf>
Package pdftex.def Info: ../figures/phylomemy-randomized-publications.pdf used
on input line 277.
on input line 283.
(pdftex.def) Requested size: 468.3324pt x 242.52988pt.
LaTeX Font Info: Trying to load font information for TS1+stix on input line
288.
294.
(/usr/share/texlive/texmf-dist/tex/latex/stix/ts1stix.fd
File: ts1stix.fd 2015/04/17 v1.1.2-latex STIX TS1 font definitions
) [6pdfTeX warning (ext4): destination with the same identifier (name{figure.4}
) has been already used, duplicate ignored
\AtBegShi@Output ...ipout \box \AtBeginShipoutBox
\fi \fi
l.292 \item d
ata that exists outside of the registries (publications, trial...
<../figures/phylomemy-randomized-publications.pdf>]
) [6]
LaTeX Font Info: Trying to load font information for T1+cmtt on input line 3
07.
13.
(/usr/share/texlive/texmf-dist/tex/latex/base/t1cmtt.fd
File: t1cmtt.fd 2019/12/16 v2.5j Standard LaTeX font definitions
) (./toward.bbl [7])
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 316.
) (./toward.bbl [7pdfTeX warning (ext4): destination with the same identifier (
name{figure.4}) has been already used, duplicate ignored
[8]
Package atveryend Info: Empty hook `AfterLastShipout' on input line 316.
\AtBegShi@Output ...ipout \box \AtBeginShipoutBox
\fi \fi
l.81 ...tet(2013)}]{chavalarias_phylomemetic_2013}
<../figures/phylomemy-random
ized-publications.pdf>])
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 322.
[8]
Package atveryend Info: Empty hook `AfterLastShipout' on input line 322.
(./toward.aux)
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 316.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 316.
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 322.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 322.
Package rerunfilecheck Info: File `toward.out' has not changed.
(rerunfilecheck) Checksum: D41D8CD98F00B204E9800998ECF8427E;0.
)
Here is how much of TeX's memory you used:
13726 strings out of 482577
200208 string characters out of 5948912
469316 words of memory out of 5000000
28457 multiletter control sequences out of 15000+600000
13727 strings out of 482577
200218 string characters out of 5948912
464345 words of memory out of 5000000
28458 multiletter control sequences out of 15000+600000
609201 words of font info for 115 fonts, out of 8000000 for 9000
350 hyphenation exceptions out of 8191
42i,8n,46p,1913b,530s stack positions out of 5000i,500n,10000p,200000b,80000s
......@@ -992,7 +989,7 @@ mf-dist/fonts/type1/public/stix/stix-mathcal.pfb></usr/share/texlive/texmf-dist
/fonts/type1/public/stix/stix-mathex.pfb></usr/share/texlive/texmf-dist/fonts/t
ype1/public/stix/stix-mathit.pfb></usr/share/texlive/texmf-dist/fonts/type1/pub
lic/stix/stix-mathrm.pfb>
Output written on toward.pdf (9 pages, 1467391 bytes).
Output written on toward.pdf (9 pages, 1469956 bytes).
PDF statistics:
469 PDF objects out of 1000 (max. 8388607)
244 compressed objects within 3 object streams
......
No preview for this file type
......@@ -167,6 +167,8 @@ All authors contributed equally to the paper. G Ferrand was responsible for the
Over the past two years, the ongoing COVID-19 pandemic has impacted a wide number of human domains: from economy to education, from public health to politics. Among others, Science swung early on into action to find both a cure and an effective vaccine. This has resulted in an unprecedented volume of publications that have generated an information overload for the medical community. One of today's challenges is to synthesize this overwhelming amount of information about current COVID-19 research in order to improve coordination between the different research streams. Our paper thus proposes to address this issue by applying a new method for reconstructing the evolution of knowledge. We take as a case study the COVID-19 vaccines clinical trials from the \textit{COVID-NMA database} and use the \textit{phylomemy reconstruction process}~\cite{chavalarias_draw_2021}. The \textit{COVID-NMA database} stores the curated dataset of all the clinical trials available in the set of international primary and secondary trial registries~\cite{boutron_nma_2020,nguyen_research_2021} (\textit{i.e.}, all trials registered in the International Clinical Trials Registry Platform (ICTRP), Clinicaltrials.gov and the EU clinical trials registry -- see~\ref{materials}). For the purpose of this study, the \textit{COVID-NMA database} has been reduced to a pruned corpus called~$\mathcal{D}_{vt}$ (see~\ref{pre-processing}). We then combine the expertise of epidemiologists and \textit{Complex Systems} researchers to interpret the resulting visualizations and reveal insights for upcoming COVID-19 research.
%\todo{la conjonction de la mutabilité et du flux de data a été nouveau}
%
%% Materials and methods
%
......@@ -178,14 +180,18 @@ Our paper aims at applying a new text mining method -- the \textit{phylomemy re
\subsection{The COVID-NMA database}
\label{materials}
The COVID-NMA project is an international initiative aimed at providing a living mapping and a living systematic review of all trials assessing treatments and preventive interventions for COVID-19~\cite{boutron_nma_2020,nguyen_research_2021}. The development of the COVID-NMA database relies on a full methodology designed to generate and make available a complete, comprehensive, integrated, non-redundant and carefully annotated data sets on clinical trials. We automatically extract data from clinical registries on a weekly basis and provide assistance to epidemiologists on the curation and annotation process. Raw data is extracted from the \href{https://www.clinicaltrialsregister.eu/}{EU clinical trials register}, from the \href{https://clinicaltrials.gov/}{ClinicalTrial registry} managed by the U.S. National Library of Medicine, from the \href{https://www.irct.ir/}{IRCT registry} and from the \href{https://www.who.int/ictrp/en/}{WHO International Clinical Trials Registry Platform} (ICTRP) -- an international registry that assembles information on clinical trials registered in 17 primary registries to identify new trial assessing COVID-19 vaccine and update of previously registered trial records. Data are extracted from registries, annotated by epidemiologists, then stored and made available through the COVID-NMA database. We here note that international trials registries can be post-updated by research teams, e.g. for post-adding a related publication.
The COVID-NMA project is an international initiative aimed at providing a living mapping and a living systematic review of all trials assessing treatments and preventive interventions for COVID-19~\cite{boutron_nma_2020,nguyen_research_2021}. The development of the COVID-NMA database relies on a full methodology designed to generate and make available a complete, comprehensive, integrated, non-redundant and carefully annotated data set on clinical trials. We automatically extract data from clinical registries on a weekly basis and provide assistance to epidemiologists on the curation and annotation process. Raw data is extracted from the \href{https://www.clinicaltrialsregister.eu/}{EU clinical trials register}, from the \href{https://clinicaltrials.gov/}{ClinicalTrials.gov registry} managed by the U.S. National Library of Medicine, from the \href{https://www.irct.ir/}{IRCT registry} and from the \href{https://www.who.int/ictrp/en/}{WHO International Clinical Trials Registry Platform} (ICTRP) -- an international registry that assembles information on clinical trials registered in 17 primary registries to identify new trials assessing COVID-19 vaccines and updates to previously registered trial records. Data are extracted from registries, annotated \todo{and enriched} by epidemiologists, then stored and made available through the COVID-NMA database. We here note that international trials registries can be post-updated by research teams, e.g.\ for post-adding a related publication.
%Future versions of the phylomemies presented in this paper might thus be slightly different from the current ones. A promising way to get around this issue would be to archive every modifications of the original registries and then choose the version we want to integrate in the phylomemies.
\subsection{Pre-processing the database}
\subsection{\todo{Collaboratively pre-processing the COVID-NMA database}}
\label{pre-processing}
We have pre-processed the COVID-NMA database before using it for the phylomemy reconstruction to filter the 1,794 descriptions related to vaccines trials. The trials records have been first aggregated by publication week. Then, we have merged the sections `\textit{pharmacological treatment}', `\textit{treatment type}' and `\textit{treatment name}' together to shape the trial descriptions. These descriptions have also been enriched with extra-information such as trial phases, funding, involved countries or associated publications. The resulting corpus $\mathcal{D}_{vt}$ has latter been collectively and collaboratively curated by epidemiologists thanks to the free software \textit{Gargantext}~\cite{delanoe_mining_2021}. There, these experts have extracted and validated a core vocabulary as a list of 175 root terms.
\todo{In order to be used as input data for the phylomemy reconstruction process, the COVID-NMA database first had to be pre-processed. But the unprecedented volume of trials challenged our capacity to build a time-consistent and insightful visualization. We have thus faced two main issues: 1) dealing with the mutable nature of the trials registries; 2) collaboratively curating a representative vocabulary of the trials descriptions.}
\todo{Contrary to regular scientific publications, the textual content of trials registries is mutable by nature: the description of a given trial can be updated weeks after having been recorded by detailing its protocol or simply by adding experimental results. As the tight temporality of the COVID-19 crisis forced us to reconstruct our visualization monthly, we have developed a time-consistent approach: for each recorded trial, we have made the decision not to update its textual descriptions but, at the same time, to keep the meta-data (i.e., phases, funders, associated publications, etc.) up to date with their most recent version. By doing so, we do not break the temporal continuity of the phylomemy reconstruction process as we preserve the natural evolution of the trials descriptions.}
\todo{We have then} filtered the 1,794 descriptions related to vaccines trials \todo{to isolate them from other preventive or post-treatments}. Next, the trials records have been aggregated by publication week and we have merged the sections `\textit{pharmacological treatment}', `\textit{treatment type}' and `\textit{treatment name}' together to shape \todo{a new field called} trial description. These descriptions have been enriched with \todo{meta-data} such as trial phases, funding, involved countries or associated publications. The resulting corpus $\mathcal{D}_{vt}$ has later been collectively and collaboratively curated by epidemiologists thanks to the free software \textit{Gargantext}~\cite{delanoe_mining_2021}. There, these experts have extracted and validated a core vocabulary as a list of 175 root terms \todo{ready to be analyzed through the phylomemy}.
\subsection{The phylomemy reconstruction process}
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment