Description

This R project contains scripts that convert the database of clinical trials on COVID-19 into files readable by Gargantext (see http://gargantext.org).

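The database should be in TSV format (tab-separated values, no text delimiters) and encoded in UTF-8. Note that the loading code below expects the file to carry a .csv extension even though its content is tab-separated.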

Load of Data

First, define the name of the file to be processed. This file should be located in the data/ folder of the project.

library(lubridate)
source("coronalib.R") # R libraries 
# Base name (without extension) of the input file located in data/.
name <- "database_d_chavalarias_2020-05-15"

# The file has a .csv extension but its fields are tab-separated.
AllData <- read.csv(
  file.path("data", paste0(name, ".csv")),
  header = TRUE,
  sep = "\t"
)

# Drop the inclusion / exclusion criteria columns from the working table.
AllData$Inclusion.criteria <- NULL
AllData$Exclusion.criteria <- NULL

# Optional (disabled): keep only rows that have a registration date.
# AllData <- filter(AllData, !is.na(AllData$Registration.date))

# Number of rows loaded.
nrow(AllData)
[1] 1902
# Distinct trial registration numbers found in the loaded table.
x <- unique(AllData[["Trial.registration.number"]])
# Display the first one as a quick sanity check of the import.
head(x, n = 1L)[[1]]
[1] NCT04254874
796 Levels:  2020-001113-21 2020-001200-42 2020-001209-22 2020-001246-18 2020-001327-13 2020-001408-41 ... TCTR20200409006

Data segmentation

Several data frames are generated, one for each category of clinical trials (CTs) under study.

library(dplyr) 
library(stringr) 
# Split the rows by study aim using a case-sensitive substring match on the
# Study.aim column (the capitalised pattern "Treatment" does not match the
# lower-case "treatment" in "Post treatment").
Prevention <- filter(AllData, grepl("Prevention", Study.aim)) # CTs tagged Prevention
Treatments <- filter(AllData, grepl("Treatment", Study.aim)) # CTs tagged Treatment
Posttreatment <- filter(AllData, grepl("Post treatment", Study.aim)) # CTs tagged Post treatment

# Report how many rows (arms) ended up in each subset.
print(paste(
  nrow(Prevention), " Prevention arms,",
  nrow(Treatments), " Treatments arms and ",
  nrow(Posttreatment), " Post-treatment arms."
))
[1] "282  Prevention arms, 1604  Treatments arms and  15  Post-treatment arms."

Export of data and viz

Data are exported in several formats. The list of all treatments is also exported, assuming that treatments are separated by a ‘+’ sign in the treatment column of the original database.

source("coronalib.R")
library(reshape)
library(wordcloud)
library(ggplot2)

# HTML format
# Export of a corpus with treatments and outcomes
garg_export_with_html(Treatments,"Treatment") # exports treatments and outcomes of the clinical trials of type Treatment
garg_export_with_html(AllData,"AllData") # exports treatments and outcomes of the clinical trials of all types
garg_export_treatments_with_html(AllData,"AllData") # exports treatments of the clinical trials of all types


# Export of the list of all types of treatments, whatever the phase, in the Gargantext map list format (Gargantext V3 & V4)
gargV4_export_treaments_list(AllData,"AllDb")
gargV3_export_treaments_list(AllData,"AllDb")


# Conversion of the tsv file into a Gargantext-readable tsv file
# Selection of the kind of CT to export: All / Prevention / Treatment / Post-treatment
# Selection of the kind of information to include in the main text to be processed by Gargantext (abstract column): treatments and/or outcomes


# Simple txt export
garg_export_all_plain(Treatments,"Treatment") # export main information in plain text
garg_export_OnlyTreatments(Treatments,"Treatment") # export only treatments in plain text
garg_export_OnlyOutcomes(Treatments,"Treatment") # export only outcomes in plain text
garg_export_all_plain(AllData,"All") # export main information in plain text

# Raw export (just to have specific maps)
garg_export_raw_treatments(AllData,"AllData") ## export only the information relative to treatments, without any formatting.

# Some simple viz - tag cloud of the treatments per category of CT
# NOTE(review): these helpers presumably come from coronalib.R (sourced above); their output is not visible here.
TreatmentsCloud(Treatments)

TreatmentsCloud(Prevention)

TreatmentsCloud(Posttreatment)

LS0tCnRpdGxlOiAiQ29yb25hdmlydXMgQ2xpbmljYWwgVHJpYWxzIFIgbGlicmFyeSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQojIyMgRGVzY3JpcHRpb24KClRoaXMgUiBwcm9qZWN0IGNvbnRhaW50IHNvbWUgc2NyaXB0IHRvIGZvcm1hdCB0aGUgZGF0YWJhc2Ugb2YgY2xpbmljYWwgdHJpYWxzIG9uIGNvdmlkLTE5IGluIEdhcmdhbnRleHQgcmVhZGFibGUgZmlsZXMgKHNlZSBodHRwOi8vR2FyZ2FybnRleHQub3JnKS4KClRoZSBkYXRhYmFzZSBzaG91bGQgYmUgaW4gdGhlIHRzdiBmb3JtYXQgKHNlcGFyYXRvciA9IHRhYnVsYXRpb24gOyBubyBkZWxpbWl0ZXJzKSBhbmQgYmUgZm9ybWF0ZWQgaW4gVVRGOC4KCiMjIyBMb2FkIG9mIERhdGEKRmlyc3QgZGVmaW5lIHdoYXQgaXMgdGhlIG5hbWUgb2YgdGhlIGZpbGUgdG8gYmUgcHJvY2Vzc2VkLiBUaGlzIGZpbGUgc2hvdWxkIGJlIGluIHRoZSBmb2xkZXIgL2RhdGEKYGBge3J9CmxpYnJhcnkobHVicmlkYXRlKQpzb3VyY2UoImNvcm9uYWxpYi5SIikgIyBSIGxpYnJhcmllcyAKbmFtZTwtImRhdGFiYXNlX2RfY2hhdmFsYXJpYXNfMjAyMC0wNS0xNSIgIyBuYW1lIG9mIHRoZSBjc3YgdG8gYmUgbG9hZGVkCgpBbGxEYXRhIDwtcmVhZC5jc3YocGFzdGUoImRhdGEvIixuYW1lLCIuY3N2IixzZXA9IiIpLGhlYWQ9VFJVRSxzZXA9Ilx0IikKQWxsRGF0YSRJbmNsdXNpb24uY3JpdGVyaWEgPC0gTlVMTApBbGxEYXRhJEV4Y2x1c2lvbi5jcml0ZXJpYSA8LSBOVUxMCiNBbGxEYXRhIDwtIGZpbHRlcihBbGxEYXRhLCFpcy5uYShBbGxEYXRhJFJlZ2lzdHJhdGlvbi5kYXRlKSkKbnJvdyhBbGxEYXRhKQp4IDwtIHVuaXF1ZShBbGxEYXRhJFRyaWFsLnJlZ2lzdHJhdGlvbi5udW1iZXIpCmhlYWQoeClbWzFdXQoKYGBgCiMjIyBEYXRhIHNlZ21lbnRhdGlvbgpTZXZlcmFsIGRhdGFmcmFtZSBhcmUgZ2VuZXJhdGVkIGFjY29yZGluZyB0byB3aGljaCBhcmUgdGhlIENUcyB1bmRlciBzdHVkeS4KCmBgYHtyfQpsaWJyYXJ5KGRwbHlyKSAKbGlicmFyeShzdHJpbmdyKSAKUHJldmVudGlvbjwtIGZpbHRlcihBbGxEYXRhLGdyZXBsKCJQcmV2ZW50aW9uIixBbGxEYXRhJFN0dWR5LmFpbSkpICMgQ1RzIHRhZ2fDqXMgcHJldmVudGlvbgpUcmVhdG1lbnRzIDwtIGZpbHRlcihBbGxEYXRhLGdyZXBsKCJUcmVhdG1lbnQiLEFsbERhdGEkU3R1ZHkuYWltKSkgI8KgIyBDVHMgdGFnZ8OpcyBUcmVhbWVudHMKUG9zdHRyZWF0bWVudDwtIGZpbHRlcihBbGxEYXRhLGdyZXBsKCJQb3N0IHRyZWF0bWVudCIsQWxsRGF0YSRTdHVkeS5haW0pKSAjIENUcyB0YWdnw6kgUG9zdC1UcmVhdG1lbnQKcHJpbnQocGFzdGUoY291bnQoUHJldmVudGlvbiksIiBQcmV2ZW50aW9uIGFybXMsIiwgY291bnQoVHJlYXRtZW50cyksIiBUcmVhdG1lbnRzIGFybXMgYW5kICIsY291bnQoUG9zdHRyZWF0bWVudCksIiBQb3N0LXRyZWF0bWVudCBhcm1zLiIpKQpgYGAKIyMgRXhwb3J0IG9mIGRhdGEgYW5kIHZpegpEYXRhIGFyZSBleHBv
cnRlZCBpbiBzZXZlcmFsIGZvcm1hdHMuIFRoZSBsaXN0IG9mIGFsbCB0cmVhdG1lbnRzIGlzIGFsc28gZXhwb3J0ZWQgYXNzdW1pbmcgdGhhdCB0cmVhdG1lbnRzIGFyZSBzZXBhcmF0ZWQgYnkgYSAnKycgc2lnbmUgaW4gdGhlIGNvbHVtbiB0cmVhdG1lbnQgb2YgdGhlIG9yaWdpbmFsIGRiLgoKYGBge3J9CnNvdXJjZSgiY29yb25hbGliLlIiKQpsaWJyYXJ5KHJlc2hhcGUpCmxpYnJhcnkod29yZGNsb3VkKQpsaWJyYXJ5KGdncGxvdDIpCgojIEh0bWwgZm9ybWF0CiMgZXhwb3J0IG9mIGEgY29ycHVzIHdpdGggdHJlYXRtZW50cyBhbmQgb3V0Y29tZXMKZ2FyZ19leHBvcnRfd2l0aF9odG1sKFRyZWF0bWVudHMsIlRyZWF0bWVudCIpICMgZXhwb3J0ZSBUcmVhdG1lYW50IGV0IE91dGNvbWVzIGRlcyBlc3NhaXMgY2xpbmlxdWVzIGRlIHR5cGUgVHJlYXRtZW50CmdhcmdfZXhwb3J0X3dpdGhfaHRtbChBbGxEYXRhLCJBbGxEYXRhIikgIyBleHBvcnRlIFRyZWF0bWVhbnQgZXQgT3V0Y29tZXMgZGVzIGVzc2FpcyBjbGluaXF1ZXMgZGUgdG91cyB0eXBlcwpnYXJnX2V4cG9ydF90cmVhdG1lbnRzX3dpdGhfaHRtbChBbGxEYXRhLCJBbGxEYXRhIikgIyBleHBvcnRlIFRyZWF0bWVhbnQgZGVzIGVzc2FpcyBjbGluaXF1ZXMgZGUgdG91cyB0eXBlcwoKCiMgZXhwb3J0IG9mIHRoZSBsaXN0IG9mIGFsbCB0eXBlcyBvZiB0cmVhdG1lbnRzIHdoYXRldmVyIHRoZSBwaGFzZSBpbiB0aGUgZm9ybWF0IEdhcmdhbnRleHQgbWFwIGxpc3QgR2FyZ2FudGV4dCBWMyAmIFY0CmdhcmdWNF9leHBvcnRfdHJlYW1lbnRzX2xpc3QoQWxsRGF0YSwiQWxsRGIiKQpnYXJnVjNfZXhwb3J0X3RyZWFtZW50c19saXN0KEFsbERhdGEsIkFsbERiIikKCgojIENvbnZlcnNpb24gb2YgdGhlIHRzdiBmaWxlIGludG8gR2FyZ2FudGV4dCByZWFkYWJsZSB0c3YgZGlsZQojIFNlbGVjaXRvbiBvZiB0aGUga2luZCBvZiBDVCB0byBleHBvcnQgOiBBbGwgLyBQcmV2ZW50aW9uIC8gVHJlYXRtZW50IC8gUG9zdC10cmVhdG1lbnQKI8KgU2VsZWN0aW9uIG9mIHRoZSBraW5kIG9mIGluZm9ybWF0aW9ucyB0byBpbmNsdWRlIGluIHRoZSBtYWluIHRleHQgdG8gYmUgcHJvY2Vzc2VkIGJ5IEdhcmdhbnRleHQgKGJhc3RyYWN0IGNvbHVtbik6IFRyZWF0bWVhbnQgYW5kL29yIE91dGNvbWVzCgoKI8Kgc2ltcGxlIHR4dCBleHBvcnQKZ2FyZ19leHBvcnRfYWxsX3BsYWluKFRyZWF0bWVudHMsIlRyZWF0bWVudCIpICMgZXhwb3J0IG1haW4gaW5mb3JtYXRpb24gaW4gcGxhaW4gdGV4dApnYXJnX2V4cG9ydF9Pbmx5VHJlYXRtZW50cyhUcmVhdG1lbnRzLCJUcmVhdG1lbnQiKSAjIGV4cG9ydCBvbmx5IHRyZWF0bWVudHMgaW4gcGxhaW4gdGV4dApnYXJnX2V4cG9ydF9Pbmx5T3V0Y29tZXMoVHJlYXRtZW50cywiVHJlYXRtZW50IikgIyBleHBvcnQgb25seSBvdXRjb21lcyBpbiBwbGFpbiB0ZXh0CmdhcmdfZXhwb3J0X2FsbF9wbGFpbihBbGxEYXRhLCJBbGwiKSAjIGV4cG9ydCBtYWluIGlu
Zm9ybWF0aW9uIGluIHBsYWluIHRleHQKCiMgcmF3IGV4cG9ydCAoanVzdCB0byBoYXZlIHNwZWNpZmljIG1hcHMpCmdhcmdfZXhwb3J0X3Jhd190cmVhdG1lbnRzKEFsbERhdGEsIkFsbERhdGEiKSAjIyBleHBvcnQgb25seSBpbmZvIHJlbGF0aXZlIHRvIHRyZWF0bWVudHMgd2l0aG91dCBhbnkgZm9ybWF0aW5nLgoKIyBTb21lIHNpbXBsZSB2aXogLSBUYWcgY2xvdWQgb2YgdGhlIHRyZWFtZW50cyBwZXIgY2F0ZWdvcnkgb2YgQ1QKVHJlYXRtZW50c0Nsb3VkKFRyZWF0bWVudHMpClRyZWF0bWVudHNDbG91ZChQcmV2ZW50aW9uKQpUcmVhdG1lbnRzQ2xvdWQoUG9zdHRyZWF0bWVudCkKCmBgYAo=