Commit f16f9681 authored by david Chavalarias's avatar david Chavalarias

R scripts added in /Rscripts

parent 5c8b59fa
---
title: "Coronavirus Clinical Trials R library"
output: html_notebook
---
### Description
This R project contains some scripts that format the database of clinical trials on COVID-19 into Gargantext-readable files (see http://gargantext.org).
The database should be in TSV format (separator = tabulation; no text delimiters) and be encoded in UTF-8.
### Load of Data
First, set the name of the file to be processed. This file should be in the folder /data.
```{r}
library(lubridate)
source("coronalib.R") # R libraries
# Base name (without extension) of the file to load from data/.
name <- "Database060520"
# The file carries a .csv extension but is read as tab-separated text.
AllData <- read.csv(paste0("data/", name, ".csv"), header = TRUE, sep = "\t")
# Remove the two criteria columns from the working data frame.
AllData$Inclusion.criteria <- NULL
AllData$Exclusion.criteria <- NULL
# Disabled filter on the first author, kept for reference:
# AllData <- filter(AllDataTemp, AllDataTemp$First.author != "N/A")
# Display the number of rows, then the first distinct registration number.
nrow(AllData)
x <- unique(AllData$Trial.registration.number)
head(x)[[1]]
```
### Data segmentation
Several data frames are generated, one for each category of clinical trials (CTs) under study.
```{r}
library(dplyr)
library(stringr)
# One subset per study aim: a trial is kept when its Study.aim contains the tag.
Prevention <- AllData %>% filter(grepl("Prevention", Study.aim)) # CTs tagged Prevention
Treatments <- AllData %>% filter(grepl("Treatment", Study.aim)) # CTs tagged Treatment
Posttreatment <- AllData %>% filter(grepl("Post treatment", Study.aim)) # CTs tagged Post treatment
# Report the size of each subset.
print(paste(
  nrow(Prevention), " Prevention CTs,",
  nrow(Treatments), " Treatments CTs and ",
  nrow(Posttreatment), " Post-treatment CTs."
))
```
## Export of data and viz
Data are exported in several formats. The list of all treatments is also exported, assuming that treatments are separated by a '+' sign in the treatment column of the original database.
```{r}
source("coronalib.R")
library(reshape)
library(wordcloud)
library(ggplot2)
# HTML format
# Export of a corpus whose abstract combines treatments and outcomes.
garg_export_with_html(Treatments,"Treatment") # exports treatments and outcomes of the clinical trials tagged Treatment
garg_export_with_html(AllData,"AllData") # exports treatments and outcomes of the clinical trials of all types
garg_export_treatments_with_html(AllData,"AllData") # exports the treatments of the clinical trials of all types
# Export of the list of all treatments, whatever the phase, as Gargantext map lists (Gargantext V3 and V4 formats).
gargV4_export_treaments_list(AllData,"AllDb")
gargV3_export_treaments_list(AllData,"AllDb")
# Conversion of the tsv file into Gargantext-readable tsv files.
# Selection of the kind of CT to export: All / Prevention / Treatment / Post-treatment.
# Selection of the kind of information to include in the main text processed by Gargantext (abstract column): treatments and/or outcomes.
# Simple text export
garg_export_all_plain(Treatments,"Treatment") # export main information in plain text
garg_export_OnlyTreatments(Treatments,"Treatment") # export only treatments in plain text
garg_export_OnlyOutcomes(Treatments,"Treatment") # export only outcomes in plain text
garg_export_all_plain(AllData,"All") # export main information in plain text
# Raw export (just to have specific maps)
garg_export_raw_treatments(AllData,"AllData") ## export only the information relative to treatments, without any formatting.
# Some simple visualizations: tag cloud of the treatments for each category of CT
TreatmentsCloud(Treatments)
TreatmentsCloud(Prevention)
TreatmentsCloud(Posttreatment)
```
This diff is collapsed.
# process_clinical_trials
R scripts that convert a clinical trials tsv file into Gargantext-readable files, plus a few other small scripts.
The main file to run is Process_CT_for_Gargantext.Rmd
Make sure that you have placed your database in the /Rscripts/data folder in the tsv format (separator = tabulation; no text delimiters), encoded in UTF-8.
When Process_CT_for_Gargantext.Rmd is run, a new csv file is written to the right place in /data to update the clinical trial descriptions.
# Export Functions
## Gargantext V3
### Abstract written as HTML (as in the template), merging treatments and outcomes
# Write a tab-separated corpus whose abstract is an HTML summary of each trial.
#
# df:       data frame of trials; reads Registration.date (parsed as
#           "%Y-%m-%d"), First.author, Trial.registration.number, Funding and
#           the descriptive columns pasted into the abstract below.
# filename: suffix of the output file
#           ../data/gargCSV_all_html_<filename>.csv
garg_export_with_html <- function(df, filename) {
  # Parse the registration date once; day, month and year all derive from it.
  registered <- as.Date(df$Registration.date, "%Y-%m-%d")
  corpus <- data.frame("publication_day" = day(registered))
  corpus$publication_month <- month(registered)
  corpus$publication_year <- year(registered)
  corpus$authors <- df$First.author
  corpus$title <- df$Trial.registration.number
  corpus$source <- df$Funding
  # Treatment names are lower-cased and every '+' is rewritten as " + ".
  treatment_name <- str_replace_all(tolower(df$Treatment.name), "[[+]]", " + ")
  corpus$abstract <- paste(
    "<a href='", df$Full.text.link,
    "' target='blank'>Link to study</a></br></br><b> ", df$Study.design,
    " (</b>", df$Recruitment.status,
    ")</br></br><b>TYPE OF PATIENTS:</b> ", df$Type.of.patients,
    "</br></br><b>", toupper(df$Pharmacological.treatment),
    "</b></br><i>Type: </i>", df$Treatment.type,
    "</br><i> Treatment name: </i>", treatment_name,
    "</br></br><b>PRIMARY OUTCOME</b></br>", df$primary.outcome,
    "</br></br><b>", df$Center,
    "</b> (", df$Countries,
    ")</br><b>N in this treatment group:</b> ", df$n.randomized.in.this.arm,
    " </br><b>N in the trial: </b>", df$Total.sample.size
  )
  write.table(
    corpus,
    file = paste0("../data/gargCSV_all_html_", filename, ".csv"),
    sep = "\t",
    row.names = FALSE
  )
}
# Write a tab-separated corpus whose abstract is an HTML summary restricted to
# the treatment fields of each trial.
#
# df:       data frame of trials; reads Registration.date (parsed as
#           "%Y-%m-%d"), First.author, Trial.registration.number, Funding,
#           Full.text.link, Pharmacological.treatment, Treatment.type and
#           Treatment.name.
# filename: suffix of the output file
#           output/gargCSV_Treatments_html_<filename>.csv
garg_export_treatments_with_html <- function(df, filename) {
  # Parse the registration date once; day, month and year all derive from it.
  registered <- as.Date(df$Registration.date, "%Y-%m-%d")
  corpus <- data.frame("publication_day" = day(registered))
  corpus$publication_month <- month(registered)
  corpus$publication_year <- year(registered)
  corpus$authors <- df$First.author
  corpus$title <- df$Trial.registration.number
  corpus$source <- df$Funding
  # Treatment names are lower-cased and every '+' is rewritten as " + ".
  treatment_name <- str_replace_all(tolower(df$Treatment.name), "[[+]]", " + ")
  corpus$abstract <- paste(
    "<a href='", df$Full.text.link,
    "' target='blank'>Link to study</a></br></br><b> ",
    toupper(df$Pharmacological.treatment),
    "</b></br><i>Type: </i>", df$Treatment.type,
    "</br><i> Treatment name: </i>", treatment_name
  )
  write.table(
    corpus,
    file = paste0("output/gargCSV_Treatments_html_", filename, ".csv"),
    sep = "\t",
    row.names = FALSE
  )
}
# Write a tab-separated corpus whose abstract holds only the treatment fields,
# joined with " . " and without any markup.
#
# df:       data frame of trials; reads Registration.date (parsed as
#           "%Y-%m-%d"), First.author, Trial.registration.number, Funding,
#           Pharmacological.treatment, Treatment.type and Treatment.name.
# filename: suffix of the output file
#           output/gargCSV_Treatments_raw_<filename>.csv
garg_export_raw_treatments <- function(df, filename) {
  # Parse the registration date once; day, month and year all derive from it.
  registered <- as.Date(df$Registration.date, "%Y-%m-%d")
  corpus <- data.frame("publication_day" = day(registered))
  corpus$publication_month <- month(registered)
  corpus$publication_year <- year(registered)
  corpus$authors <- df$First.author
  corpus$title <- df$Trial.registration.number
  corpus$source <- df$Funding
  # Here every '+' in the lower-cased treatment names becomes " ; ".
  treatment_name <- str_replace_all(tolower(df$Treatment.name), "[[+]]", " ; ")
  corpus$abstract <- paste(
    toupper(df$Pharmacological.treatment),
    " . ", df$Treatment.type,
    " . ", treatment_name
  )
  write.table(
    corpus,
    file = paste0("output/gargCSV_Treatments_raw_", filename, ".csv"),
    sep = "\t",
    row.names = FALSE
  )
}
# Write a tab-separated corpus whose abstract is a plain-text summary of each
# trial (design, status, patients, treatment, outcome, centre, sample sizes).
#
# df:       data frame of trials; reads Registration.date (parsed as
#           "%Y-%m-%d"), First.author, Trial.registration.number, Funding and
#           the descriptive columns pasted into the abstract below.
# filename: suffix of the output file
#           output/gargCSV_all_plain_<filename>.csv
garg_export_all_plain <- function(df, filename) {
  # Parse the registration date once; day, month and year all derive from it.
  registered <- as.Date(df$Registration.date, "%Y-%m-%d")
  corpus <- data.frame("publication_day" = day(registered))
  corpus$publication_month <- month(registered)
  corpus$publication_year <- year(registered)
  corpus$authors <- df$First.author
  corpus$title <- df$Trial.registration.number
  corpus$source <- df$Funding
  # Treatment names are lower-cased and every '+' is rewritten as " + ".
  treatment_name <- str_replace_all(tolower(df$Treatment.name), "[[+]]", " + ")
  corpus$abstract <- paste(
    "Link to study: ", df$Full.text.link,
    " DESIGN: ", df$Study.design,
    ", RECRUITEMENT STATUS: ", df$Recruitment.status,
    ", TYPE OF PATIENTS: ", df$Type.of.patients,
    " TREATMENT: ", toupper(df$Pharmacological.treatment),
    ", TYPE: ", df$Treatment.type,
    ", TREATMENT NAME: ", treatment_name,
    ", PRIMARY OUTCOME: ", df$primary.outcome,
    ", CENTER: ", df$Center,
    ", COUNTRY: ", df$Countries,
    ", N in this treatment group: ", df$n.randomized.in.this.arm,
    ", N in the trial: ", df$Total.sample.size
  )
  write.table(
    corpus,
    file = paste0("output/gargCSV_all_plain_", filename, ".csv"),
    sep = "\t",
    row.names = FALSE
  )
}
# Write a tab-separated corpus whose abstract holds only the treatment fields
# in plain text.
#
# df:       data frame of trials; reads Registration.date (parsed as
#           "%Y-%m-%d"), Full.text.link, Trial.registration.number, Funding,
#           Pharmacological.treatment, Treatment.type and Treatment.name.
# filename: appended directly (no separator) to
#           output/gargCSV_only_treatment_plain, followed by ".csv"
garg_export_OnlyTreatments <- function(df, filename) {
  # Parse the registration date once; day, month and year all derive from it.
  registered <- as.Date(df$Registration.date, "%Y-%m-%d")
  corpus <- data.frame("publication_day" = day(registered))
  corpus$publication_month <- month(registered)
  corpus$publication_year <- year(registered)
  # In this export the authors column carries the link to the full text.
  corpus$authors <- df$Full.text.link
  corpus$title <- df$Trial.registration.number
  corpus$source <- df$Funding
  # Treatment names are lower-cased and every '+' is rewritten as " + ".
  treatment_name <- str_replace_all(tolower(df$Treatment.name), "[[+]]", " + ")
  corpus$abstract <- paste(
    " TREATMENT: ", toupper(df$Pharmacological.treatment),
    ", TYPE: ", df$Treatment.type,
    ", TREATMENT NAME: ", treatment_name
  )
  write.table(
    corpus,
    file = paste0("output/gargCSV_only_treatment_plain", filename, ".csv"),
    sep = "\t",
    row.names = FALSE
  )
}
# Write a tab-separated corpus whose abstract holds only the primary outcome
# in plain text.
#
# df:       data frame of trials; reads Registration.date (parsed as
#           "%Y-%m-%d"), Full.text.link, Trial.registration.number, Funding
#           and primary.outcome.
# filename: appended directly (no separator) to
#           output/gargCSV_only_outcomes_plain, followed by ".csv"
garg_export_OnlyOutcomes <- function(df, filename) {
  # Parse the registration date once; day, month and year all derive from it.
  registered <- as.Date(df$Registration.date, "%Y-%m-%d")
  corpus <- data.frame("publication_day" = day(registered))
  corpus$publication_month <- month(registered)
  corpus$publication_year <- year(registered)
  # In this export the authors column carries the link to the full text.
  corpus$authors <- df$Full.text.link
  corpus$title <- df$Trial.registration.number
  corpus$source <- df$Funding
  corpus$abstract <- paste("PRIMARY OUTCOME: ", df$primary.outcome)
  write.table(
    corpus,
    file = paste0("output/gargCSV_only_outcomes_plain", filename, ".csv"),
    sep = "\t",
    row.names = FALSE
  )
}
# Export of term lists
# List every treatment mentioned in a trials data frame.
#
# Each entry of df$Treatment.name may name several treatments separated by a
# literal '+'. Entries are lower-cased, split on '+', and each piece is
# stripped of surrounding whitespace so that "a + b" and "a" both yield the
# same token "a". Missing entries and empty pieces are dropped. Duplicates
# are kept on purpose: callers count occurrences.
#
# df: data frame with a Treatment.name column.
# Returns a data frame with a single column `treatments`, one row per
# treatment occurrence (zero rows when nothing is found).
getTreatmentsList <- function(df) {
  raw_names <- as.character(df$Treatment.name)
  # Skip missing entries; pasted as text they would become a fake "na" term.
  raw_names <- raw_names[!is.na(raw_names)]
  # Split on a literal '+' only, so names containing '&' stay in one piece.
  pieces <- strsplit(tolower(raw_names), "+", fixed = TRUE)
  pieces <- trimws(as.character(unlist(pieces, use.names = FALSE)))
  data.frame("treatments" = pieces[nzchar(pieces)])
}
# Export the list of all distinct treatments as a Gargantext V3 word list.
#
# Treatments are taken from df$Treatment.name, where several treatments may
# be separated by a literal '+'. Names are lower-cased, split on '+', trimmed
# of surrounding whitespace and de-duplicated; missing and empty names are
# dropped.
#
# df:       data frame with a Treatment.name column. The list is built from
#           this argument (it used to be read from the global `AllData`).
# filename: not used; the output path is fixed. Kept so the signature matches
#           the other export functions.
# Writes a tab-separated, unquoted file with columns `label` and `forms` to
# output/WL_GargV3_clinicaltrialsTreatments.csv.
gargV3_export_treaments_list <- function(df, filename) {
  raw_names <- as.character(df$Treatment.name)
  raw_names <- raw_names[!is.na(raw_names)]
  # Split on a literal '+' only, so names containing '&' stay in one piece.
  pieces <- strsplit(tolower(raw_names), "+", fixed = TRUE)
  pieces <- trimws(as.character(unlist(pieces, use.names = FALSE)))
  # Trim before unique(): otherwise "a " and "a" are kept as distinct labels.
  word_list <- data.frame("label" = unique(pieces[nzchar(pieces)]))
  word_list$forms <- word_list$label
  write.table(
    word_list,
    file = "output/WL_GargV3_clinicaltrialsTreatments.csv",
    sep = "\t",
    row.names = FALSE,
    quote = FALSE
  )
}
# Export the list of all distinct treatments as a Gargantext V4 term list
# (JSON), based on treatments separated by '+' in the original database.
#
# df:       data frame of trials, passed to getTreatmentsList().
# filename: suffix of the output file
#           output/WL_GargV4_CT_treaments_<filename>.json
# Writes a single JSON object: every distinct treatment becomes one key of
# "data", each with the same fixed attributes.
gargV4_export_treaments_list <- function(df, filename) {
  terms <- trimws(as.character(getTreatmentsList(df)$treatments))
  # A JSON object must not repeat keys, and an empty key carries no term.
  terms <- unique(terms[!is.na(terms) & nzchar(terms)])
  # Escape backslashes first, then double quotes, so keys stay valid JSON.
  keys <- gsub("\\", "\\\\", terms, fixed = TRUE)
  keys <- gsub("\"", "\\\"", keys, fixed = TRUE)
  # sprintf() yields no entry at all for an empty term list, and adds no
  # padding spaces inside the quoted key.
  entries <- sprintf('"%s":{"size":1,"list":"GraphTerm","children":[]}', keys)
  json <- paste0(
    '{"NgramsTerms":{"version":1237,"data":{',
    paste(entries, collapse = ","),
    "}}}"
  )
  # writeLines() writes the JSON alone; write.table() prepended a header line.
  writeLines(
    json,
    con = paste0("output/WL_GargV4_CT_treaments_", filename, ".json")
  )
}
# List the '+'-separated tokens found in one column of a data frame.
#
# Entries of the column are lower-cased, split on a literal '+', and each
# piece is stripped of surrounding whitespace. Missing entries and empty
# pieces are dropped; duplicates are kept.
#
# df:     data frame to read from.
# column: name of the column to tokenise, as a string. When omitted,
#         "Treatment.name" is used, which is the column this function always
#         read before the argument was honoured.
# Returns a data frame with a single column `treatments` (name kept for
# compatibility), one row per token occurrence.
ColumnTextCount <- function(df, column) {
  if (missing(column)) {
    column <- "Treatment.name"
  }
  raw_values <- as.character(df[[column]])
  raw_values <- raw_values[!is.na(raw_values)]
  # Split on a literal '+' only, so values containing '&' stay in one piece.
  pieces <- strsplit(tolower(raw_values), "+", fixed = TRUE)
  pieces <- trimws(as.character(unlist(pieces, use.names = FALSE)))
  data.frame("treatments" = pieces[nzchar(pieces)])
}
## Visualization functions
# Draw a word cloud of the treatments found in a trials data frame.
#
# df: data frame of trials, passed to getTreatmentsList().
# Each distinct treatment is sized by its number of occurrences; treatments
# seen fewer than twice are left out and at most 400 words are drawn.
TreatmentsCloud <- function(df) {
  # One row per distinct treatment, with its occurrence count in column n.
  term_counts <- count(getTreatmentsList(df), treatments)
  # Fixed seed so the layout is the same from one run to the next.
  set.seed(1234)
  wordcloud(
    words = term_counts$treatments,
    freq = term_counts$n,
    min.freq = 2,
    max.words = 400,
    random.order = FALSE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
}
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
This diff is collapsed.
......@@ -4,7 +4,7 @@
"name": "term",
"reldbs": {
"csv": {
"file": "../clinicaltrials.csv",
"file": "../gargCSV_all_html_AllData.csv",
"qcols": ["title","abstract"],
"template": "bib_details"
}
......
......@@ -4,7 +4,7 @@
"name": "term",
"reldbs": {
"csv": {
"file": "../clinicaltrials.csv",
"file": "../gargCSV_all_html_AllData.csv",
"qcols": ["title","abstract"],
"template": "bib_details"
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment