Commit 485bc840 authored by Loïc Chapron's avatar Loïc Chapron

mergeTermListJson

parent e5f6aa75
# MergeTwoJsonLists
## About The project
MergeTwoJsonLists takes two JSON term lists exported from GarganText and merges them.
## Usage
```shell
python3 mergeTwoJsonLists.py list1.json list2.json
```
list1.json / list2.json -> GarganText Term list
Outputs a new term list: list1-merged.json
## Date
This script was last updated on 2023/07/21.
It may be outdated in the future.
\ No newline at end of file
#######
# mergeListV4.py
# description : merge two list v4
# licence : AGPL + CECILL v3
# author : quentin lobbé - qlobbe@iscpif.fr
#######
# python3 mergeListV4.py list1.json list2.json
import json
import os
import sys
# Read the two input paths from the command line; both are mandatory.
try:
    pathList1 = sys.argv[1]
    pathList2 = sys.argv[2]
except IndexError:
    # Fewer than two arguments were supplied: print the usage hint and stop.
    # Only IndexError is caught so that unrelated failures are not hidden.
    print("! args error\n Try : python3 mergeListV4.py list1.json list2.json\n")
    # Exit with a non-zero status so calling shells/scripts see the failure.
    sys.exit(1)
def readJson(path):
    """Load and return the JSON document stored in the file at `path`.

    The file is opened as UTF-8 text and is closed before returning, even
    when parsing fails.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(path, encoding="utf-8") as file:
        return json.load(file)
# Load both term lists and keep a handle on their term dictionaries.
listJson1 = readJson(pathList1)
listJson2 = readJson(pathList2)
ngrams1 = listJson1['NgramsTerms']['data']
ngrams2 = listJson2['NgramsTerms']['data']

# Sets are used for roots/leafs because they are only ever tested for
# membership; this keeps the merge linear in the number of terms.
roots = set()
leafs = set()

# find the roots of list 1 (entries tagged "MapTerm") and all their children
for root, term in ngrams1.items():
    if term['list'] == "MapTerm":
        roots.add(root)
        leafs.update(term['children'])

# merge list 2 in list 1
for root, term in ngrams2.items():
    if root in roots:
        # Known root: union of both child lists. dict.fromkeys drops the
        # duplicates while keeping first-seen order, so the output file is
        # identical from one run to the next.
        ngrams1[root]['children'] = list(
            dict.fromkeys(ngrams1[root]['children'] + term['children'])
        )
    elif root not in leafs:
        # New entry: adopt it from list 2, but drop the children that list 1
        # already knows as a root or as a child of another root.
        # NOTE(review): the previous condition was
        # `child not in root or child not in leafs`, i.e. a substring test on
        # the root string combined with `or`, which filtered almost nothing.
        ngrams1[root] = term
        term['children'] = [
            child for child in term['children']
            if child not in roots and child not in leafs
        ]

# clean the merged list: only the "MapTerm" entries are kept
merged = {
    root: term for root, term in ngrams1.items() if term['list'] == "MapTerm"
}
# Replace the term section of list 1 with the merged entries.
listJson1['NgramsTerms']['data'] = merged
# The other sections are written out empty; a section that is absent from
# the input is skipped instead of aborting the whole run.
for section in ('Authors', 'Institutes', 'Sources'):
    if section in listJson1:
        listJson1[section]['data'] = {}

# Output goes next to list 1 as "<name>-merged<extension>". splitext only
# splits on the last dot of the file name, so names with several dots or
# without any extension are handled as well.
base, extension = os.path.splitext(pathList1)
newPath = base + '-merged' + extension
with open(newPath, 'w', encoding='utf-8') as file:
    json.dump(listJson1, file, sort_keys=False, indent=4)
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment