# = = = = [ 01. Getting ALL documents of the Corpus c ] = = = = #
Docs = {}
document_type_id = cache.NodeType['Document'].id
sql_query = 'select id from node_node where parent_id=' + str(corpus.id) + ' and type_id=' + str(document_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
for i in results:
    Docs[i[0]] = True
print("docs:", len(Docs.keys()))
# = = = = [ / 01. Getting ALL documents of the Corpus c ] = = = = #
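# Note (hedged sketch, not the original code): the same lookup could be done with a
# parameterized query so the ids are escaped by the driver instead of concatenated
# into the SQL string, assuming the DB driver accepts %s placeholders:
#   cursor.execute('select id from node_node where parent_id = %s and type_id = %s',
#                  [corpus.id, document_type_id])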
# = = = = [ 02. Getting ALL Documents related to Ngrams of the semantic map ] = = = = #
sql_query = 'select nodey_id,ngram_id from node_nodenodengram where ngram_id IN (' + ','.join(map(str, NodesB.keys())) + ")"
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
# = = = = [ / 02. Getting ALL Documents related to Ngrams of the semantic map ] = = = = #
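# Each row in results is a (document_id, ngram_id) pair linking a document to one of
# the map ngrams; step 03 below restricts these pairs to documents of the Corpus c.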
# = = = = [ 03. Now we limit the retrieved Documents (step 02) to those belonging to the Corpus c ] = = = = #
Docs_and_ = {
    "nodesA": {},
    "nodesB": {}
}
NodesB_and_Docs = {}
for i in results:
    doc_id = i[0]
    ngram_id = i[1]
    if ngram_id in NodesB and doc_id in Docs:
        if doc_id not in Docs_and_["nodesB"]:
            Docs_and_["nodesB"][doc_id] = []
        Docs_and_["nodesB"][doc_id].append(ngram_id)
        if ngram_id not in NodesB_and_Docs:
            NodesB_and_Docs[ngram_id] = []
        NodesB_and_Docs[ngram_id].append(doc_id)
# = = = = [ / 03. Now we limit the retrieved Documents (step 02) to those belonging to the Corpus c ] = = = = #
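# At this point Docs_and_["nodesB"] maps each corpus document id to the list of map
# ngram ids it contains, and NodesB_and_Docs is the inverse map (ngram id -> doc ids).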
# # = = = = [ Getting Authors ] = = = = ]
# Authors = {}
# sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")"+' and hyperdata_id=10'# 10 -> authors
# cursor = connection.cursor()
# cursor.execute(sql_query)
# results = cursor.fetchall()
# for i in results:
# doc_id = i[0]
# authors = i[1].split(",")
# for a in authors:
# if a not in Authors:
# Authors[a] = 0
# Authors[a] += 1
# print("")
# print("#authors:")
# import pprint
# pprint.pprint(Authors)
# print("")
# # = = = = [ / Getting Authors ] = = = = ]
# = = = = [ 04. Getting A-elems and making the dictionaries ] = = = = #
sql_query = 'select node_id,value_string from node_node_hyperdata where node_id IN (' + ','.join(map(str, Docs_and_["nodesB"].keys())) + ")" + ' and hyperdata_id=' + str(nodeA_type_id)
cursor = connection.cursor()
cursor.execute(sql_query)
results = cursor.fetchall()
A_Freq = {}
A_int2str = {}
A_str2int = {}
counter = max_nodeid + 1
for i in results:
    doc_id = i[0]
    a = i[1]
    if a not in A_str2int:
        A_str2int[a] = counter
        A_int2str[counter] = a
        counter += 1
for i in results:
    doc_id = i[0]
    a = A_str2int[i[1]]
    Docs_and_["nodesA"][doc_id] = a
    if a not in A_Freq:
        A_Freq[a] = 0
    A_Freq[a] += 1
# = = = = [ / 04. Getting A-elems and making the dictionaries ] = = = = #
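# A_str2int / A_int2str assign each distinct A-elem value (e.g. a journal name) a
# fresh integer id starting above max_nodeid, A_Freq counts how many documents carry
# each A-elem, and Docs_and_["nodesA"] records the A-elem id of each document.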
<li data-url="/project/{{project.id}}/corpus/{{ corpus.id }}/explorer?field1=journal&field2=ngrams" onclick='gotoexplorer(this)'><a>Journals and Terms</a></li>
<li>Authors and Terms</li>
</ol>
{% endif %}
<h4><a href="/project/{{project.id}}/corpus/{{corpus.id}}/">Back to corpus</a></h4>