Spaces:

nkasmanoff
/

sky-scribe

Runtime error

App Files Files Community

sky-scribe / knowledge_extraction.py

nkasmanoff

Create knowledge_extraction.py

f8aa4be over 2 years ago

raw

history blame contribute delete

2.29 kB

	import spacy
	from spacy.matcher import Matcher


	def get_entities(sent):
	    """Extract a subject entity and an object entity from a sentence.

	    The sentence is parsed with spaCy and its tokens are visited in order.
	    Compound words and modifiers seen along the way are remembered and
	    prepended to the next subject or object token that is found.

	    Args:
	        sent: The sentence text to parse.

	    Returns:
	        A two-item list ``[subject, object]``. An item is the empty string
	        when no token with a matching dependency label was seen. If several
	        tokens match, the last one wins.
	    """
	    # Reuse a module-level `nlp` pipeline when one has been defined;
	    # otherwise load the model here so the call does not fail with a
	    # NameError (this module itself never assigns `nlp`).
	    pipeline = globals().get("nlp")
	    if pipeline is None:
	        pipeline = spacy.load('en_core_web_sm')

	    ent1 = ""  # subject entity
	    ent2 = ""  # object entity

	    prv_tok_dep = ""   # dependency tag of the previous non-punctuation token
	    prv_tok_text = ""  # text of the previous non-punctuation token

	    prefix = ""    # pending compound word(s)
	    modifier = ""  # pending modifier word(s)

	    for tok in pipeline(sent):
	        dep = tok.dep_

	        # Punctuation carries no entity information; skip it entirely.
	        if dep == "punct":
	            continue

	        # Remember compound words, chaining two consecutive compounds.
	        if dep == "compound":
	            prefix = tok.text
	            if prv_tok_dep == "compound":
	                prefix = prv_tok_text + " " + tok.text

	        # Remember modifiers (labels ending in "mod"), pulling in a
	        # directly preceding compound word.
	        if dep.endswith("mod"):
	            modifier = tok.text
	            if prv_tok_dep == "compound":
	                modifier = prv_tok_text + " " + tok.text

	        # Substring membership instead of `find(...) == True`: the latter
	        # only matched when the substring happened to start at index 1.
	        if "subj" in dep:
	            # Join only the non-empty parts so no double spaces appear.
	            ent1 = " ".join(
	                part for part in (modifier, prefix, tok.text) if part
	            )
	            # The pending words were consumed by the subject.
	            prefix = ""
	            modifier = ""

	        if "obj" in dep:
	            ent2 = " ".join(
	                part for part in (modifier, prefix, tok.text) if part
	            )

	        # Track this token for the compound checks of the next iteration.
	        prv_tok_dep = dep
	        prv_tok_text = tok.text

	    return [ent1.strip(), ent2.strip()]




	def get_relation(sent):
	    """Extract the relation phrase of a sentence.

	    The sentence is parsed with spaCy's ``en_core_web_sm`` model and matched
	    against a token pattern made of the ROOT token optionally followed by a
	    preposition, an agent and an adjective.

	    Args:
	        sent: The sentence text to parse.

	    Returns:
	        The text of the last match reported by the matcher, or the empty
	        string when the pattern does not match anything (for example, for
	        an empty sentence).
	    """
	    nlp = spacy.load('en_core_web_sm')

	    doc = nlp(sent)

	    # Matcher class object
	    matcher = Matcher(nlp.vocab)

	    # ROOT token, optionally extended by a preposition, agent and adjective.
	    pattern = [
	        {'DEP': 'ROOT'},
	        {'DEP': 'prep', 'OP': "?"},
	        {'DEP': 'agent', 'OP': "?"},
	        {'POS': 'ADJ', 'OP': "?"},
	    ]

	    matcher.add('matching_pattern', patterns=[pattern])
	    matches = matcher(doc)

	    # Without this guard, indexing the last match of an empty result list
	    # raises IndexError.
	    if not matches:
	        return ""

	    # Each match is a (match_id, start, end) triple; use the last one.
	    _, start, end = matches[-1]
	    return doc[start:end].text