Spaces:

nkasmanoff
/

sky-scribe

Runtime error

App Files Files Community

nkasmanoff commited on Mar 17, 2023

Commit

f8aa4be

1 Parent(s): b4e3258

Create knowledge_extraction.py

Browse files

Files changed (1) hide show

knowledge_extraction.py +80 -0

knowledge_extraction.py ADDED Viewed

	@@ -0,0 +1,80 @@

+import spacy
+from spacy.matcher import Matcher
+def get_entities(sent):
+    ## chunk 1
+    ent1 = ""
+    ent2 = ""
+    prv_tok_dep = ""  # dependency tag of previous token in the sentence
+    prv_tok_text = ""  # previous token in the sentence
+    prefix = ""
+    modifier = ""
+    #############################################################
+    for tok in nlp(sent):
+        ## chunk 2
+        # if token is a punctuation mark then move on to the next token
+        if tok.dep_ != "punct":
+            # check: token is a compound word or not
+            if tok.dep_ == "compound":
+                prefix = tok.text
+                # if the previous word was also a 'compound' then add the current word to it
+                if prv_tok_dep == "compound":
+                    prefix = prv_tok_text + " " + tok.text
+            # check: token is a modifier or not
+            if tok.dep_.endswith("mod") == True:
+                modifier = tok.text
+                # if the previous word was also a 'compound' then add the current word to it
+                if prv_tok_dep == "compound":
+                    modifier = prv_tok_text + " " + tok.text
+            ## chunk 3
+            if tok.dep_.find("subj") == True:
+                ent1 = modifier + " " + prefix + " " + tok.text
+                prefix = ""
+                modifier = ""
+                prv_tok_dep = ""
+                prv_tok_text = ""
+                ## chunk 4
+            if tok.dep_.find("obj") == True:
+                ent2 = modifier + " " + prefix + " " + tok.text
+            ## chunk 5
+            # update variables
+            prv_tok_dep = tok.dep_
+            prv_tok_text = tok.text
+    #############################################################
+    return [ent1.strip(), ent2.strip()]
+def get_relation(sent):
+    nlp = spacy.load('en_core_web_sm')
+    doc = nlp(sent)
+    # Matcher class object
+    matcher = Matcher(nlp.vocab)
+    #define the pattern
+    pattern = [{'DEP':'ROOT'},
+            {'DEP':'prep','OP':"?"},
+            {'DEP':'agent','OP':"?"},
+            {'POS':'ADJ','OP':"?"}]
+    matcher.add('matching_pattern', patterns=[pattern])
+    matches = matcher(doc)
+    k = len(matches) - 1
+    span = doc[matches[k][1]:matches[k][2]]
+    return(span.text)