Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

NLP text summarization preprocess and tokenization

import pandas as pd
import numpy as np
data = "my name is shubham kumar shukla. It is my pleasure to got opportunity to write article for xyz related to nlp"
from nltk.tokenize
import word_tokenize, sent_tokenize
from nltk.corpus
import stopwords
def solve(text):
  stopwords1 = set(stopwords.words("english"))
words = word_tokenize(text)
freqTable = {}
for word in words:
  word = word.lower()
if word in stopwords1:
  continue
if word in freqTable:
  freqTable[word] += 1
else :
  freqTable[word] = 1

sentences = sent_tokenize(text)
sentenceValue = {}
for sentence in sentences:
  for word, freq in freqTable.items():
  if word in sentence.lower():
  if sentence in sentenceValue:
  sentenceValue[sentence] += freq
else :
  sentenceValue[sentence] = freq
sumValues = 0
for sentence in sentenceValue:
  sumValues += sentenceValue[sentence]
average = int(sumValues / len(sentenceValue))

summary = ''
for sentence in sentences:
  if (sentence in sentenceValue) and(sentenceValue[sentence] > (1.2 * average)):
    summary += "" + sentence
return summary
Source by www.topcoder.com #
 
PREVIOUS NEXT
Tagged: #NLP #text #summarization #preprocess #tokenization
ADD COMMENT
Topic
Name
2+6 =