Source Code tutorial sentimen analisis Object Oriented Python

Wednesday, May 5th, 2021

Buat file dengan nama bebas, contoh sentimen.py
Biasanya langsung muncul di Spyder: klik menu File > New File, kemudian save dengan nama file di atas.

Python modul atau satu python file dapat terdiri dari beberapa class. Copy code class-class di bawah ini ke python module atau file di Spyder.

from nltk.tokenize import word_tokenize, sent_tokenize
import nltk
import string


class Dataset:
    """Container for the labelled e-learning review corpus used by the tutorial."""

    def __init__(self):
        # Negative-sentiment reviews; each gets paired with its label below.
        negative_reviews = [
            'sangat tidak efektif',
            'semoga dievaluasi lagi kinerjanya',
            'Agar Lebih ditingkatkan dengan menggunakan Aplikasi e-learning versi terbaru.',
            'Dosennya baik dan asik, namun sayang fitur elearning nya kurang',
            'Memudahkan dalam memberikan materi, namun kurang mengetahui keluhan mengenai mata kuliah dari setiap mahasiswa, karena tidak selalu ada tatap muka.',
        ]
        self.neg_text = [(review, 'negative') for review in negative_reviews]

        # Positive-sentiment reviews, labelled the same way.
        positive_reviews = [
            'sangat cukup',
            'akses nya cukup mudah',
            'sangat baik dan luar biasa',
            'Dengan aktifitas elearning ini untuk sistem pembelajaran bisa lebih praktis',
            'Simple dan menarik sehingga memudahkan untuk melakukan perkuliahan',
        ]
        self.pos_text = [(review, 'positive') for review in positive_reviews]

        # Flat, hand-tokenised list of every word in the corpus.
        # NOTE(review): 'versi terbaru.' is a single two-word entry in the
        # original data; kept verbatim to preserve behavior.
        self.document = [
            'sangat', 'tidak', 'efektif', 'semoga', 'dievaluasi', 'lagi',
            'kinerjanya', 'Agar', 'Lebih', 'ditingkatkan', 'dengan',
            'menggunakan', 'Aplikasi', 'e-learning', 'versi terbaru.',
            'Dosennya', 'baik', 'dan', 'asik', 'namun', 'sayang', 'fitur',
            'elearning', 'nya', 'kurang', 'Memudahkan', 'dalam', 'memberikan',
            'materi', 'namun', 'kurang', 'mengetahui', 'keluhan', 'mengenai',
            'mata', 'kuliah', 'dari', 'setiap', 'mahasiswa', 'karena', 'tidak',
            'selalu', 'ada', 'tatap', 'muka', 'sangat', 'cukup', 'akses',
            'cukup', 'mudah', 'sangat', 'baik', 'dan', 'luar', 'biasa',
            'Dengan', 'aktifitas', 'elearning', 'ini', 'untuk', 'sistem',
            'pembelajaran', 'bisa', 'lebih', 'praktis', 'Simple', 'dan',
            'menarik', 'sehingga', 'memudahkan', 'untuk', 'melakukan',
            'perkuliahan',
        ]
class TrainTest:
    """Builds lower-cased, punctuation-free token lists split into train/test.

    Attributes:
        train_total_word_removeToken: list of ([token, ...], label) tuples for
            the first 4/5 of each sentiment class.
        test_total_word_removeToken: same structure for the held-out last 1/5.
    """

    def __init__(self):
        dtObj = Dataset()
        # Size of the training split for each class (4/5 of the examples).
        train_pos_count = int(len(dtObj.pos_text) * 4 / 5)
        # BUG FIX: the original computed the positive test size from
        # neg_text (copy-paste error) — harmless only while both classes
        # happen to have the same length.
        train_neg_count = int(len(dtObj.neg_text) * 4 / 5)

        # Train on the first 4/5 of each class.
        train_pairs = (dtObj.pos_text[:train_pos_count]
                       + dtObj.neg_text[:train_neg_count])
        # BUG FIX: the original sliced the test set as pos_text[len*1/5:],
        # which overlaps the training data. The held-out test set is
        # everything AFTER the training split.
        test_pairs = (dtObj.pos_text[train_pos_count:]
                      + dtObj.neg_text[train_neg_count:])

        self.train_total_word_removeToken = self._tokenize_pairs(train_pairs)
        self.test_total_word_removeToken = self._tokenize_pairs(test_pairs)

    @staticmethod
    def _tokenize_pairs(pairs):
        """Lower-case each sentence, tokenize, and drop punctuation tokens.

        Args:
            pairs: iterable of (sentence, label) tuples.
        Returns:
            list of ([token, ...], label) tuples.
        """
        # set() gives O(1) membership tests instead of scanning a list.
        punctuations = set(string.punctuation)
        tokenized = []
        for sentence, label in pairs:
            tokens = [tok for tok in word_tokenize(sentence.lower())
                      if tok not in punctuations]
            tokenized.append((tokens, label))
        return tokenized
class Feature:
    """Feature-extraction helpers over the e-learning review corpus."""

    def __init__(self):
        self.objTrain = TrainTest()
        self.dtObj = Dataset()
        self.word()

    def word(self):
        """Build self.doc: one ([lower-cased words of length >= 3], label)
        pair per review in the full corpus. Also returns it."""
        self.doc = []
        for sentence, label in self.dtObj.pos_text + self.dtObj.neg_text:
            kept_words = [w.lower() for w in sentence.split() if len(w) >= 3]
            self.doc.append((kept_words, label))
        return self.doc

    def get_words_in_tweets(self, doc):
        """Flatten a list of ([words], label) pairs into a single word list."""
        all_words = []
        for words, _label in doc:
            all_words.extend(words)
        return all_words

    def get_word_features(self, wordlist):
        """Return the distinct words of wordlist as a dict-keys view
        (via nltk.FreqDist, so ordering follows frequency-dict insertion)."""
        return nltk.FreqDist(wordlist).keys()

    def process(self):
        """Tokenize the whole corpus and derive the train/test vocabularies."""
        # Lower-cased sentences paired with their labels.
        self.total_lower_word_token = []
        for sentence, label in self.dtObj.pos_text + self.dtObj.neg_text:
            self.total_lower_word_token.append((sentence.lower(), label))

        # Tokenized sentences with punctuation tokens removed.
        punctuations = list(string.punctuation)
        self.total_word_removeToken = []
        for sentence, label in self.total_lower_word_token:
            tokens = [tok for tok in word_tokenize(sentence)
                      if tok not in punctuations]
            self.total_word_removeToken.append((tokens, label))

        # Pre-split token lists produced by TrainTest.
        self.featureTrain = self.objTrain.train_total_word_removeToken
        self.featureTest = self.objTrain.test_total_word_removeToken

        # Feature vocabularies for the train and test splits.
        self.train_feature_elearning = self.get_word_features(
            self.get_words_in_tweets(self.featureTrain))
        self.test_feature_elearning = self.get_word_features(
            self.get_words_in_tweets(self.featureTest))
class Classification:
    """Trains and evaluates an NLTK Naive Bayes sentiment classifier."""

    def __init__(self):
        self.objFeature = Feature()
        self.objFeature.process()
        self.dtObj = Dataset()

    def extract_features(self, document):
        """Map a token list to {'contains(word)': bool} over the training
        vocabulary — the standard NLTK bag-of-words feature extractor."""
        document_words = set(document)
        features = {}
        for word in self.objFeature.train_feature_elearning:
            features['contains(%s)' % word] = (word in document_words)
        return features

    def extract_features_train(self, document):
        """Alias kept for backward compatibility; the original duplicated
        extract_features() byte-for-byte, so just delegate."""
        return self.extract_features(document)

    def extract_features_test(self, document):
        """Same shape as extract_features() but over the test vocabulary."""
        document_words = set(document)
        features = {}
        for word in self.objFeature.test_feature_elearning:
            features['contains(%s)' % word] = (word in document_words)
        return features

    def klasifikasi(self):
        """Train the classifier and build the evaluation set.

        BUG FIX: the original applied features to the FULL corpus
        (total_word_removeToken) for BOTH the training set and the test
        set, so accuracy was measured on the very data the model was
        trained on. Use the train/test split computed by TrainTest.
        """
        self.training_set = nltk.classify.apply_features(
            self.extract_features_train, self.objFeature.featureTrain)
        self.classifier = nltk.NaiveBayesClassifier.train(self.training_set)
        self.test_set = nltk.classify.apply_features(
            self.extract_features_test, self.objFeature.featureTest)

    def result(self):
        """Train, then print accuracy and the most informative features."""
        self.klasifikasi()
        print(f'accuracy: {nltk.classify.util.accuracy(self.classifier, self.test_set)}')
        # BUG FIX: show_most_informative_features() prints its own table and
        # returns None; wrapping it in an f-string also printed "None".
        self.classifier.show_most_informative_features()

Gunakan python console pada spyder, ketikan code berikut untuk inisiasi object dan menjalankan code di atas.

# Build the full pipeline (Dataset -> TrainTest -> Feature) and train the
# Naive Bayes classifier, then print accuracy and the most informative features.
obj = Classification()
obj.result()

Code di atas akan memunculkan akurasi dan most important features.
Ketikan code di bawah ini untuk testing ke kalimat baru menggunakan classifier yang sudah dibuat sebelumnya.

sentence = 'hal ini kurang baik'
# BUG FIX: extract_features is a method of Classification, not a free
# function — the bare call raised NameError. Call it on the trained object.
print(f'{obj.classifier.classify(obj.extract_features(sentence.split()))}')

Boleh diganti dengan kalimat lain.

sentence = 'saya suka sekali'
# BUG FIX: as above, extract_features must be called via the object
# (obj.extract_features), otherwise this line raises NameError.
print(f'{obj.classifier.classify(obj.extract_features(sentence.split()))}')

Leave a Reply

Your email address will not be published. Required fields are marked *

458post_id=458