Source Code tutorial sentimen analisis Object Oriented Python
Wednesday, May 5th, 2021
Buat file dengan nama bebas, contoh sentimen.py
Biasanya langsung muncul di Spyder: klik menu File > New File, kemudian save dengan nama file di atas.
Python modul atau satu python file dapat terdiri dari beberapa class. Copy code class-class di bawah ini ke python module atau file di Spyder.
from nltk.tokenize import word_tokenize, sent_tokenize
import nltk
import string


class Dataset:
    """Labeled e-learning review sentences used as the sentiment corpus."""

    def __init__(self):
        # Negative reviews as (sentence, label) pairs.
        self.neg_text = [
            ('sangat tidak efektif', 'negative'),
            ('semoga dievaluasi lagi kinerjanya', 'negative'),
            ('Agar Lebih ditingkatkan dengan menggunakan Aplikasi e-learning versi terbaru.', 'negative'),
            ('Dosennya baik dan asik, namun sayang fitur elearning nya kurang', 'negative'),
            ('Memudahkan dalam memberikan materi, namun kurang mengetahui keluhan mengenai mata kuliah dari setiap mahasiswa, karena tidak selalu ada tatap muka.', 'negative'),
        ]
        # Positive reviews as (sentence, label) pairs.
        self.pos_text = [
            ('sangat cukup', 'positive'),
            ('akses nya cukup mudah', 'positive'),
            ('sangat baik dan luar biasa', 'positive'),
            ('Dengan aktifitas elearning ini untuk sistem pembelajaran bisa lebih praktis', 'positive'),
            ('Simple dan menarik sehingga memudahkan untuk melakukan perkuliahan', 'positive'),
        ]
        # Hand-built flat token list covering the whole corpus.
        # NOTE(review): 'versi terbaru.' is a single two-word entry in the
        # original — kept byte-for-byte.
        self.document = [
            'sangat', 'tidak', 'efektif', 'semoga', 'dievaluasi', 'lagi',
            'kinerjanya', 'Agar', 'Lebih', 'ditingkatkan', 'dengan',
            'menggunakan', 'Aplikasi', 'e-learning', 'versi terbaru.',
            'Dosennya', 'baik', 'dan', 'asik', 'namun', 'sayang', 'fitur',
            'elearning', 'nya', 'kurang', 'Memudahkan', 'dalam',
            'memberikan', 'materi', 'namun', 'kurang', 'mengetahui',
            'keluhan', 'mengenai', 'mata', 'kuliah', 'dari', 'setiap',
            'mahasiswa', 'karena', 'tidak', 'selalu', 'ada', 'tatap',
            'muka', 'sangat', 'cukup', 'akses', 'cukup', 'mudah', 'sangat',
            'baik', 'dan', 'luar', 'biasa', 'Dengan', 'aktifitas',
            'elearning', 'ini', 'untuk', 'sistem', 'pembelajaran', 'bisa',
            'lebih', 'praktis', 'Simple', 'dan', 'menarik', 'sehingga',
            'memudahkan', 'untuk', 'melakukan', 'perkuliahan',
        ]
class TrainTest:
    """Builds tokenized, punctuation-free train and test splits (80/20 per class)."""

    def __init__(self):
        dtObj = Dataset()
        # 80% of each class goes to training.
        train_pos_text = int(len(dtObj.pos_text) * 4 / 5)
        train_neg_text = int(len(dtObj.neg_text) * 4 / 5)

        # Training sentences, lower-cased, labels preserved.
        train_total_lower_word_token = []
        for (words, sentiment) in dtObj.pos_text[:train_pos_text] + dtObj.neg_text[:train_neg_text]:
            train_total_lower_word_token.append((words.lower(), sentiment))

        # Test sentences: the remaining 20% of each class.
        # BUG FIX: the original computed the positive test offset from
        # neg_text ('test_pos_text = int(len(dtObj.neg_text)*1/5)') and then
        # sliced from that small index, so the test set overlapped the
        # training set. Slicing from the training split point keeps the two
        # sets disjoint.
        test_total_lower_word_token = []
        for (words, sentiment) in dtObj.pos_text[train_pos_text:] + dtObj.neg_text[train_neg_text:]:
            test_total_lower_word_token.append((words.lower(), sentiment))

        punctuations = list(string.punctuation)

        # Tokenize each sentence and drop pure-punctuation tokens.
        self.train_total_word_removeToken = []
        for sentence, label in train_total_lower_word_token:
            tokens = [tok for tok in word_tokenize(sentence) if tok not in punctuations]
            self.train_total_word_removeToken.append((tokens, label))

        self.test_total_word_removeToken = []
        for sentence, label in test_total_lower_word_token:
            tokens = [tok for tok in word_tokenize(sentence) if tok not in punctuations]
            self.test_total_word_removeToken.append((tokens, label))
class Feature:
    """Derives word-feature vocabularies from the corpus and the train/test splits."""

    def __init__(self):
        self.objTrain = TrainTest()
        self.dtObj = Dataset()
        self.word()

    def word(self):
        """Whitespace-tokenize every sentence, keeping lower-cased words of length >= 3."""
        self.doc = []
        for sentence, label in self.dtObj.pos_text + self.dtObj.neg_text:
            kept = [w.lower() for w in sentence.split() if len(w) >= 3]
            self.doc.append((kept, label))
        return self.doc

    def get_words_in_tweets(self, doc):
        """Flatten a list of (tokens, label) pairs into one token list."""
        all_words = []
        for tokens, _label in doc:
            all_words.extend(tokens)
        return all_words

    def get_word_features(self, wordlist):
        """Return the distinct words of *wordlist* (keys of its frequency distribution)."""
        freq = nltk.FreqDist(wordlist)
        return freq.keys()

    def process(self):
        """Tokenize the full corpus and compute train/test feature vocabularies."""
        # Lower-case every sentence in the corpus.
        self.total_lower_word_token = [
            (sentence.lower(), label)
            for sentence, label in self.dtObj.pos_text + self.dtObj.neg_text
        ]

        # NLTK-tokenize and strip punctuation tokens.
        punctuations = list(string.punctuation)
        self.total_word_removeToken = []
        for sentence, label in self.total_lower_word_token:
            tokens = [tok for tok in word_tokenize(sentence) if tok not in punctuations]
            self.total_word_removeToken.append((tokens, label))

        # Vocabularies drawn from the TrainTest splits.
        self.featureTrain = self.objTrain.train_total_word_removeToken
        self.featureTest = self.objTrain.test_total_word_removeToken
        self.train_feature_elearning = self.get_word_features(
            self.get_words_in_tweets(self.featureTrain))
        self.test_feature_elearning = self.get_word_features(
            self.get_words_in_tweets(self.featureTest))
class Classification:
    """Trains and evaluates an NLTK Naive Bayes sentiment classifier."""

    def __init__(self):
        self.objFeature = Feature()
        self.objFeature.process()
        self.dtObj = Dataset()

    def extract_features(self, document):
        """Map a token list to a {'contains(word)': bool} dict over the training vocabulary."""
        document_words = set(document)
        features = {}
        for word in self.objFeature.train_feature_elearning:
            features['contains(%s)' % word] = (word in document_words)
        return features

    def extract_features_train(self, document):
        """Feature dict over the training vocabulary (same as extract_features)."""
        document_words = set(document)
        features = {}
        for word in self.objFeature.train_feature_elearning:
            features['contains(%s)' % word] = (word in document_words)
        return features

    def extract_features_test(self, document):
        """Feature dict over the test vocabulary.

        NOTE(review): extracting test features from the *test* vocabulary is
        unconventional — the classifier only knows training-vocabulary
        features. Kept as in the tutorial; consider using
        extract_features_train for both.
        """
        document_words = set(document)
        features = {}
        for word in self.objFeature.test_feature_elearning:
            features['contains(%s)' % word] = (word in document_words)
        return features

    def klasifikasi(self):
        """Build the feature sets and train the Naive Bayes classifier."""
        # BUG FIX: the original built BOTH training_set and test_set from
        # self.objFeature.total_word_removeToken (the full corpus), so the
        # classifier was evaluated on its own training data and the reported
        # accuracy was meaningless. Use the disjoint splits instead.
        self.training_set = nltk.classify.apply_features(
            self.extract_features_train, self.objFeature.featureTrain)
        self.classifier = nltk.NaiveBayesClassifier.train(self.training_set)
        self.test_set = nltk.classify.apply_features(
            self.extract_features_test, self.objFeature.featureTest)

    def result(self):
        """Train, then print held-out accuracy and the most informative features."""
        self.klasifikasi()
        print(f'accuracy: {nltk.classify.util.accuracy(self.classifier, self.test_set)}')
        print(f'{self.classifier.show_most_informative_features()}')
Gunakan python console pada spyder, ketikan code berikut untuk inisiasi object dan menjalankan code di atas.
obj = Classification()
obj.result()
Code di atas akan memunculkan akurasi dan most informative features.
Ketikan code di bawah ini untuk testing ke kalimat baru menggunakan classifier yang sudah dibuat sebelumnya.
sentence = 'hal ini kurang baik'
print(f'{obj.classifier.classify(obj.extract_features(sentence.split()))}')
Boleh diganti dengan kalimat lain.
sentence = 'saya suka sekali'
print(f'{obj.classifier.classify(obj.extract_features(sentence.split()))}')
Leave a Reply