Search⌘ K
AI Features

Decision Trees and Overfitting

Explore how decision trees function in sentiment classification using simple word rules. Learn to identify and control overfitting by limiting tree depth, balancing bias and variance to improve model generalization and performance on new data.

Decision trees are one of the most beginner-friendly models in machine learning—they mirror how we make choices in everyday life: step-by-step, rule-by-rule. That's part of why they are commonly used as a first model for classification tasks.

Python
class SimpleSentimentTree:
    """Rule-based sentiment classifier that mimics a tiny decision tree.

    Counts positive/negative lexicon words in a text — flipping polarity
    when a negation word appears within the 3 preceding tokens — and
    labels the text 'positive', 'negative', or 'neutral' depending on
    whether the count difference exceeds a threshold relative to length.
    """

    # Characters stripped from token edges so "great!" or "bad," still
    # match the lexicons; internal apostrophes ("don't") are preserved.
    _EDGE_PUNCT = '.,!?;:"()[]{}'

    def __init__(self):
        # Simple sentiment lexicons
        self.positive_words = {'good', 'great', 'awesome', 'love', 'happy'}
        self.negative_words = {'bad', 'terrible', 'hate', 'sad', 'worst'}
        # Negation words that can flip sentiment
        self.negation_words = {'not', 'never', "don't", 'no', "isn't", "aren't", "wasn't"}
        # Threshold for neutral sentiment when counts are close:
        # the difference must exceed 20% of text length to be non-neutral.
        self.neutral_threshold = 0.2

    def handle_negations(self, words):
        """
        Count sentiment words, flipping polarity for negated ones.

        Args:
            words (list): Lower-cased tokens of the text.

        Returns:
            tuple: (positive_count, negative_count) after negation flips.
        """
        pos_count = 0
        neg_count = 0
        for i, word in enumerate(words):
            # A negation anywhere in the 3 tokens before a sentiment word
            # flips its polarity (e.g. "not bad" counts as positive).
            negation_window = words[max(0, i - 3):i]
            is_negated = any(neg_word in negation_window
                             for neg_word in self.negation_words)
            if word in self.positive_words:
                if is_negated:
                    neg_count += 1  # Negated positive becomes negative
                else:
                    pos_count += 1
            elif word in self.negative_words:
                if is_negated:
                    pos_count += 1  # Negated negative becomes positive
                else:
                    neg_count += 1
        return pos_count, neg_count

    def count_sentiment_words(self, text):
        """
        Count positive and negative words in the text, accounting for negations.

        Bug fix: tokens are stripped of leading/trailing punctuation so
        words like "terrible!" or "love," still match the lexicons
        (previously they were silently missed).

        Args:
            text (str): Input text to analyze.

        Returns:
            dict: 'positive_count', 'negative_count', and 'text_length'.
        """
        words = [w.strip(self._EDGE_PUNCT) for w in text.lower().split()]
        pos_count, neg_count = self.handle_negations(words)
        return {
            'positive_count': pos_count,
            'negative_count': neg_count,
            'text_length': len(words)
        }

    def predict_sentiment(self, text):
        """
        Predict sentiment using the decision rules above.

        Args:
            text (str): Input text to classify.

        Returns:
            str: 'positive', 'negative', or 'neutral'.
        """
        features = self.count_sentiment_words(text)
        text_length = features['text_length']
        pos_count = features['positive_count']
        neg_count = features['negative_count']
        # Calculate the difference in sentiment counts
        sentiment_diff = abs(pos_count - neg_count)
        # If the difference is small relative to text length, return neutral
        if sentiment_diff <= self.neutral_threshold * text_length:
            return 'neutral'
        # Otherwise return the dominant sentiment
        return 'positive' if pos_count > neg_count else 'negative'
# Build the classifier and run it over a handful of demo sentences.
classifier = SimpleSentimentTree()

# Examples covering plain sentiment, negation, and mixed/neutral cases.
test_texts = [
    "I love this product, it's amazing!",
    "This is a terrible experience.",
    "It was okay, nothing special.",
    "This is not bad at all.",
    "I don't love this product.",
    "The movie wasn't terrible.",
    "Happy and sad moments.",
]

print("Sentiment Analysis Results:")
for text in test_texts:
    label = classifier.predict_sentiment(text)
    print(f"Text: '{text}'")
    print(f"Sentiment: {label}")
    print()
...