import spacy
nlp = spacy.load("en_core_web_md")
# Example news articles
articles = [
    "Microsoft announced a new version of Windows that includes a redesigned interface and several new features.",
    "Apple released its latest iPhone model, which features a new camera and faster performance.",
    "Amazon unveiled a new smart speaker that includes a built-in display and improved voice recognition technology.",
]
# Process text by removing stop words and punctuation marks
processed = []
for article in articles:
    doc = nlp(article.lower())
    tokens = [token.text for token in doc if not token.is_stop and not token.is_punct]
    processed.append(" ".join(tokens))
# Extract key noun phrases
noun_phrases = []
for article in processed:
    doc = nlp(article)
    noun_chunks = list(doc.noun_chunks)
    noun_phrases.append([chunk.text for chunk in noun_chunks])
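# Note (an assumption, not part of the original pipeline): doc.noun_chunks relies on
# the dependency parse, which tends to be more reliable on the original sentences
# than on the stop-word-stripped versions above. A minimal sketch of the same step
# run on the raw articles, for comparison:
raw_noun_phrases = [
    [chunk.text for chunk in nlp(article).noun_chunks] for article in articles
]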
# Identify named entities and compare across articles
named_entities = []
for article in articles:
    doc = nlp(article)
    entities = [(entity.text, entity.label_) for entity in doc.ents]
    named_entities.append(entities)
for i, article in enumerate(articles):
    print("Article", i + 1)
    print("Noun Phrases:", noun_phrases[i])
    print("Named Entities:", named_entities[i])
    print()
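# A minimal sketch of the cross-article comparison mentioned above: collect the
# (text, label) pairs that occur in more than one article. The Counter-based
# approach here is an illustrative assumption, not part of the original example.
from collections import Counter

# Count how many distinct articles each (text, label) pair appears in.
entity_counts = Counter(ent for ents in named_entities for ent in set(ents))
shared_entities = [ent for ent, count in entity_counts.items() if count > 1]
print("Entities appearing in more than one article:", shared_entities)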