How can I detect multiple items in a list that are separated with a somewhat equivalent list in Python?
I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:
posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']
However, the text I need to analyse on positive words aren't holding any spaces within items:
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.
So in the end, the wordList must look like this:
wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
BIG UPDATE:
Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)
from textblob import TextBlob
# open the files with negative words
negatives = open("neg_dutch_2.txt")
neg_list =
# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))
neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =
count = 0
def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):
# read every word in the list with negative words
for i in range(len(neg_list)):
# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count
neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)
# open the files with positive words
positives = open("pos_dutch_2.txt")
pos_list =
# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))
pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0
def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):
# read every word in the list with positive words
for i in range(len(pos_list)):
# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count
pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)
text = open("nrc_sample.TXT")
# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)
# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]
# make a list for all the words in the articles
word_list =
# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)
# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0
phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0
# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0
for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1
for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0
for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1
for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):
# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1
for neg in neg_one_space:
if phrase == neg:
amount_neg += 1
print(amount_neg)
# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0
for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1
for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
python string python-3.x list
add a comment |
I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:
posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']
However, the text I need to analyse on positive words aren't holding any spaces within items:
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.
So in the end, the wordList must look like this:
wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
BIG UPDATE:
Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)
from textblob import TextBlob
# open the files with negative words
negatives = open("neg_dutch_2.txt")
neg_list =
# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))
neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =
count = 0
def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):
# read every word in the list with negative words
for i in range(len(neg_list)):
# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count
neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)
# open the files with positive words
positives = open("pos_dutch_2.txt")
pos_list =
# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))
pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0
def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):
# read every word in the list with positive words
for i in range(len(pos_list)):
# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count
pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)
text = open("nrc_sample.TXT")
# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)
# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]
# make a list for all the words in the articles
word_list =
# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)
# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0
phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0
# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0
for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1
for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0
for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1
for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):
# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1
for neg in neg_one_space:
if phrase == neg:
amount_neg += 1
print(amount_neg)
# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0
for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1
for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
python string python-3.x list
1
show us what you tried.
– Sociopath
Nov 21 '18 at 10:21
2
Will'happy'and'day'(for example) always be consecutive elements inwordList, or could they appear anywhere?
– thesilkworm
Nov 21 '18 at 10:23
1
might the phrases be more than 2 words?
– Chris_Rands
Nov 21 '18 at 10:24
Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, forhappy dayis one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.
– mick_zon_24
Nov 21 '18 at 10:31
add a comment |
I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:
posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']
However, the text I need to analyse on positive words aren't holding any spaces within items:
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.
So in the end, the wordList must look like this:
wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
BIG UPDATE:
Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)
from textblob import TextBlob
# open the files with negative words
negatives = open("neg_dutch_2.txt")
neg_list =
# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))
neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =
count = 0
def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):
# read every word in the list with negative words
for i in range(len(neg_list)):
# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count
neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)
# open the files with positive words
positives = open("pos_dutch_2.txt")
pos_list =
# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))
pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0
def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):
# read every word in the list with positive words
for i in range(len(pos_list)):
# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count
pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)
text = open("nrc_sample.TXT")
# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)
# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]
# make a list for all the words in the articles
word_list =
# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)
# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0
phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0
# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0
for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1
for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0
for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1
for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):
# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1
for neg in neg_one_space:
if phrase == neg:
amount_neg += 1
print(amount_neg)
# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0
for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1
for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
python string python-3.x list
I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:
posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']
However, the text I need to analyse on positive words aren't holding any spaces within items:
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.
So in the end, the wordList must look like this:
wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
BIG UPDATE:
Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)
from textblob import TextBlob
# open the files with negative words
negatives = open("neg_dutch_2.txt")
neg_list =
# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))
neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =
count = 0
def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):
# read every word in the list with negative words
for i in range(len(neg_list)):
# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count
neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)
# open the files with positive words
positives = open("pos_dutch_2.txt")
pos_list =
# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))
pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0
def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):
# read every word in the list with positive words
for i in range(len(pos_list)):
# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]
# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)
# reset the counter to avoid the total sum of spaces in a list
count = 0
return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count
pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)
text = open("nrc_sample.TXT")
# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)
# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]
# make a list for all the words in the articles
word_list =
# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)
# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0
phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0
# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0
for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1
for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0
for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1
for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):
# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1
for neg in neg_one_space:
if phrase == neg:
amount_neg += 1
print(amount_neg)
# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0
for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1
for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1
print(amount_neg)
python string python-3.x list
python string python-3.x list
edited Nov 23 '18 at 16:39
mick_zon_24
asked Nov 21 '18 at 10:16
mick_zon_24mick_zon_24
82
82
1
show us what you tried.
– Sociopath
Nov 21 '18 at 10:21
2
Will'happy'and'day'(for example) always be consecutive elements inwordList, or could they appear anywhere?
– thesilkworm
Nov 21 '18 at 10:23
1
might the phrases be more than 2 words?
– Chris_Rands
Nov 21 '18 at 10:24
Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, forhappy dayis one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.
– mick_zon_24
Nov 21 '18 at 10:31
add a comment |
1
show us what you tried.
– Sociopath
Nov 21 '18 at 10:21
2
Will'happy'and'day'(for example) always be consecutive elements inwordList, or could they appear anywhere?
– thesilkworm
Nov 21 '18 at 10:23
1
might the phrases be more than 2 words?
– Chris_Rands
Nov 21 '18 at 10:24
Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, forhappy dayis one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.
– mick_zon_24
Nov 21 '18 at 10:31
1
1
show us what you tried.
– Sociopath
Nov 21 '18 at 10:21
show us what you tried.
– Sociopath
Nov 21 '18 at 10:21
2
2
Will
'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?– thesilkworm
Nov 21 '18 at 10:23
Will
'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?– thesilkworm
Nov 21 '18 at 10:23
1
1
might the phrases be more than 2 words?
– Chris_Rands
Nov 21 '18 at 10:24
might the phrases be more than 2 words?
– Chris_Rands
Nov 21 '18 at 10:24
Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for
happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.– mick_zon_24
Nov 21 '18 at 10:31
Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for
happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.– mick_zon_24
Nov 21 '18 at 10:31
add a comment |
4 Answers
4
active
oldest
votes
Here is a way to do it:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
# Create a sentence for the wordList.
joinedWordList = " ".join(wordList)
# Find all phrases in the posWords list.
phrases = [elem for elem in posWords if len(elem.split()) > 1]
# For every phrase, locate it in the sentence,
# count the space characters which is the same number as the index of the first word of phrase in the word list,
# insert the phrase and delete the word that combine the phrase from the wordList.
for phrase in phrases:
try:
i = joinedWordList.index(phrase)
spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
wordList.insert(spaces,phrase)
del wordList[spaces+1:spaces+1 + len(phrase.split())]
except ValueError:
pass
print(wordList)
Output:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Here is another approach that would work for any phrase length:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
for w in posWords:
nrWords = len(w.split(' '))
if nrWords > 1:
word_array = w.split(' ')
word_index_array = [wordList.index(w) for w in word_array]
index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]
if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
for elem in word_array:
wordList.remove(elem)
wordList.insert(word_index_array[0], w)
Output will be:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
But if we for example input something like:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
The output will be:
['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
You can do something like this:
In [711]: s = ''.join(posWords)
In [712]: s
Out[712]: 'beautifullovehappy dayenjoysmelling flowers'
In [672]: n =
In [673]: for i in wordList:
...: if i in s:
...: n.append(i)
...:
In [713]: n
Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']
In [740]: for c, i in enumerate(n):
...: if c+1 < len(n):
...: word = n[c] + ' ' + n[c+1]
...: if word in posWords:
...: ix1 = wordList.index(n[c])
...: del wordList[ix1: ix1+2]
...: wordList.insert(ix1,word)
...:
In [710]: wordList
Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Let me know if this helps.
2
You can usefor c, i in enumerate(n):and remove thec=0andc+=1. You could also replace theif - break - elsewith simplyif c+1 < len(n):and noelseorbreakused at all.
– Guimoute
Nov 21 '18 at 13:13
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
add a comment |
Another way to do it:
>>> m=["good bad", "enjoy", "play"]
>>> l=["good", "bad", "happy", "delight"]
>>>
>>> for e in m:
... tmp = e.split(" ")
... if(len(tmp) > 1):
... l = [ent for ent in l if ent not in tmp]
... l.append(" ".join(tmp))
...
>>>
>>> l
['happy', 'delight', 'good bad']
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: true,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: 10,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53409801%2fhow-can-i-detect-multiple-items-in-a-list-that-are-separated-with-a-somewhat-equ%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
4 Answers
4
active
oldest
votes
4 Answers
4
active
oldest
votes
active
oldest
votes
active
oldest
votes
Here is a way to do it:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
# Create a sentence for the wordList.
joinedWordList = " ".join(wordList)
# Find all phrases in the posWords list.
phrases = [elem for elem in posWords if len(elem.split()) > 1]
# For every phrase, locate it in the sentence,
# count the space characters which is the same number as the index of the first word of phrase in the word list,
# insert the phrase and delete the word that combine the phrase from the wordList.
for phrase in phrases:
try:
i = joinedWordList.index(phrase)
spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
wordList.insert(spaces,phrase)
del wordList[spaces+1:spaces+1 + len(phrase.split())]
except ValueError:
pass
print(wordList)
Output:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Here is a way to do it:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
# Create a sentence for the wordList.
joinedWordList = " ".join(wordList)
# Find all phrases in the posWords list.
phrases = [elem for elem in posWords if len(elem.split()) > 1]
# For every phrase, locate it in the sentence,
# count the space characters which is the same number as the index of the first word of phrase in the word list,
# insert the phrase and delete the word that combine the phrase from the wordList.
for phrase in phrases:
try:
i = joinedWordList.index(phrase)
spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
wordList.insert(spaces,phrase)
del wordList[spaces+1:spaces+1 + len(phrase.split())]
except ValueError:
pass
print(wordList)
Output:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Here is a way to do it:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
# Create a sentence for the wordList.
joinedWordList = " ".join(wordList)
# Find all phrases in the posWords list.
phrases = [elem for elem in posWords if len(elem.split()) > 1]
# For every phrase, locate it in the sentence,
# count the space characters which is the same number as the index of the first word of phrase in the word list,
# insert the phrase and delete the word that combine the phrase from the wordList.
for phrase in phrases:
try:
i = joinedWordList.index(phrase)
spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
wordList.insert(spaces,phrase)
del wordList[spaces+1:spaces+1 + len(phrase.split())]
except ValueError:
pass
print(wordList)
Output:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Here is a way to do it:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
# Create a sentence for the wordList.
joinedWordList = " ".join(wordList)
# Find all phrases in the posWords list.
phrases = [elem for elem in posWords if len(elem.split()) > 1]
# For every phrase, locate it in the sentence,
# count the space characters which is the same number as the index of the first word of phrase in the word list,
# insert the phrase and delete the word that combine the phrase from the wordList.
for phrase in phrases:
try:
i = joinedWordList.index(phrase)
spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
wordList.insert(spaces,phrase)
del wordList[spaces+1:spaces+1 + len(phrase.split())]
except ValueError:
pass
print(wordList)
Output:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
edited Nov 21 '18 at 12:44
answered Nov 21 '18 at 11:19
Vasilis G.Vasilis G.
3,4082722
3,4082722
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated
– mick_zon_24
Nov 21 '18 at 13:54
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words
– mick_zon_24
Nov 21 '18 at 14:04
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.
– Vasilis G.
Nov 21 '18 at 22:39
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
update posted! @vasilis G
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Here is another approach that would work for any phrase length:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
for w in posWords:
nrWords = len(w.split(' '))
if nrWords > 1:
word_array = w.split(' ')
word_index_array = [wordList.index(w) for w in word_array]
index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]
if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
for elem in word_array:
wordList.remove(elem)
wordList.insert(word_index_array[0], w)
Output will be:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
But if we for example input something like:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
The output will be:
['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Here is another approach that would work for any phrase length:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
for w in posWords:
nrWords = len(w.split(' '))
if nrWords > 1:
word_array = w.split(' ')
word_index_array = [wordList.index(w) for w in word_array]
index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]
if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
for elem in word_array:
wordList.remove(elem)
wordList.insert(word_index_array[0], w)
Output will be:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
But if we for example input something like:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
The output will be:
['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
Here is another approach that would work for any phrase length:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
for w in posWords:
nrWords = len(w.split(' '))
if nrWords > 1:
word_array = w.split(' ')
word_index_array = [wordList.index(w) for w in word_array]
index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]
if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
for elem in word_array:
wordList.remove(elem)
wordList.insert(word_index_array[0], w)
Output will be:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
But if we for example input something like:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
The output will be:
['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']
Here is another approach that would work for any phrase length:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
for w in posWords:
nrWords = len(w.split(' '))
if nrWords > 1:
word_array = w.split(' ')
word_index_array = [wordList.index(w) for w in word_array]
index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]
if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
for elem in word_array:
wordList.remove(elem)
wordList.insert(word_index_array[0], w)
Output will be:
['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
But if we for example input something like:
posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']
The output will be:
['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']
answered Nov 21 '18 at 13:30
toti08toti08
1,73931523
1,73931523
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
update posted! (see above)
– mick_zon_24
Nov 23 '18 at 16:40
add a comment |
You can do something like this:
In [711]: s = ''.join(posWords)
In [712]: s
Out[712]: 'beautifullovehappy dayenjoysmelling flowers'
In [672]: n =
In [673]: for i in wordList:
...: if i in s:
...: n.append(i)
...:
In [713]: n
Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']
In [740]: for c, i in enumerate(n):
...: if c+1 < len(n):
...: word = n[c] + ' ' + n[c+1]
...: if word in posWords:
...: ix1 = wordList.index(n[c])
...: del wordList[ix1: ix1+2]
...: wordList.insert(ix1,word)
...:
In [710]: wordList
Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Let me know if this helps.
2
You can usefor c, i in enumerate(n):and remove thec=0andc+=1. You could also replace theif - break - elsewith simplyif c+1 < len(n):and noelseorbreakused at all.
– Guimoute
Nov 21 '18 at 13:13
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
add a comment |
You can do something like this:
In [711]: s = ''.join(posWords)
In [712]: s
Out[712]: 'beautifullovehappy dayenjoysmelling flowers'
In [672]: n =
In [673]: for i in wordList:
...: if i in s:
...: n.append(i)
...:
In [713]: n
Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']
In [740]: for c, i in enumerate(n):
...: if c+1 < len(n):
...: word = n[c] + ' ' + n[c+1]
...: if word in posWords:
...: ix1 = wordList.index(n[c])
...: del wordList[ix1: ix1+2]
...: wordList.insert(ix1,word)
...:
In [710]: wordList
Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Let me know if this helps.
2
You can usefor c, i in enumerate(n):and remove thec=0andc+=1. You could also replace theif - break - elsewith simplyif c+1 < len(n):and noelseorbreakused at all.
– Guimoute
Nov 21 '18 at 13:13
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
add a comment |
You can do something like this:
In [711]: s = ''.join(posWords)
In [712]: s
Out[712]: 'beautifullovehappy dayenjoysmelling flowers'
In [672]: n =
In [673]: for i in wordList:
...: if i in s:
...: n.append(i)
...:
In [713]: n
Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']
In [740]: for c, i in enumerate(n):
...: if c+1 < len(n):
...: word = n[c] + ' ' + n[c+1]
...: if word in posWords:
...: ix1 = wordList.index(n[c])
...: del wordList[ix1: ix1+2]
...: wordList.insert(ix1,word)
...:
In [710]: wordList
Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Let me know if this helps.
You can do something like this:
In [711]: s = ''.join(posWords)
In [712]: s
Out[712]: 'beautifullovehappy dayenjoysmelling flowers'
In [672]: n =
In [673]: for i in wordList:
...: if i in s:
...: n.append(i)
...:
In [713]: n
Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']
In [740]: for c, i in enumerate(n):
...: if c+1 < len(n):
...: word = n[c] + ' ' + n[c+1]
...: if word in posWords:
...: ix1 = wordList.index(n[c])
...: del wordList[ix1: ix1+2]
...: wordList.insert(ix1,word)
...:
In [710]: wordList
Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']
Let me know if this helps.
edited Nov 21 '18 at 13:30
answered Nov 21 '18 at 10:59
Mayank PorwalMayank Porwal
4,6571624
4,6571624
2
You can usefor c, i in enumerate(n):and remove thec=0andc+=1. You could also replace theif - break - elsewith simplyif c+1 < len(n):and noelseorbreakused at all.
– Guimoute
Nov 21 '18 at 13:13
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
add a comment |
2
You can usefor c, i in enumerate(n):and remove thec=0andc+=1. You could also replace theif - break - elsewith simplyif c+1 < len(n):and noelseorbreakused at all.
– Guimoute
Nov 21 '18 at 13:13
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
2
2
You can use
for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.– Guimoute
Nov 21 '18 at 13:13
You can use
for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.– Guimoute
Nov 21 '18 at 13:13
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
@Guimoute Thanks a lot for that. Edited my answer too.
– Mayank Porwal
Nov 21 '18 at 13:30
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!
– mick_zon_24
Nov 21 '18 at 14:00
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
update posted (see above)!
– mick_zon_24
Nov 23 '18 at 16:41
add a comment |
Another way to do it:
>>> m=["good bad", "enjoy", "play"]
>>> l=["good", "bad", "happy", "delight"]
>>>
>>> for e in m:
... tmp = e.split(" ")
... if(len(tmp) > 1):
... l = [ent for ent in l if ent not in tmp]
... l.append(" ".join(tmp))
...
>>>
>>> l
['happy', 'delight', 'good bad']
add a comment |
Another way to do it:
>>> m=["good bad", "enjoy", "play"]
>>> l=["good", "bad", "happy", "delight"]
>>>
>>> for e in m:
... tmp = e.split(" ")
... if(len(tmp) > 1):
... l = [ent for ent in l if ent not in tmp]
... l.append(" ".join(tmp))
...
>>>
>>> l
['happy', 'delight', 'good bad']
add a comment |
Another way to do it:
>>> m=["good bad", "enjoy", "play"]
>>> l=["good", "bad", "happy", "delight"]
>>>
>>> for e in m:
... tmp = e.split(" ")
... if(len(tmp) > 1):
... l = [ent for ent in l if ent not in tmp]
... l.append(" ".join(tmp))
...
>>>
>>> l
['happy', 'delight', 'good bad']
Another way to do it:
>>> m=["good bad", "enjoy", "play"]
>>> l=["good", "bad", "happy", "delight"]
>>>
>>> for e in m:
... tmp = e.split(" ")
... if(len(tmp) > 1):
... l = [ent for ent in l if ent not in tmp]
... l.append(" ".join(tmp))
...
>>>
>>> l
['happy', 'delight', 'good bad']
answered Nov 21 '18 at 13:48
AkhileshaAkhilesha
113
113
add a comment |
add a comment |
Thanks for contributing an answer to Stack Overflow!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53409801%2fhow-can-i-detect-multiple-items-in-a-list-that-are-separated-with-a-somewhat-equ%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
1
show us what you tried.
– Sociopath
Nov 21 '18 at 10:21
2
Will
'happy'and'day'(for example) always be consecutive elements inwordList, or could they appear anywhere?– thesilkworm
Nov 21 '18 at 10:23
1
might the phrases be more than 2 words?
– Chris_Rands
Nov 21 '18 at 10:24
Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for
happy dayis one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.– mick_zon_24
Nov 21 '18 at 10:31