How can I detect multiple items in a list that are separated with a somewhat equivalent list in Python?












-1















I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:



posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']


However, the text I need to analyse for positive words doesn't hold any spaces within items:



wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 


I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.



So in the end, the wordList must look like this:



wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


BIG UPDATE:



Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there were phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words and negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... I'm planning to fix that but I'm quite tired and the weekend is starting now, so no hate please! (tips are welcome tho)



    from textblob import TextBlob

# Load the negative words/phrases: one entry per line, lower-cased.
# NOTE: the original called strip("n"), which strips the letter "n" from both
# ends of each phrase and leaves the newline in place; strip() removes the
# surrounding whitespace (including the newline) instead.
with open("neg_dutch_2.txt") as negatives:
    neg_list = [line.lower().strip() for line in negatives]

# Buckets for the negative phrases, grouped by the number of spaces in each
# phrase (the empty-list literals were missing in the original text).
neg_no_space = []
neg_one_space = []
neg_two_spaces = []
neg_three_spaces = []

count = 0

def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
                        neg_three_spaces, count):
    """Sort the negative phrases into buckets by the number of spaces in each.

    Each phrase of ``neg_list`` is appended, in place, to exactly one bucket:
    ``neg_one_space``, ``neg_two_spaces`` or ``neg_three_spaces`` when it
    contains one, two or three spaces, and ``neg_no_space`` otherwise (as in
    the original, this fallback also receives phrases with more than three
    spaces). Empty phrases are skipped.

    Args:
        neg_list: iterable of phrase strings to classify.
        neg_no_space, neg_one_space, neg_two_spaces, neg_three_spaces:
            lists that receive the phrases; they are mutated in place.
        count: kept only for backward compatibility. The space count is now
            computed per phrase, so the value passed in is ignored.

    Returns:
        The tuple ``(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
        neg_three_spaces, 0)``.
    """
    buckets = {1: neg_one_space, 2: neg_two_spaces, 3: neg_three_spaces}
    for phrase in neg_list:
        if not phrase:
            # The original tested phrase[-1], which raises IndexError on "".
            continue
        buckets.get(phrase.count(" "), neg_no_space).append(phrase)

    # Parenthesised so that all six values belong to the returned tuple; the
    # original return statement was split across two lines without them.
    return (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
            neg_three_spaces, 0)

neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
                    neg_two_spaces, neg_three_spaces, count)

# Load the positive words/phrases: one entry per line, lower-cased.
# NOTE: the original called strip("n"), which strips the letter "n" from both
# ends of each phrase and leaves the newline in place; strip() removes the
# surrounding whitespace (including the newline) instead.
with open("pos_dutch_2.txt") as positives:
    pos_list = [line.lower().strip() for line in positives]

# Buckets for the positive phrases, grouped by the number of spaces in each
# phrase (the empty-list literals were missing in the original text).
pos_no_space = []
pos_one_space = []
pos_two_spaces = []
pos_three_spaces = []
count = 0

def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
                        pos_three_spaces, count):
    """Sort the positive phrases into buckets by the number of spaces in each.

    Each phrase of ``pos_list`` is appended, in place, to exactly one bucket:
    ``pos_one_space``, ``pos_two_spaces`` or ``pos_three_spaces`` when it
    contains one, two or three spaces, and ``pos_no_space`` otherwise (as in
    the original, this fallback also receives phrases with more than three
    spaces). Empty phrases are skipped.

    Args:
        pos_list: iterable of phrase strings to classify.
        pos_no_space, pos_one_space, pos_two_spaces, pos_three_spaces:
            lists that receive the phrases; they are mutated in place.
        count: kept only for backward compatibility. The space count is now
            computed per phrase, so the value passed in is ignored.

    Returns:
        The tuple ``(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
        pos_three_spaces, 0)``.
    """
    buckets = {1: pos_one_space, 2: pos_two_spaces, 3: pos_three_spaces}
    for phrase in pos_list:
        if not phrase:
            # The original tested phrase[-1], which raises IndexError on "".
            continue
        buckets.get(phrase.count(" "), pos_no_space).append(phrase)

    # Parenthesised so that all six values belong to the returned tuple; the
    # original return statement was split across two lines without them.
    return (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
            pos_three_spaces, 0)

pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
                    pos_two_spaces, pos_three_spaces, count)

# Read the whole articles file; the context manager closes it again, which
# the original never did.
with open("nrc_sample.TXT") as article_file:
    text = article_file.read()

# TextBlob is used to separate the text into words.
blob = TextBlob(text)

# Tokens that belong to the metadata of the articles file, not to the text.
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
        "PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
        "Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# Every lower-cased, purely alphabetic word of the articles, in reading order
# (the empty-list literal was missing in the original text).
word_list = []
for word in blob.words:
    # Case-sensitive substring test against the metadata markers, done on the
    # token as it appears in the file (before lower-casing).
    if any(marker in word for marker in ruis):
        continue
    word = word.lower()
    if word.isalpha():
        word_list.append(word)

# Frequencies of positive and negative words/phrases found in the articles.
amount_pos = 0
amount_neg = 0
count = 0

# Phrases of four, three, two and one consecutive article words.
phrases_four = []
phrases_three = []
phrases_two = []
phrases_one = []

def _ngrams(words, size):
    """Return every run of *size* consecutive words, joined by single spaces."""
    # There are len(words) - size + 1 such runs. The original loops used
    # range(len(word_list) - size), which stops one run early, so the last
    # word of the article was never part of any checked phrase.
    return [" ".join(words[i:i + size])
            for i in range(len(words) - size + 1)]


# Sets make each membership test O(1) instead of a scan over the whole list.
# A lexicon entry that is listed twice now counts once per occurrence in the
# text rather than twice.
neg_four_words = set(neg_three_spaces)
neg_three_words = set(neg_two_spaces)
neg_two_words = set(neg_one_space)
neg_single_words = set(neg_no_space)

# PHRASE 4
phrases_four.extend(_ngrams(word_list, 4))
for count, phrase in enumerate(phrases_four):
    print("phrase4", count, phrase)
    if phrase in neg_four_words:
        print("negatief woord^")
        amount_neg += 1

print(amount_neg)

# PHRASE 3
phrases_three.extend(_ngrams(word_list, 3))
for count, phrase in enumerate(phrases_three):
    print("phrase3", count, phrase)
    if phrase in neg_three_words:
        print("negatief woord^")
        amount_neg += 1

print(amount_neg)

# PHRASE 2
# Every pair of neighbouring article words becomes one two-word phrase, so it
# can be compared with the negative phrases that contain exactly one space.
# (The original printed nothing per phrase in this section; that is kept.)
phrases_two.extend(_ngrams(word_list, 2))
amount_neg += sum(phrase in neg_two_words for phrase in phrases_two)

print(amount_neg)

# JUST A WORD
phrases_one.extend(_ngrams(word_list, 1))
for count, phrase in enumerate(phrases_one):
    print("phrase1", count, phrase)
    if phrase in neg_single_words:
        print("negatief woord^")
        amount_neg += 1

print(amount_neg)









share|improve this question




















  • 1





    show us what you tried.

    – Sociopath
    Nov 21 '18 at 10:21






  • 2





    Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

    – thesilkworm
    Nov 21 '18 at 10:23






  • 1





    might the phrases be more than 2 words?

    – Chris_Rands
    Nov 21 '18 at 10:24











  • Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if they appear elsewhere in the list, they wouldn't be counted as one positive word, for happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

    – mick_zon_24
    Nov 21 '18 at 10:31


















-1















I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:



posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']


However, the text I need to analyse on positive words aren't holding any spaces within items:



wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 


I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.



So in the end, the wordList must look like this:



wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


BIG UPDATE:



Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)



    from textblob import TextBlob

# open the files with negative words
negatives = open("neg_dutch_2.txt")

neg_list =

# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))

neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =

count = 0

def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):

# read every word in the list with negative words
for i in range(len(neg_list)):

# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]

# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)

# reset the counter to avoid the total sum of spaces in a list
count = 0

return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count

neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)

# open the files with positive words
positives = open("pos_dutch_2.txt")

pos_list =

# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))

pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0

def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):

# read every word in the list with positive words
for i in range(len(pos_list)):

# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]

# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)

# reset the counter to avoid the total sum of spaces in a list
count = 0

return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count

pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)

text = open("nrc_sample.TXT")

# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)

# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# make a list for all the words in the articles
word_list =

# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)

# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0

phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0

# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0

for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1

for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)

# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0

for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1

for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)

# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):

# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0

# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1

for neg in neg_one_space:
if phrase == neg:
amount_neg += 1

print(amount_neg)

# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0

for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1

for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)









share|improve this question




















  • 1





    show us what you tried.

    – Sociopath
    Nov 21 '18 at 10:21






  • 2





    Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

    – thesilkworm
    Nov 21 '18 at 10:23






  • 1





    might the phrases be more than 2 words?

    – Chris_Rands
    Nov 21 '18 at 10:24











  • Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

    – mick_zon_24
    Nov 21 '18 at 10:31
















-1












-1








-1


0






I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:



posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']


However, the text I need to analyse on positive words aren't holding any spaces within items:



wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 


I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.



So in the end, the wordList must look like this:



wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


BIG UPDATE:



Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)



    from textblob import TextBlob

# open the files with negative words
negatives = open("neg_dutch_2.txt")

neg_list =

# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))

neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =

count = 0

def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):

# read every word in the list with negative words
for i in range(len(neg_list)):

# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]

# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)

# reset the counter to avoid the total sum of spaces in a list
count = 0

return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count

neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)

# open the files with positive words
positives = open("pos_dutch_2.txt")

pos_list =

# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))

pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0

def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):

# read every word in the list with positive words
for i in range(len(pos_list)):

# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]

# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)

# reset the counter to avoid the total sum of spaces in a list
count = 0

return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count

pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)

text = open("nrc_sample.TXT")

# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)

# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# make a list for all the words in the articles
word_list =

# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)

# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0

phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0

# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0

for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1

for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)

# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0

for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1

for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)

# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):

# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0

# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1

for neg in neg_one_space:
if phrase == neg:
amount_neg += 1

print(amount_neg)

# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0

for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1

for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)









share|improve this question
















I'm coding in python version 3, and I got a list with positive "words", but some items hold a space:



posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']


However, the text I need to analyse on positive words aren't holding any spaces within items:



wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 


I want to iterate over wordList and when the algorithm sees words that are also in posWords, but merged (e.g. 'happy day'), remove the corresponding words in wordList ('happy', 'day') and add the merged version in wordList.



So in the end, the wordList must look like this:



wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


BIG UPDATE:



Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there where phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists from all the words of the text that needed to be analysed with either 3, 2, or 1 words in one string item so I can check if the items also appear in my lists of positive words end negative words. Here is my code so far. It's kinda bulky, with a lot of copy pasting... Im planning to fix that but im quite tired and weekend is starting now, so no hate please! (tips are welcome tho)



    from textblob import TextBlob

# open the files with negative words
negatives = open("neg_dutch_2.txt")

neg_list =

# push all the words from text file to list
for lines in negatives:
lines = lines.lower()
neg_list.append(lines.strip("n"))

neg_no_space =
neg_one_space =
neg_two_spaces =
neg_three_spaces =

count = 0

def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count):

# read every word in the list with negative words
for i in range(len(neg_list)):

# every word is a phrase, because there are "words" with spaces
phrase = neg_list[i]

# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
neg_one_space.append(phrase)
elif count == 2:
neg_two_spaces.append(phrase)
elif count == 3:
neg_three_spaces.append(phrase)
else:
neg_no_space.append(phrase)

# reset the counter to avoid the total sum of spaces in a list
count = 0

return neg_list, neg_no_space, neg_one_space, neg_two_spaces,
neg_three_spaces, count

neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
neg_two_spaces, neg_three_spaces, count)

# open the files with positive words
positives = open("pos_dutch_2.txt")

pos_list =

# push all the words from text file to list
for lines in positives:
lines = lines.lower()
pos_list.append(lines.strip("n"))

pos_no_space =
pos_one_space =
pos_two_spaces =
pos_three_spaces =
count = 0

def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count):

# read every word in the list with positive words
for i in range(len(pos_list)):

# every word is a phrase, because there are "words" with spaces
phrase = pos_list[i]

# look at every character and assign the phrase to a list
# that correspondes with the number of spaces in it
for j in range(len(phrase)):
if phrase[j] == " ":
count += 1
if phrase[-1]:
if count == 1:
pos_one_space.append(phrase)
elif count == 2:
pos_two_spaces.append(phrase)
elif count == 3:
pos_three_spaces.append(phrase)
else:
pos_no_space.append(phrase)

# reset the counter to avoid the total sum of spaces in a list
count = 0

return pos_list, pos_no_space, pos_one_space, pos_two_spaces,
pos_three_spaces, count

pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
pos_two_spaces, pos_three_spaces, count)

text = open("nrc_sample.TXT")

# reading the article, using TextBlob library to seperate each word
text = text.read()
blob = TextBlob(text)

# these are words that are bound to the meta-deta of the articlesfile
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
"PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
"Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# make a list for all the words in the articles
word_list =

# and store every word in that list
for word in blob.words:
if not any(x in word for x in ruis):
word = word.lower()
if word.isalpha():
word_list.append(word)

# variables for the frequencies of negative and positive words in articles
amount_pos = 0
amount_neg = 0
count = 0

phrases_four =
phrases_three =
phrases_two =
phrases_one =
amount_neg = 0

# PHRASE 4
for i in range(0, len(word_list)-4, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2], word_list[i+3]
phrase = phrase.join(strings)
phrases_four.append(phrase)
count = 0

for phrase in phrases_four:
print("phrase4", count, phrase)
count += 1

for neg in neg_three_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)

# PHRASE 3
for i in range(0, len(word_list)-3, 1):
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1], word_list[i+2]
phrase = phrase.join(strings)
phrases_three.append(phrase)
count = 0

for phrase in phrases_three:
print("phrase3", count, phrase)
count += 1

for neg in neg_two_spaces:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)

# PHRASE 2
# start at index zero, till one before end of the list
for i in range(0, len(word_list)-2, 1):

# until it hits the last word of the list, make for every two words in the
# article next to each other a phrase of two words, so we can check if
# there are phrases in the article who also exists in the pos or neg wordslists
if word_list[-1]:
phrase = " "
strings = word_list[i], word_list[i+1]
phrase = phrase.join(strings)
phrases_two.append(phrase)
count = 0

# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
# dont forget to delete the counter, is only for readability
for phrase in phrases_two:
count += 1

for neg in neg_one_space:
if phrase == neg:
amount_neg += 1

print(amount_neg)

# JUST A WORD
for i in range(0, len(word_list)-1, 1):
if word_list[-1]:
phrase = word_list[i]
phrases_one.append(phrase)
count = 0

for phrase in phrases_one:
print("phrase1", count, phrase)
count += 1

for neg in neg_no_space:
if phrase == neg:
print("negatief woord^")
amount_neg += 1

print(amount_neg)






python string python-3.x list






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Nov 23 '18 at 16:39







mick_zon_24

















asked Nov 21 '18 at 10:16









mick_zon_24mick_zon_24

82




82








  • 1





    show us what you tried.

    – Sociopath
    Nov 21 '18 at 10:21






  • 2





    Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

    – thesilkworm
    Nov 21 '18 at 10:23






  • 1





    might the phrases be more than 2 words?

    – Chris_Rands
    Nov 21 '18 at 10:24











  • Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

    – mick_zon_24
    Nov 21 '18 at 10:31
















  • 1





    show us what you tried.

    – Sociopath
    Nov 21 '18 at 10:21






  • 2





    Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

    – thesilkworm
    Nov 21 '18 at 10:23






  • 1





    might the phrases be more than 2 words?

    – Chris_Rands
    Nov 21 '18 at 10:24











  • Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

    – mick_zon_24
    Nov 21 '18 at 10:31










1




1





show us what you tried.

– Sociopath
Nov 21 '18 at 10:21





show us what you tried.

– Sociopath
Nov 21 '18 at 10:21




2




2





Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23





Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23




1




1





might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24





might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24













Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31







Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if the appear elsewhere in the list, they wouldn't be counted as one positive word, for happy day is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31














4 Answers
4






active

oldest

votes


















1














Here is a way to do it:



posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


def merge_phrases(words, phrases):
    """Return a copy of *words* with runs that spell a phrase merged into one item.

    Only the multi-word entries of *phrases* are considered. The list is
    scanned left to right and, at each position, the longest phrase whose
    words match the next consecutive items is taken. Matching is done on
    whole words, so a phrase is never found inside a longer word, and every
    occurrence is merged, not just the first.

    The earlier version searched a joined copy of the word list that was not
    refreshed after each merge, so the insert positions went stale as soon as
    a phrase earlier in the sentence had been merged.
    """
    # Map the tuple of words of each multi-word phrase to the phrase itself.
    lookup = {}
    for phrase in phrases:
        parts = tuple(phrase.split())
        if len(parts) > 1:
            lookup[parts] = phrase
    # Try the longest phrases first so that e.g. "a b c" wins over "a b".
    sizes = sorted({len(parts) for parts in lookup}, reverse=True)

    merged = []
    i = 0
    while i < len(words):
        for size in sizes:
            window = tuple(words[i:i + size])
            if window in lookup:
                merged.append(lookup[window])
                i += size
                break
        else:
            # No phrase starts here: keep the single word unchanged.
            merged.append(words[i])
            i += 1
    return merged


wordList = merge_phrases(wordList, posWords)
print(wordList)


Output:



['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']





share|improve this answer


























  • Thank you. If I try it with a longer sentence, the program is buggy and does not erase the words in the right way. But this helps me a lot, and over the following days I will try to solve this on my own. I'll keep you updated.

    – mick_zon_24
    Nov 21 '18 at 13:54











  • By the way, this was the best solution so far, as the other answers didn't correctly process positive phrases of more than two words.

    – mick_zon_24
    Nov 21 '18 at 14:04











  • @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

    – Vasilis G.
    Nov 21 '18 at 22:39











  • update posted! @vasilis G

    – mick_zon_24
    Nov 23 '18 at 16:40



















1














Here is another approach that would work for any phrase length:



posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

for w in posWords:
nrWords = len(w.split(' '))
if nrWords > 1:
word_array = w.split(' ')
word_index_array = [wordList.index(w) for w in word_array]
index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]

if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
for elem in word_array:
wordList.remove(elem)
wordList.insert(word_index_array[0], w)


Output will be:



['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


But if we input, for example, something like:



posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


The output will be:



['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']





share|improve this answer
























  • update posted! (see above)

    – mick_zon_24
    Nov 23 '18 at 16:40



















0














You can do something like this:



In [711]: s = ''.join(posWords)

In [712]: s
Out[712]: 'beautifullovehappy dayenjoysmelling flowers'

In [672]: n = []

In [673]: for i in wordList:
...: if i in s:
...: n.append(i)
...:

In [713]: n
Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']

In [740]: for c, i in enumerate(n):
...: if c+1 < len(n):
...: word = n[c] + ' ' + n[c+1]
...: if word in posWords:
...: ix1 = wordList.index(n[c])
...: del wordList[ix1: ix1+2]
...: wordList.insert(ix1,word)
...:

In [710]: wordList
Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


Let me know if this helps.






share|improve this answer





















  • 2





    You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

    – Guimoute
    Nov 21 '18 at 13:13













  • @Guimoute Thanks a lot for that. Edited my answer too.

    – Mayank Porwal
    Nov 21 '18 at 13:30











  • Thanks, this helps a lot! But it is still a bit buggy when I pass longer sentences (I'm doing a sentiment analysis on Dutch newspapers). Over the following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or get stuck!

    – mick_zon_24
    Nov 21 '18 at 14:00











  • update posted (see above)!

    – mick_zon_24
    Nov 23 '18 at 16:41



















0














Another way to do it:



>>> m=["good bad", "enjoy", "play"]
>>> l=["good", "bad", "happy", "delight"]
>>>
>>> for e in m:
... tmp = e.split(" ")
... if(len(tmp) > 1):
... l = [ent for ent in l if ent not in tmp]
... l.append(" ".join(tmp))
...
>>>
>>> l
['happy', 'delight', 'good bad']





share|improve this answer























    Your Answer






    StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
    StackExchange.using("snippets", function () {
    StackExchange.snippets.init();
    });
    });
    }, "code-snippets");

    StackExchange.ready(function() {
    var channelOptions = {
    tags: "".split(" "),
    id: "1"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled) {
    StackExchange.using("snippets", function() {
    createEditor();
    });
    }
    else {
    createEditor();
    }
    });

    function createEditor() {
    StackExchange.prepareEditor({
    heartbeatType: 'answer',
    autoActivateHeartbeat: false,
    convertImagesToLinks: true,
    noModals: true,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: 10,
    bindNavPrevention: true,
    postfix: "",
    imageUploader: {
    brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
    contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
    allowUrls: true
    },
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    });


    }
    });














    draft saved

    draft discarded


















    StackExchange.ready(
    function () {
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53409801%2fhow-can-i-detect-multiple-items-in-a-list-that-are-separated-with-a-somewhat-equ%23new-answer', 'question_page');
    }
    );

    Post as a guest















    Required, but never shown

























    4 Answers
    4






    active

    oldest

    votes








    4 Answers
    4






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes









    1














    Here is a way to do it:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    # Create a sentence for the wordList.
    joinedWordList = " ".join(wordList)

    # Find all phrases in the posWords list.
    phrases = [elem for elem in posWords if len(elem.split()) > 1]

    # For every phrase, locate it in the sentence,
    # count the space characters which is the same number as the index of the first word of phrase in the word list,
    # insert the phrase and delete the word that combine the phrase from the wordList.
    for phrase in phrases:
    try:
    i = joinedWordList.index(phrase)
    spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
    wordList.insert(spaces,phrase)
    del wordList[spaces+1:spaces+1 + len(phrase.split())]
    except ValueError:
    pass
    print(wordList)


    Output:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']





    share|improve this answer


























    • Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

      – mick_zon_24
      Nov 21 '18 at 13:54











    • By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

      – mick_zon_24
      Nov 21 '18 at 14:04











    • @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

      – Vasilis G.
      Nov 21 '18 at 22:39











    • update posted! @vasilis G

      – mick_zon_24
      Nov 23 '18 at 16:40
















    1














    Here is a way to do it:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    # Create a sentence for the wordList.
    joinedWordList = " ".join(wordList)

    # Find all phrases in the posWords list.
    phrases = [elem for elem in posWords if len(elem.split()) > 1]

    # For every phrase, locate it in the sentence,
    # count the space characters which is the same number as the index of the first word of phrase in the word list,
    # insert the phrase and delete the word that combine the phrase from the wordList.
    for phrase in phrases:
    try:
    i = joinedWordList.index(phrase)
    spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
    wordList.insert(spaces,phrase)
    del wordList[spaces+1:spaces+1 + len(phrase.split())]
    except ValueError:
    pass
    print(wordList)


    Output:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']





    share|improve this answer


























    • Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

      – mick_zon_24
      Nov 21 '18 at 13:54











    • By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

      – mick_zon_24
      Nov 21 '18 at 14:04











    • @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

      – Vasilis G.
      Nov 21 '18 at 22:39











    • update posted! @vasilis G

      – mick_zon_24
      Nov 23 '18 at 16:40














    1












    1








    1







    Here is a way to do it:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    # Create a sentence for the wordList.
    joinedWordList = " ".join(wordList)

    # Find all phrases in the posWords list.
    phrases = [elem for elem in posWords if len(elem.split()) > 1]

    # For every phrase, locate it in the sentence,
    # count the space characters which is the same number as the index of the first word of phrase in the word list,
    # insert the phrase and delete the word that combine the phrase from the wordList.
    for phrase in phrases:
    try:
    i = joinedWordList.index(phrase)
    spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
    wordList.insert(spaces,phrase)
    del wordList[spaces+1:spaces+1 + len(phrase.split())]
    except ValueError:
    pass
    print(wordList)


    Output:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']





    share|improve this answer















    Here is a way to do it:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    # Create a sentence for the wordList.
    joinedWordList = " ".join(wordList)

    # Find all phrases in the posWords list.
    phrases = [elem for elem in posWords if len(elem.split()) > 1]

    # For every phrase, locate it in the sentence,
    # count the space characters which is the same number as the index of the first word of phrase in the word list,
    # insert the phrase and delete the word that combine the phrase from the wordList.
    for phrase in phrases:
    try:
    i = joinedWordList.index(phrase)
    spaces = len([letter for letter in joinedWordList[:i] if letter==' '])
    wordList.insert(spaces,phrase)
    del wordList[spaces+1:spaces+1 + len(phrase.split())]
    except ValueError:
    pass
    print(wordList)


    Output:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']






    share|improve this answer














    share|improve this answer



    share|improve this answer








    edited Nov 21 '18 at 12:44

























    answered Nov 21 '18 at 11:19









    Vasilis G.Vasilis G.

    3,4082722




    3,4082722













    • Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

      – mick_zon_24
      Nov 21 '18 at 13:54











    • By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

      – mick_zon_24
      Nov 21 '18 at 14:04











    • @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

      – Vasilis G.
      Nov 21 '18 at 22:39











    • update posted! @vasilis G

      – mick_zon_24
      Nov 23 '18 at 16:40



















    • Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

      – mick_zon_24
      Nov 21 '18 at 13:54











    • By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

      – mick_zon_24
      Nov 21 '18 at 14:04











    • @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

      – Vasilis G.
      Nov 21 '18 at 22:39











    • update posted! @vasilis G

      – mick_zon_24
      Nov 23 '18 at 16:40

















    Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

    – mick_zon_24
    Nov 21 '18 at 13:54





    Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

    – mick_zon_24
    Nov 21 '18 at 13:54













    By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

    – mick_zon_24
    Nov 21 '18 at 14:04





    By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

    – mick_zon_24
    Nov 21 '18 at 14:04













    @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

    – Vasilis G.
    Nov 21 '18 at 22:39





    @mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

    – Vasilis G.
    Nov 21 '18 at 22:39













    update posted! @vasilis G

    – mick_zon_24
    Nov 23 '18 at 16:40





    update posted! @vasilis G

    – mick_zon_24
    Nov 23 '18 at 16:40













    1














    Here is another approach that would work for any phrase length:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    for w in posWords:
    nrWords = len(w.split(' '))
    if nrWords > 1:
    word_array = w.split(' ')
    word_index_array = [wordList.index(w) for w in word_array]
    index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]

    if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
    for elem in word_array:
    wordList.remove(elem)
    wordList.insert(word_index_array[0], w)


    Output will be:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    But if we for example input something like:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


    The output will be:



    ['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']





    share|improve this answer
























    • update posted! (see above)

      – mick_zon_24
      Nov 23 '18 at 16:40
















    1














    Here is another approach that would work for any phrase length:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    for w in posWords:
    nrWords = len(w.split(' '))
    if nrWords > 1:
    word_array = w.split(' ')
    word_index_array = [wordList.index(w) for w in word_array]
    index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]

    if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
    for elem in word_array:
    wordList.remove(elem)
    wordList.insert(word_index_array[0], w)


    Output will be:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    But if we for example input something like:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


    The output will be:



    ['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']





    share|improve this answer
























    • update posted! (see above)

      – mick_zon_24
      Nov 23 '18 at 16:40














    1












    1








    1







    Here is another approach that would work for any phrase length:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    for w in posWords:
    nrWords = len(w.split(' '))
    if nrWords > 1:
    word_array = w.split(' ')
    word_index_array = [wordList.index(w) for w in word_array]
    index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]

    if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
    for elem in word_array:
    wordList.remove(elem)
    wordList.insert(word_index_array[0], w)


    Output will be:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    But if we for example input something like:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


    The output will be:



    ['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']





    share|improve this answer













    Here is another approach that would work for any phrase length:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

    for w in posWords:
    nrWords = len(w.split(' '))
    if nrWords > 1:
    word_array = w.split(' ')
    word_index_array = [wordList.index(w) for w in word_array]
    index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]

    if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList
    for elem in word_array:
    wordList.remove(elem)
    wordList.insert(word_index_array[0], w)


    Output will be:



    ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    But if we for example input something like:



    posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']
    wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


    The output will be:



    ['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']






    share|improve this answer












    share|improve this answer



    share|improve this answer










    answered Nov 21 '18 at 13:30









    toti08toti08

    1,73931523




    1,73931523













    • update posted! (see above)

      – mick_zon_24
      Nov 23 '18 at 16:40



















    • update posted! (see above)

      – mick_zon_24
      Nov 23 '18 at 16:40

















    update posted! (see above)

    – mick_zon_24
    Nov 23 '18 at 16:40





    update posted! (see above)

    – mick_zon_24
    Nov 23 '18 at 16:40











    0














    You can do something like this:



    In [711]: s = ''.join(posWords)

    In [712]: s
    Out[712]: 'beautifullovehappy dayenjoysmelling flowers'

    In [672]: n =

    In [673]: for i in wordList:
    ...: if i in s:
    ...: n.append(i)
    ...:

    In [713]: n
    Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']

    In [740]: for c, i in enumerate(n):
    ...: if c+1 < len(n):
    ...: word = n[c] + ' ' + n[c+1]
    ...: if word in posWords:
    ...: ix1 = wordList.index(n[c])
    ...: del wordList[ix1: ix1+2]
    ...: wordList.insert(ix1,word)
    ...:

    In [710]: wordList
    Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    Let me know if this helps.






    share|improve this answer





















    • 2





      You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

      – Guimoute
      Nov 21 '18 at 13:13













    • @Guimoute Thanks a lot for that. Edited my answer too.

      – Mayank Porwal
      Nov 21 '18 at 13:30











    • thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

      – mick_zon_24
      Nov 21 '18 at 14:00











    • update posted (see above)!

      – mick_zon_24
      Nov 23 '18 at 16:41
















    0














    You can do something like this:



    In [711]: s = ''.join(posWords)

    In [712]: s
    Out[712]: 'beautifullovehappy dayenjoysmelling flowers'

    In [672]: n =

    In [673]: for i in wordList:
    ...: if i in s:
    ...: n.append(i)
    ...:

    In [713]: n
    Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']

    In [740]: for c, i in enumerate(n):
    ...: if c+1 < len(n):
    ...: word = n[c] + ' ' + n[c+1]
    ...: if word in posWords:
    ...: ix1 = wordList.index(n[c])
    ...: del wordList[ix1: ix1+2]
    ...: wordList.insert(ix1,word)
    ...:

    In [710]: wordList
    Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    Let me know if this helps.






    share|improve this answer





















    • 2





      You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

      – Guimoute
      Nov 21 '18 at 13:13













    • @Guimoute Thanks a lot for that. Edited my answer too.

      – Mayank Porwal
      Nov 21 '18 at 13:30











    • thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

      – mick_zon_24
      Nov 21 '18 at 14:00











    • update posted (see above)!

      – mick_zon_24
      Nov 23 '18 at 16:41














    0












    0








    0







    You can do something like this:



    In [711]: s = ''.join(posWords)

    In [712]: s
    Out[712]: 'beautifullovehappy dayenjoysmelling flowers'

    In [672]: n =

    In [673]: for i in wordList:
    ...: if i in s:
    ...: n.append(i)
    ...:

    In [713]: n
    Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']

    In [740]: for c, i in enumerate(n):
    ...: if c+1 < len(n):
    ...: word = n[c] + ' ' + n[c+1]
    ...: if word in posWords:
    ...: ix1 = wordList.index(n[c])
    ...: del wordList[ix1: ix1+2]
    ...: wordList.insert(ix1,word)
    ...:

    In [710]: wordList
    Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    Let me know if this helps.






    share|improve this answer















    You can do something like this:



    In [711]: s = ''.join(posWords)

    In [712]: s
    Out[712]: 'beautifullovehappy dayenjoysmelling flowers'

    In [672]: n =

    In [673]: for i in wordList:
    ...: if i in s:
    ...: n.append(i)
    ...:

    In [713]: n
    Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']

    In [740]: for c, i in enumerate(n):
    ...: if c+1 < len(n):
    ...: word = n[c] + ' ' + n[c+1]
    ...: if word in posWords:
    ...: ix1 = wordList.index(n[c])
    ...: del wordList[ix1: ix1+2]
    ...: wordList.insert(ix1,word)
    ...:

    In [710]: wordList
    Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']


    Let me know if this helps.







    share|improve this answer














    share|improve this answer



    share|improve this answer








    edited Nov 21 '18 at 13:30

























    answered Nov 21 '18 at 10:59









    Mayank PorwalMayank Porwal

    4,6571624




    4,6571624








    • 2





      You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

      – Guimoute
      Nov 21 '18 at 13:13













    • @Guimoute Thanks a lot for that. Edited my answer too.

      – Mayank Porwal
      Nov 21 '18 at 13:30











    • thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

      – mick_zon_24
      Nov 21 '18 at 14:00











    • update posted (see above)!

      – mick_zon_24
      Nov 23 '18 at 16:41














    • 2





      You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

      – Guimoute
      Nov 21 '18 at 13:13













    • @Guimoute Thanks a lot for that. Edited my answer too.

      – Mayank Porwal
      Nov 21 '18 at 13:30











    • thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

      – mick_zon_24
      Nov 21 '18 at 14:00











    • update posted (see above)!

      – mick_zon_24
      Nov 23 '18 at 16:41








    2




    2





    You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

    – Guimoute
    Nov 21 '18 at 13:13







    You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

    – Guimoute
    Nov 21 '18 at 13:13















    @Guimoute Thanks a lot for that. Edited my answer too.

    – Mayank Porwal
    Nov 21 '18 at 13:30





    @Guimoute Thanks a lot for that. Edited my answer too.

    – Mayank Porwal
    Nov 21 '18 at 13:30













    thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

    – mick_zon_24
    Nov 21 '18 at 14:00





    thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

    – mick_zon_24
    Nov 21 '18 at 14:00













    update posted (see above)!

    – mick_zon_24
    Nov 23 '18 at 16:41





    update posted (see above)!

    – mick_zon_24
    Nov 23 '18 at 16:41











    0














    Another way to do it:



    >>> m=["good bad", "enjoy", "play"]
    >>> l=["good", "bad", "happy", "delight"]
    >>>
    >>> for e in m:
    ... tmp = e.split(" ")
    ... if(len(tmp) > 1):
    ... l = [ent for ent in l if ent not in tmp]
    ... l.append(" ".join(tmp))
    ...
    >>>
    >>> l
    ['happy', 'delight', 'good bad']





    share|improve this answer




























      0














      Another way to do it:



      >>> m=["good bad", "enjoy", "play"]
      >>> l=["good", "bad", "happy", "delight"]
      >>>
      >>> for e in m:
      ... tmp = e.split(" ")
      ... if(len(tmp) > 1):
      ... l = [ent for ent in l if ent not in tmp]
      ... l.append(" ".join(tmp))
      ...
      >>>
      >>> l
      ['happy', 'delight', 'good bad']





      share|improve this answer


























        0












        0








        0







        Another way to do it:



        >>> m=["good bad", "enjoy", "play"]
        >>> l=["good", "bad", "happy", "delight"]
        >>>
        >>> for e in m:
        ... tmp = e.split(" ")
        ... if(len(tmp) > 1):
        ... l = [ent for ent in l if ent not in tmp]
        ... l.append(" ".join(tmp))
        ...
        >>>
        >>> l
        ['happy', 'delight', 'good bad']





        share|improve this answer













        Another way to do it:



        >>> m=["good bad", "enjoy", "play"]
        >>> l=["good", "bad", "happy", "delight"]
        >>>
        >>> for e in m:
        ... tmp = e.split(" ")
        ... if(len(tmp) > 1):
        ... l = [ent for ent in l if ent not in tmp]
        ... l.append(" ".join(tmp))
        ...
        >>>
        >>> l
        ['happy', 'delight', 'good bad']






        share|improve this answer












        share|improve this answer



        share|improve this answer










        answered Nov 21 '18 at 13:48









        AkhileshaAkhilesha

        113




        113






























            draft saved

            draft discarded




















































            Thanks for contributing an answer to Stack Overflow!


            • Please be sure to answer the question. Provide details and share your research!

            But avoid



            • Asking for help, clarification, or responding to other answers.

            • Making statements based on opinion; back them up with references or personal experience.


            To learn more, see our tips on writing great answers.




            draft saved


            draft discarded














            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53409801%2fhow-can-i-detect-multiple-items-in-a-list-that-are-separated-with-a-somewhat-equ%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown





















































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown

































            Required, but never shown














            Required, but never shown












            Required, but never shown







            Required, but never shown







            Popular posts from this blog

            Ottavio Pratesi

            Tricia Helfer

            15 giugno