How can I detect multiple items in a list that are separated with a somewhat equivalent list in Python?

-1

I'm coding in Python 3 and have a list of positive "words", some of which contain a space:

posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']

However, the text I need to analyse for positive words is split into single words, so none of its items contain a space:

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

I want to iterate over wordList and, whenever consecutive words together form an entry of posWords (e.g. 'happy', 'day' for 'happy day'), remove those separate words from wordList and insert the merged version in their place.

So in the end, the wordList must look like this:

wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

BIG UPDATE:

Because I promised you guys to keep you updated, this is my code so far. It was kinda tricky because in my lists with positive words and negative words there were phrases that contained max three words. So I needed to figure out how to work with that. I realised (also because of the answers you guys gave me, thanks again!) that I had to make lists of all the words of the text that needs to be analysed, with either 3, 2, or 1 words in one string item, so I can check whether the items also appear in my lists of positive words and negative words. Here is my code so far. It's kinda bulky, with a lot of copy-pasting... I'm planning to fix that, but I'm quite tired and the weekend is starting now, so no hate please! (tips are welcome tho)

    from textblob import TextBlob



# open the file with negative words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("neg_dutch_2.txt") as negatives:
    neg_list = [line.lower().strip("\n") for line in negatives]

# one bucket per number of spaces a negative phrase can contain
neg_no_space = []
neg_one_space = []
neg_two_spaces = []
neg_three_spaces = []

count = 0



def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
                        neg_three_spaces, count):
    """Sort every negative phrase into a bucket by the number of spaces in it.

    Args:
        neg_list: phrases to classify (single words or multi-word phrases).
        neg_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        neg_one_space: list extended in place with one-space phrases.
        neg_two_spaces: list extended in place with two-space phrases.
        neg_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
        neg_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: neg_one_space, 2: neg_two_spaces, 3: neg_three_spaces}

    for phrase in neg_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), neg_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
            neg_three_spaces, count)



neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
                    neg_two_spaces, neg_three_spaces, count)

# open the file with positive words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("pos_dutch_2.txt") as positives:
    pos_list = [line.lower().strip("\n") for line in positives]

# one bucket per number of spaces a positive phrase can contain
pos_no_space = []
pos_one_space = []
pos_two_spaces = []
pos_three_spaces = []
count = 0



def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
                        pos_three_spaces, count):
    """Sort every positive phrase into a bucket by the number of spaces in it.

    Args:
        pos_list: phrases to classify (single words or multi-word phrases).
        pos_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        pos_one_space: list extended in place with one-space phrases.
        pos_two_spaces: list extended in place with two-space phrases.
        pos_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
        pos_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: pos_one_space, 2: pos_two_spaces, 3: pos_three_spaces}

    for phrase in pos_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), pos_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
            pos_three_spaces, count)



pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
                    pos_two_spaces, pos_three_spaces, count)

# read the whole article; the with-block closes the file again
with open("nrc_sample.TXT") as article:
    text = article.read()

# the TextBlob library is used to separate the article into words
blob = TextBlob(text)

# these are words that are bound to the metadata of the articles file
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
        "PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
        "Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# store every purely alphabetic word, lower-cased, unless it contains one of
# the metadata markers
word_list = []
for word in blob.words:
    if not any(x in word for x in ruis):
        word = word.lower()
        if word.isalpha():
            word_list.append(word)

# variables for the frequencies of negative and positive words in articles
# NOTE: only negative phrases are counted below; amount_pos is never updated
amount_pos = 0
amount_neg = 0

# every run of 4, 3, 2 and 1 consecutive words of the article, so it can be
# checked whether a phrase also exists in the word lists; a list of n words
# has n - k + 1 windows of k words, so the last start index is n - k
phrases_four = [" ".join(word_list[i:i + 4])
                for i in range(len(word_list) - 3)]
phrases_three = [" ".join(word_list[i:i + 3])
                 for i in range(len(word_list) - 2)]
phrases_two = [" ".join(word_list[i:i + 2])
               for i in range(len(word_list) - 1)]
phrases_one = list(word_list)

# PHRASE 4
# the printed counter is only there for readability of the output
for count, phrase in enumerate(phrases_four):
    print("phrase4", count, phrase)
    for neg in neg_three_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 3
for count, phrase in enumerate(phrases_three):
    print("phrase3", count, phrase)
    for neg in neg_two_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 2
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
for phrase in phrases_two:
    for neg in neg_one_space:
        if phrase == neg:
            amount_neg += 1

print(amount_neg)

# JUST A WORD
for count, phrase in enumerate(phrases_one):
    print("phrase1", count, phrase)
    for neg in neg_no_space:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

edited Nov 23 '18 at 16:39

asked Nov 21 '18 at 10:16

mick_zon_24

1

show us what you tried.

– Sociopath
Nov 21 '18 at 10:21

2

Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23

1

might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24

Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if they appear elsewhere in the list, they wouldn't be counted as one positive word, for 'happy day' is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31

add a comment |

-1

I'm coding in Python 3 and have a list of positive "words", some of which contain a space:

posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']

However, the text I need to analyse for positive words is split into single words, so none of its items contain a space:

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

So in the end, the wordList must look like this:

wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

BIG UPDATE:

    from textblob import TextBlob



# open the file with negative words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("neg_dutch_2.txt") as negatives:
    neg_list = [line.lower().strip("\n") for line in negatives]

# one bucket per number of spaces a negative phrase can contain
neg_no_space = []
neg_one_space = []
neg_two_spaces = []
neg_three_spaces = []

count = 0



def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
                        neg_three_spaces, count):
    """Sort every negative phrase into a bucket by the number of spaces in it.

    Args:
        neg_list: phrases to classify (single words or multi-word phrases).
        neg_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        neg_one_space: list extended in place with one-space phrases.
        neg_two_spaces: list extended in place with two-space phrases.
        neg_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
        neg_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: neg_one_space, 2: neg_two_spaces, 3: neg_three_spaces}

    for phrase in neg_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), neg_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
            neg_three_spaces, count)



neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
                    neg_two_spaces, neg_three_spaces, count)

# open the file with positive words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("pos_dutch_2.txt") as positives:
    pos_list = [line.lower().strip("\n") for line in positives]

# one bucket per number of spaces a positive phrase can contain
pos_no_space = []
pos_one_space = []
pos_two_spaces = []
pos_three_spaces = []
count = 0



def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
                        pos_three_spaces, count):
    """Sort every positive phrase into a bucket by the number of spaces in it.

    Args:
        pos_list: phrases to classify (single words or multi-word phrases).
        pos_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        pos_one_space: list extended in place with one-space phrases.
        pos_two_spaces: list extended in place with two-space phrases.
        pos_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
        pos_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: pos_one_space, 2: pos_two_spaces, 3: pos_three_spaces}

    for phrase in pos_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), pos_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
            pos_three_spaces, count)



pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
                    pos_two_spaces, pos_three_spaces, count)

# read the whole article; the with-block closes the file again
with open("nrc_sample.TXT") as article:
    text = article.read()

# the TextBlob library is used to separate the article into words
blob = TextBlob(text)

# these are words that are bound to the metadata of the articles file
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
        "PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
        "Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# store every purely alphabetic word, lower-cased, unless it contains one of
# the metadata markers
word_list = []
for word in blob.words:
    if not any(x in word for x in ruis):
        word = word.lower()
        if word.isalpha():
            word_list.append(word)

# variables for the frequencies of negative and positive words in articles
# NOTE: only negative phrases are counted below; amount_pos is never updated
amount_pos = 0
amount_neg = 0

# every run of 4, 3, 2 and 1 consecutive words of the article, so it can be
# checked whether a phrase also exists in the word lists; a list of n words
# has n - k + 1 windows of k words, so the last start index is n - k
phrases_four = [" ".join(word_list[i:i + 4])
                for i in range(len(word_list) - 3)]
phrases_three = [" ".join(word_list[i:i + 3])
                 for i in range(len(word_list) - 2)]
phrases_two = [" ".join(word_list[i:i + 2])
               for i in range(len(word_list) - 1)]
phrases_one = list(word_list)

# PHRASE 4
# the printed counter is only there for readability of the output
for count, phrase in enumerate(phrases_four):
    print("phrase4", count, phrase)
    for neg in neg_three_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 3
for count, phrase in enumerate(phrases_three):
    print("phrase3", count, phrase)
    for neg in neg_two_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 2
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
for phrase in phrases_two:
    for neg in neg_one_space:
        if phrase == neg:
            amount_neg += 1

print(amount_neg)

# JUST A WORD
for count, phrase in enumerate(phrases_one):
    print("phrase1", count, phrase)
    for neg in neg_no_space:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

edited Nov 23 '18 at 16:39

asked Nov 21 '18 at 10:16

mick_zon_24

1

show us what you tried.

– Sociopath
Nov 21 '18 at 10:21

2

Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23

1

might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24

Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if they appear elsewhere in the list, they wouldn't be counted as one positive word, for 'happy day' is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31

add a comment |

-1

I'm coding in Python 3 and have a list of positive "words", some of which contain a space:

posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']

However, the text I need to analyse for positive words is split into single words, so none of its items contain a space:

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

So in the end, the wordList must look like this:

wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

BIG UPDATE:

    from textblob import TextBlob



# open the file with negative words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("neg_dutch_2.txt") as negatives:
    neg_list = [line.lower().strip("\n") for line in negatives]

# one bucket per number of spaces a negative phrase can contain
neg_no_space = []
neg_one_space = []
neg_two_spaces = []
neg_three_spaces = []

count = 0



def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
                        neg_three_spaces, count):
    """Sort every negative phrase into a bucket by the number of spaces in it.

    Args:
        neg_list: phrases to classify (single words or multi-word phrases).
        neg_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        neg_one_space: list extended in place with one-space phrases.
        neg_two_spaces: list extended in place with two-space phrases.
        neg_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
        neg_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: neg_one_space, 2: neg_two_spaces, 3: neg_three_spaces}

    for phrase in neg_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), neg_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
            neg_three_spaces, count)



neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
                    neg_two_spaces, neg_three_spaces, count)

# open the file with positive words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("pos_dutch_2.txt") as positives:
    pos_list = [line.lower().strip("\n") for line in positives]

# one bucket per number of spaces a positive phrase can contain
pos_no_space = []
pos_one_space = []
pos_two_spaces = []
pos_three_spaces = []
count = 0



def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
                        pos_three_spaces, count):
    """Sort every positive phrase into a bucket by the number of spaces in it.

    Args:
        pos_list: phrases to classify (single words or multi-word phrases).
        pos_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        pos_one_space: list extended in place with one-space phrases.
        pos_two_spaces: list extended in place with two-space phrases.
        pos_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
        pos_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: pos_one_space, 2: pos_two_spaces, 3: pos_three_spaces}

    for phrase in pos_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), pos_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
            pos_three_spaces, count)



pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
                    pos_two_spaces, pos_three_spaces, count)

# read the whole article; the with-block closes the file again
with open("nrc_sample.TXT") as article:
    text = article.read()

# the TextBlob library is used to separate the article into words
blob = TextBlob(text)

# these are words that are bound to the metadata of the articles file
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
        "PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
        "Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# store every purely alphabetic word, lower-cased, unless it contains one of
# the metadata markers
word_list = []
for word in blob.words:
    if not any(x in word for x in ruis):
        word = word.lower()
        if word.isalpha():
            word_list.append(word)

# variables for the frequencies of negative and positive words in articles
# NOTE: only negative phrases are counted below; amount_pos is never updated
amount_pos = 0
amount_neg = 0

# every run of 4, 3, 2 and 1 consecutive words of the article, so it can be
# checked whether a phrase also exists in the word lists; a list of n words
# has n - k + 1 windows of k words, so the last start index is n - k
phrases_four = [" ".join(word_list[i:i + 4])
                for i in range(len(word_list) - 3)]
phrases_three = [" ".join(word_list[i:i + 3])
                 for i in range(len(word_list) - 2)]
phrases_two = [" ".join(word_list[i:i + 2])
               for i in range(len(word_list) - 1)]
phrases_one = list(word_list)

# PHRASE 4
# the printed counter is only there for readability of the output
for count, phrase in enumerate(phrases_four):
    print("phrase4", count, phrase)
    for neg in neg_three_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 3
for count, phrase in enumerate(phrases_three):
    print("phrase3", count, phrase)
    for neg in neg_two_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 2
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
for phrase in phrases_two:
    for neg in neg_one_space:
        if phrase == neg:
            amount_neg += 1

print(amount_neg)

# JUST A WORD
for count, phrase in enumerate(phrases_one):
    print("phrase1", count, phrase)
    for neg in neg_no_space:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

edited Nov 23 '18 at 16:39

asked Nov 21 '18 at 10:16

mick_zon_24

I'm coding in Python 3 and have a list of positive "words", some of which contain a space:

posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']

However, the text I need to analyse for positive words is split into single words, so none of its items contain a space:

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

So in the end, the wordList must look like this:

wordList = ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

BIG UPDATE:

    from textblob import TextBlob



# open the file with negative words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("neg_dutch_2.txt") as negatives:
    neg_list = [line.lower().strip("\n") for line in negatives]

# one bucket per number of spaces a negative phrase can contain
neg_no_space = []
neg_one_space = []
neg_two_spaces = []
neg_three_spaces = []

count = 0



def neg_how_many_spaces(neg_list, neg_no_space, neg_one_space, neg_two_spaces,
                        neg_three_spaces, count):
    """Sort every negative phrase into a bucket by the number of spaces in it.

    Args:
        neg_list: phrases to classify (single words or multi-word phrases).
        neg_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        neg_one_space: list extended in place with one-space phrases.
        neg_two_spaces: list extended in place with two-space phrases.
        neg_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
        neg_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: neg_one_space, 2: neg_two_spaces, 3: neg_three_spaces}

    for phrase in neg_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), neg_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (neg_list, neg_no_space, neg_one_space, neg_two_spaces,
            neg_three_spaces, count)



neg_how_many_spaces(neg_list, neg_no_space, neg_one_space,
                    neg_two_spaces, neg_three_spaces, count)

# open the file with positive words and push every entry, lower-cased and
# without its line break, into a list; the with-block closes the file again
with open("pos_dutch_2.txt") as positives:
    pos_list = [line.lower().strip("\n") for line in positives]

# one bucket per number of spaces a positive phrase can contain
pos_no_space = []
pos_one_space = []
pos_two_spaces = []
pos_three_spaces = []
count = 0



def pos_how_many_spaces(pos_list, pos_no_space, pos_one_space, pos_two_spaces,
                        pos_three_spaces, count):
    """Sort every positive phrase into a bucket by the number of spaces in it.

    Args:
        pos_list: phrases to classify (single words or multi-word phrases).
        pos_no_space: list extended in place with phrases without a space;
            phrases with more than three spaces also fall through to it.
        pos_one_space: list extended in place with one-space phrases.
        pos_two_spaces: list extended in place with two-space phrases.
        pos_three_spaces: list extended in place with three-space phrases.
        count: kept for backward compatibility; spaces are counted per
            phrase, so the incoming value is ignored.

    Returns:
        The tuple (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
        pos_three_spaces, count) where count is the reset counter, 0.
    """
    buckets = {1: pos_one_space, 2: pos_two_spaces, 3: pos_three_spaces}

    for phrase in pos_list:
        # an empty entry (e.g. a blank line in the word file) is no phrase;
        # indexing phrase[-1] on it would raise IndexError
        if not phrase:
            continue
        buckets.get(phrase.count(" "), pos_no_space).append(phrase)

    # parenthesised so that all six values are returned; a bare line break
    # after the trailing comma would end the return statement early
    count = 0
    return (pos_list, pos_no_space, pos_one_space, pos_two_spaces,
            pos_three_spaces, count)



pos_how_many_spaces(pos_list, pos_no_space, pos_one_space,
                    pos_two_spaces, pos_three_spaces, count)

# read the whole article; the with-block closes the file again
with open("nrc_sample.TXT") as article:
    text = article.read()

# the TextBlob library is used to separate the article into words
blob = TextBlob(text)

# these are words that are bound to the metadata of the articles file
ruis = ["DOCUMENTS", "SECTION", "LENGTH", "LOAD-DATE", "LANGUAGE",
        "PUBLICATION-TYPE", "JOURNAL-CODE", "BYLINE", "All", "Rights",
        "Reserved", "Copyright", "krant", "Krant", "KRANT", "blz"]

# store every purely alphabetic word, lower-cased, unless it contains one of
# the metadata markers
word_list = []
for word in blob.words:
    if not any(x in word for x in ruis):
        word = word.lower()
        if word.isalpha():
            word_list.append(word)

# variables for the frequencies of negative and positive words in articles
# NOTE: only negative phrases are counted below; amount_pos is never updated
amount_pos = 0
amount_neg = 0

# every run of 4, 3, 2 and 1 consecutive words of the article, so it can be
# checked whether a phrase also exists in the word lists; a list of n words
# has n - k + 1 windows of k words, so the last start index is n - k
phrases_four = [" ".join(word_list[i:i + 4])
                for i in range(len(word_list) - 3)]
phrases_three = [" ".join(word_list[i:i + 3])
                 for i in range(len(word_list) - 2)]
phrases_two = [" ".join(word_list[i:i + 2])
               for i in range(len(word_list) - 1)]
phrases_one = list(word_list)

# PHRASE 4
# the printed counter is only there for readability of the output
for count, phrase in enumerate(phrases_four):
    print("phrase4", count, phrase)
    for neg in neg_three_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 3
for count, phrase in enumerate(phrases_three):
    print("phrase3", count, phrase)
    for neg in neg_two_spaces:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

# PHRASE 2
# examine each phrase, and check if the same phrase exists in the list
# with negative phrases containing two words
for phrase in phrases_two:
    for neg in neg_one_space:
        if phrase == neg:
            amount_neg += 1

print(amount_neg)

# JUST A WORD
for count, phrase in enumerate(phrases_one):
    print("phrase1", count, phrase)
    for neg in neg_no_space:
        if phrase == neg:
            print("negatief woord^")
            amount_neg += 1

print(amount_neg)

python string python-3.x list

edited Nov 23 '18 at 16:39

asked Nov 21 '18 at 10:16

mick_zon_24

edited Nov 23 '18 at 16:39

asked Nov 21 '18 at 10:16

mick_zon_24

edited Nov 23 '18 at 16:39

asked Nov 21 '18 at 10:16

mick_zon_24

asked Nov 21 '18 at 10:16

mick_zon_24

asked Nov 21 '18 at 10:16

mick_zon_24

1

show us what you tried.

– Sociopath
Nov 21 '18 at 10:21

2

Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23

1

might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24

Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if they appear elsewhere in the list, they wouldn't be counted as one positive word, for 'happy day' is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31

add a comment |

1

show us what you tried.

– Sociopath
Nov 21 '18 at 10:21

2

Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23

1

might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24

Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if they appear elsewhere in the list, they wouldn't be counted as one positive word, for 'happy day' is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31

show us what you tried.

– Sociopath
Nov 21 '18 at 10:21

Will 'happy' and 'day' (for example) always be consecutive elements in wordList, or could they appear anywhere?

– thesilkworm
Nov 21 '18 at 10:23

might the phrases be more than 2 words?

– Chris_Rands
Nov 21 '18 at 10:24

Yes, the words in wordList will always be in consecutive order. This is part of the problem, because if they appear elsewhere in the list, they wouldn't be counted as one positive word, for 'happy day' is one positive "word" @thesilkworm. And yes, the phrases could be more than 2 words @Chris_Rands.

– mick_zon_24
Nov 21 '18 at 10:31

add a comment |

4 Answers
4

active

oldest

votes

Here is a way to do it:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


def merge_phrases(words, phrases):
    """Merge consecutive words that together spell out one of the phrases.

    ``words`` is modified in place: every run of consecutive items equal to
    the words of a multi-word phrase is replaced by the phrase itself.
    Whole words are compared, so 'unhappy', 'day' does not match 'happy day',
    and the result does not depend on the order of ``phrases``.

    Args:
        words: list of single words.
        phrases: iterable of phrases; entries of one word are ignored.

    Returns:
        The same list object as ``words``.
    """
    for phrase in phrases:
        parts = phrase.split()
        if len(parts) < 2:
            continue
        start = 0
        # the bound is re-evaluated every round because a merge shortens the list
        while start <= len(words) - len(parts):
            if words[start:start + len(parts)] == parts:
                # slice assignment swaps the separate words for the phrase
                words[start:start + len(parts)] = [phrase]
            start += 1
    return words


# Find all phrases in the posWords list.
phrases = [elem for elem in posWords if len(elem.split()) > 1]

# Replace every occurrence of a phrase in wordList by its merged version.
merge_phrases(wordList, phrases)
print(wordList)

Output:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

edited Nov 21 '18 at 12:44

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

Thank you. If I try it with a longer sentence, the program is buggy and does not remove the words correctly. But this helps me a lot, and over the following days I will try to solve this on my own. I'll keep you updated.

– mick_zon_24
Nov 21 '18 at 13:54

By the way, this was the best solution so far, as the other answers didn't correctly process positive phrases of more than two words

– mick_zon_24
Nov 21 '18 at 14:04

@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

– Vasilis G.
Nov 21 '18 at 22:39

update posted! @vasilis G

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Here is another approach that would work for any phrase length:

posWords = ['beautiful', 'love', 'happy day', 'enjoy', 'smelling flowers']
wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']


def combine_phrases(words, phrases):
    """Return a copy of *words* with each consecutive phrase match merged.

    For every multi-word entry of *phrases*, a window of the phrase's
    length is slid over the list; wherever the window equals the phrase's
    words, in the same order, it is replaced by the single phrase string.

    This works for phrases of any length, merges every occurrence, leaves
    the list untouched when a phrase (or one of its words) is absent, and
    never merges words that appear in reversed or non-adjacent positions.

    Args:
        words: list of single-word strings.
        phrases: iterable of strings; single-word entries are skipped.

    Returns:
        A new list; *words* is not modified.
    """
    result = list(words)
    for phrase in phrases:
        parts = phrase.split(' ')
        width = len(parts)
        if width < 2:
            continue
        start = 0
        # len(result) is re-read each pass because a merge shortens the list.
        while start + width <= len(result):
            if result[start:start + width] == parts:
                result[start:start + width] = [phrase]
            start += 1
    return result


wordList = combine_phrases(wordList, posWords)

Output will be:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

But if we for example input something like:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

The output will be:

['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']

answered Nov 21 '18 at 13:30

toti08

1,73931523

update posted! (see above)

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

You can do something like this:

In [711]: s = ''.join(posWords)



In [712]: s

Out[712]: 'beautifullovehappy dayenjoysmelling flowers'



In [672]: n = []



In [673]: for i in wordList:

     ...:     if i in s:

     ...:         n.append(i)

     ...: 



In [713]: n

Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']



In [740]: for c, i in enumerate(n):

     ...:     if c+1 < len(n):

     ...:         word = n[c] + ' ' + n[c+1]

     ...:         if word in posWords:

     ...:             ix1 = wordList.index(n[c])

     ...:             del wordList[ix1: ix1+2]

     ...:             wordList.insert(ix1,word)

     ...:             



In [710]: wordList

Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

Let me know if this helps.

edited Nov 21 '18 at 13:30

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

2

You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

– Guimoute
Nov 21 '18 at 13:13

@Guimoute Thanks a lot for that. Edited my answer too.

– Mayank Porwal
Nov 21 '18 at 13:30

Thanks, this helps a lot! But it is still a bit buggy when I pass longer sentences (I'm doing a sentiment analysis on Dutch newspapers). In the following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or get stuck!

– mick_zon_24
Nov 21 '18 at 14:00

update posted (see above)!

– mick_zon_24
Nov 23 '18 at 16:41

add a comment |

Another way to do it:

>>> m=["good bad", "enjoy", "play"]

>>> l=["good", "bad", "happy", "delight"]

>>>

>>> for e in m:

...     tmp = e.split(" ")

...     if(len(tmp) > 1):

...             l = [ent for ent in l if ent not in tmp]

...             l.append(" ".join(tmp))

...

>>>

>>> l

['happy', 'delight', 'good bad']

answered Nov 21 '18 at 13:48

Akhilesha

113

add a comment |

Your Answer

StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");

StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "1"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);

StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});

// Set up the answer editor by passing its options to
// StackExchange.prepareEditor. The HTML snippets in imageUploader are
// ordinary JS string literals: inner double quotes are escaped and the
// angle brackets are written as \u003c / \u003e escapes.
function createEditor() {
    StackExchange.prepareEditor({
        heartbeatType: 'answer',
        autoActivateHeartbeat: false,
        convertImagesToLinks: true,
        noModals: true,
        showLowRepImageUploadWarning: true,
        reputationToPostImages: 10,
        bindNavPrevention: true,
        postfix: "",
        imageUploader: {
            brandingHtml: "Powered by \u003ca class=\"icon-imgur-white\" href=\"https://imgur.com/\"\u003e\u003c/a\u003e",
            contentPolicyHtml: "User contributions licensed under \u003ca href=\"https://creativecommons.org/licenses/by-sa/3.0/\"\u003ecc by-sa 3.0 with attribution required\u003c/a\u003e \u003ca href=\"https://stackoverflow.com/legal/content-policy\"\u003e(content policy)\u003c/a\u003e",
            allowUrls: true
        },
        onDemand: true,
        discardSelector: ".discard-answer",
        immediatelyShowMarkdownHelp: true
    });
}
});

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53409801%2fhow-can-i-detect-multiple-items-in-a-list-that-are-separated-with-a-somewhat-equ%23new-answer', 'question_page');
}
);

Post as a guest

Name

Required, but never shown

4 Answers
4

active

oldest

votes

4 Answers
4

active

oldest

votes

Here is a way to do it:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



# Create a sentence for the wordList.

joinedWordList = " ".join(wordList)



# Find all phrases in the posWords list.

phrases = [elem for elem in posWords if len(elem.split()) > 1]



# For every phrase, locate it in the sentence, 

# count the space characters which is the same number as the index of the first word of phrase in the word list,

# insert the phrase and delete the word that combine the phrase from the wordList.

for phrase in phrases:

    try:

        i = joinedWordList.index(phrase)

        spaces = len([letter for letter in joinedWordList[:i] if letter==' '])

        wordList.insert(spaces,phrase)

        del wordList[spaces+1:spaces+1 + len(phrase.split())]

    except ValueError:

        pass

print(wordList)

Output:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

edited Nov 21 '18 at 12:44

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

– mick_zon_24
Nov 21 '18 at 13:54

By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

– mick_zon_24
Nov 21 '18 at 14:04

@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

– Vasilis G.
Nov 21 '18 at 22:39

update posted! @vasilis G

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Here is a way to do it:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



# Create a sentence for the wordList.

joinedWordList = " ".join(wordList)



# Find all phrases in the posWords list.

phrases = [elem for elem in posWords if len(elem.split()) > 1]



# For every phrase, locate it in the sentence, 

# count the space characters which is the same number as the index of the first word of phrase in the word list,

# insert the phrase and delete the word that combine the phrase from the wordList.

for phrase in phrases:

    try:

        i = joinedWordList.index(phrase)

        spaces = len([letter for letter in joinedWordList[:i] if letter==' '])

        wordList.insert(spaces,phrase)

        del wordList[spaces+1:spaces+1 + len(phrase.split())]

    except ValueError:

        pass

print(wordList)

Output:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

edited Nov 21 '18 at 12:44

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

– mick_zon_24
Nov 21 '18 at 13:54

By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

– mick_zon_24
Nov 21 '18 at 14:04

@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

– Vasilis G.
Nov 21 '18 at 22:39

update posted! @vasilis G

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Here is a way to do it:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



# Create a sentence for the wordList.

joinedWordList = " ".join(wordList)



# Find all phrases in the posWords list.

phrases = [elem for elem in posWords if len(elem.split()) > 1]



# For every phrase, locate it in the sentence, 

# count the space characters which is the same number as the index of the first word of phrase in the word list,

# insert the phrase and delete the word that combine the phrase from the wordList.

for phrase in phrases:

    try:

        i = joinedWordList.index(phrase)

        spaces = len([letter for letter in joinedWordList[:i] if letter==' '])

        wordList.insert(spaces,phrase)

        del wordList[spaces+1:spaces+1 + len(phrase.split())]

    except ValueError:

        pass

print(wordList)

Output:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

edited Nov 21 '18 at 12:44

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

Here is a way to do it:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



# Create a sentence for the wordList.

joinedWordList = " ".join(wordList)



# Find all phrases in the posWords list.

phrases = [elem for elem in posWords if len(elem.split()) > 1]



# For every phrase, locate it in the sentence, 

# count the space characters which is the same number as the index of the first word of phrase in the word list,

# insert the phrase and delete the word that combine the phrase from the wordList.

for phrase in phrases:

    try:

        i = joinedWordList.index(phrase)

        spaces = len([letter for letter in joinedWordList[:i] if letter==' '])

        wordList.insert(spaces,phrase)

        del wordList[spaces+1:spaces+1 + len(phrase.split())]

    except ValueError:

        pass

print(wordList)

Output:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

edited Nov 21 '18 at 12:44

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

edited Nov 21 '18 at 12:44

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

answered Nov 21 '18 at 11:19

Vasilis G.

3,4082722

Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

– mick_zon_24
Nov 21 '18 at 13:54

By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

– mick_zon_24
Nov 21 '18 at 14:04

@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

– Vasilis G.
Nov 21 '18 at 22:39

update posted! @vasilis G

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

– mick_zon_24
Nov 21 '18 at 13:54

By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

– mick_zon_24
Nov 21 '18 at 14:04

@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

– Vasilis G.
Nov 21 '18 at 22:39

update posted! @vasilis G

– mick_zon_24
Nov 23 '18 at 16:40

Thank you, if i try it with a longer sentence the program is buggy and does not erase the words in the right way. But this helps me a lot and the following days I wil try to solve this on my own. I keep you updated

– mick_zon_24
Nov 21 '18 at 13:54

By the way, this was yet the best solution, as the other answers didn't correctly processed positive words for more than two words

– mick_zon_24
Nov 21 '18 at 14:04

@mick_zon_24 thanks for the feedback, would you mind showing the specific case in which the above solution gets buggy? Please do, so that I can improve it.

– Vasilis G.
Nov 21 '18 at 22:39

update posted! @vasilis G

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Here is another approach that would work for any phrase length:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



for w in posWords:

    nrWords = len(w.split(' '))

    if nrWords > 1:

        word_array = w.split(' ')

        word_index_array = [wordList.index(w) for w in word_array]

        index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]



        if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList

            for elem in word_array:

                wordList.remove(elem)                        

            wordList.insert(word_index_array[0], w)

Output will be:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

But if we for example input something like:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

The output will be:

['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']

answered Nov 21 '18 at 13:30

toti08

1,73931523

update posted! (see above)

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Here is another approach that would work for any phrase length:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



for w in posWords:

    nrWords = len(w.split(' '))

    if nrWords > 1:

        word_array = w.split(' ')

        word_index_array = [wordList.index(w) for w in word_array]

        index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]



        if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList

            for elem in word_array:

                wordList.remove(elem)                        

            wordList.insert(word_index_array[0], w)

Output will be:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

But if we for example input something like:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

The output will be:

['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']

answered Nov 21 '18 at 13:30

toti08

1,73931523

update posted! (see above)

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

Here is another approach that would work for any phrase length:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



for w in posWords:

    nrWords = len(w.split(' '))

    if nrWords > 1:

        word_array = w.split(' ')

        word_index_array = [wordList.index(w) for w in word_array]

        index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]



        if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList

            for elem in word_array:

                wordList.remove(elem)                        

            wordList.insert(word_index_array[0], w)

Output will be:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

But if we for example input something like:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

The output will be:

['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']

answered Nov 21 '18 at 13:30

toti08

1,73931523

Here is another approach that would work for any phrase length:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day'] 



for w in posWords:

    nrWords = len(w.split(' '))

    if nrWords > 1:

        word_array = w.split(' ')

        word_index_array = [wordList.index(w) for w in word_array]

        index_difference_array = [abs(b-a) for a in word_index_array[0:-1] for b in word_index_array[1:]]



        if sum(index_difference_array) == len(index_difference_array): #elements are consecutive in wordList

            for elem in word_array:

                wordList.remove(elem)                        

            wordList.insert(word_index_array[0], w)

Output will be:

['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

But if we for example input something like:

posWords = ['beautiful', 'love', 'happy day', 'enjoy','smelling flowers on']

wordList = ['I', 'enjoy', 'smelling', 'flowers', 'on', 'a', 'happy', 'day']

The output will be:

['I', 'enjoy', 'smelling flowers on', 'a', 'happy day']

answered Nov 21 '18 at 13:30

toti08

1,73931523

answered Nov 21 '18 at 13:30

toti08

1,73931523

answered Nov 21 '18 at 13:30

toti08

1,73931523

answered Nov 21 '18 at 13:30

toti08

1,73931523

update posted! (see above)

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

update posted! (see above)

– mick_zon_24
Nov 23 '18 at 16:40

update posted! (see above)

– mick_zon_24
Nov 23 '18 at 16:40

add a comment |

You can do something like this:

In [711]: s = ''.join(posWords)



In [712]: s

Out[712]: 'beautifullovehappy dayenjoysmelling flowers'



In [672]: n = 



In [673]: for i in wordList:

     ...:     if i in s:

     ...:         n.append(i)

     ...: 



In [713]: n

Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']



In [740]: for c, i in enumerate(n):

     ...:     if c+1 < len(n):

     ...:         word = n[c] + ' ' + n[c+1]

     ...:         if word in posWords:

     ...:             ix1 = wordList.index(n[c])

     ...:             del wordList[ix1: ix1+2]

     ...:             wordList.insert(ix1,word)

     ...:             



In [710]: wordList

Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

Let me know if this helps.

edited Nov 21 '18 at 13:30

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

2

You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

– Guimoute
Nov 21 '18 at 13:13

@Guimoute Thanks a lot for that. Edited my answer too.

– Mayank Porwal
Nov 21 '18 at 13:30

thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

– mick_zon_24
Nov 21 '18 at 14:00

update posted (see above)!

– mick_zon_24
Nov 23 '18 at 16:41

add a comment |

You can do something like this:

In [711]: s = ''.join(posWords)



In [712]: s

Out[712]: 'beautifullovehappy dayenjoysmelling flowers'



In [672]: n = 



In [673]: for i in wordList:

     ...:     if i in s:

     ...:         n.append(i)

     ...: 



In [713]: n

Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']



In [740]: for c, i in enumerate(n):

     ...:     if c+1 < len(n):

     ...:         word = n[c] + ' ' + n[c+1]

     ...:         if word in posWords:

     ...:             ix1 = wordList.index(n[c])

     ...:             del wordList[ix1: ix1+2]

     ...:             wordList.insert(ix1,word)

     ...:             



In [710]: wordList

Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

Let me know if this helps.

edited Nov 21 '18 at 13:30

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

2

You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

– Guimoute
Nov 21 '18 at 13:13

@Guimoute Thanks a lot for that. Edited my answer too.

– Mayank Porwal
Nov 21 '18 at 13:30

thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

– mick_zon_24
Nov 21 '18 at 14:00

update posted (see above)!

– mick_zon_24
Nov 23 '18 at 16:41

add a comment |

You can do something like this:

In [711]: s = ''.join(posWords)



In [712]: s

Out[712]: 'beautifullovehappy dayenjoysmelling flowers'



In [672]: n = 



In [673]: for i in wordList:

     ...:     if i in s:

     ...:         n.append(i)

     ...: 



In [713]: n

Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']



In [740]: for c, i in enumerate(n):

     ...:     if c+1 < len(n):

     ...:         word = n[c] + ' ' + n[c+1]

     ...:         if word in posWords:

     ...:             ix1 = wordList.index(n[c])

     ...:             del wordList[ix1: ix1+2]

     ...:             wordList.insert(ix1,word)

     ...:             



In [710]: wordList

Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

Let me know if this helps.

edited Nov 21 '18 at 13:30

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

You can do something like this:

In [711]: s = ''.join(posWords)



In [712]: s

Out[712]: 'beautifullovehappy dayenjoysmelling flowers'



In [672]: n = 



In [673]: for i in wordList:

     ...:     if i in s:

     ...:         n.append(i)

     ...: 



In [713]: n

Out[713]: ['enjoy', 'smelling', 'flowers', 'a', 'happy', 'day']



In [740]: for c, i in enumerate(n):

     ...:     if c+1 < len(n):

     ...:         word = n[c] + ' ' + n[c+1]

     ...:         if word in posWords:

     ...:             ix1 = wordList.index(n[c])

     ...:             del wordList[ix1: ix1+2]

     ...:             wordList.insert(ix1,word)

     ...:             



In [710]: wordList

Out[710]: ['I', 'enjoy', 'smelling flowers', 'on', 'a', 'happy day']

Let me know if this helps.

edited Nov 21 '18 at 13:30

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

edited Nov 21 '18 at 13:30

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

answered Nov 21 '18 at 10:59

Mayank Porwal

4,6571624

2

You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

– Guimoute
Nov 21 '18 at 13:13

@Guimoute Thanks a lot for that. Edited my answer too.

– Mayank Porwal
Nov 21 '18 at 13:30

thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

– mick_zon_24
Nov 21 '18 at 14:00

update posted (see above)!

– mick_zon_24
Nov 23 '18 at 16:41

add a comment |

2

You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

– Guimoute
Nov 21 '18 at 13:13

@Guimoute Thanks a lot for that. Edited my answer too.

– Mayank Porwal
Nov 21 '18 at 13:30

thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

– mick_zon_24
Nov 21 '18 at 14:00

update posted (see above)!

– mick_zon_24
Nov 23 '18 at 16:41

You can use for c, i in enumerate(n): and remove the c=0 and c+=1. You could also replace the if - break - else with simply if c+1 < len(n): and no else or break used at all.

– Guimoute
Nov 21 '18 at 13:13

@Guimoute Thanks a lot for that. Edited my answer too.

– Mayank Porwal
Nov 21 '18 at 13:30

thanks this helps a lot! but it is still a bit buggy when i pass longer sentences (i'm doing a sentiment analysis on Dutch newspapers). The following days I will try to solve the problem on my own for pedagogical purposes. I will keep you updated when I find a solution or stuck!

– mick_zon_24
Nov 21 '18 at 14:00

update posted (see above)!

– mick_zon_24
Nov 23 '18 at 16:41

add a comment |

Another way to do it:

>>> m=["good bad", "enjoy", "play"]

>>> l=["good", "bad", "happy", "delight"]

>>>

>>> for e in m:

...     tmp = e.split(" ")

...     if(len(tmp) > 1):

...             l = [ent for ent in l if ent not in tmp]

...             l.append(" ".join(tmp))

...

>>>

>>> l

['happy', 'delight', 'good bad']

answered Nov 21 '18 at 13:48

Akhilesha

113

add a comment |

Another way to do it:

>>> m=["good bad", "enjoy", "play"]

>>> l=["good", "bad", "happy", "delight"]

>>>

>>> for e in m:

...     tmp = e.split(" ")

...     if(len(tmp) > 1):

...             l = [ent for ent in l if ent not in tmp]

...             l.append(" ".join(tmp))

...

>>>

>>> l

['happy', 'delight', 'good bad']

answered Nov 21 '18 at 13:48

Akhilesha

113

add a comment |

Another way to do it:

>>> m=["good bad", "enjoy", "play"]

>>> l=["good", "bad", "happy", "delight"]

>>>

>>> for e in m:

...     tmp = e.split(" ")

...     if(len(tmp) > 1):

...             l = [ent for ent in l if ent not in tmp]

...             l.append(" ".join(tmp))

...

>>>

>>> l

['happy', 'delight', 'good bad']

answered Nov 21 '18 at 13:48

Akhilesha

113

Another way to do it:

>>> m=["good bad", "enjoy", "play"]

>>> l=["good", "bad", "happy", "delight"]

>>>

>>> for e in m:

...     tmp = e.split(" ")

...     if(len(tmp) > 1):

...             l = [ent for ent in l if ent not in tmp]

...             l.append(" ".join(tmp))

...

>>>

>>> l

['happy', 'delight', 'good bad']

answered Nov 21 '18 at 13:48

Akhilesha

113

answered Nov 21 '18 at 13:48

Akhilesha

113

answered Nov 21 '18 at 13:48

Akhilesha

113

answered Nov 21 '18 at 13:48

Akhilesha

113

add a comment |

draft saved

draft discarded

Thanks for contributing an answer to Stack Overflow!

Please be sure to answer the question. Provide details and share your research!

But avoid …

Asking for help, clarification, or responding to other answers.

Making statements based on opinion; back them up with references or personal experience.

To learn more, see our tips on writing great answers.

draft saved

draft discarded

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Sign up or log in

StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});

Post as a guest

Name

Required, but never shown

Name

Required, but never shown

Name

Required, but never shown

This page is only for reference, If you need detailed information, please check here

搜尋此網誌

Nsryjdtyk