TypeError when running Concurrent Futures inside a Class












0















I'm building a class that allows me to mine a set of links off a site, and then i use concurrent.futures in order to check the validity of the links.



When I run the function multithreaded_link_checking, it runs fine.



However, when its inside a class, it returns this error:

Traceback (most recent call last):
File "datahandler.py", line 236, in <module>
data.multithreaded_link_checking(links)
File "datahandler.py", line 209, in multithreaded_link_checking
with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
File "/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 128, in __init__
if max_workers <= 0:
TypeError: '<=' not supported between instances of 'list' and 'int'


My code is below:



class DataHandler:

def __init__(self, url, file=None,):
self.file = file
self.url = url

def get_links_a_on_page(self):
"""
Gets a links from the igm website
returns list -> links
"""
# TODO: Check if URL is correct
print('checking site')
site = requests.get(self.url)
soup = BeautifulSoup(site.text, 'html.parser')
print("URL received, cleaning links.")

# Find all the href on self.url
links = [a_link['href'] for a_link in soup.find_all("a", href=True)]

for n, i in enumerate(links):
clean_link = re.search("http:(.*)", i)
links[n] = clean_link.group(0)
print("Cleaning URL")
return links

def get_links_from_csv(self):
"""

:return list -> links:
"""

# TODO: Check if File is CSV
# TODO: Check if File has links
links =
try:
with open(self.file, newline='') as csvfile:
spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
for row in spamreader:
links.append(', '.join(row))
except FileNotFoundError:
print("File not found")
return links

def check_links_urllib3_helper(link, return_links=True):
"""
Checks the response code of the url
:param site_url:
:return response code:
"""
if return_links is True:
# Initialize urllib manager -- highly efficient
http = urllib3.PoolManager()
# HEAD to get header values -- much faster
r = http.request("HEAD", link)
if r.status == 200:
return link
if return_links is False:
# Initialize urllib manager -- highly efficient
http = urllib3.PoolManager()
# HEAD to get header values -- much faster
r = http.request("HEAD", link)
return r.status


def multithreaded_link_checking(links, max_workers=99):
"""
multithreaded operation to review a set of links and identifies working links vs. 404 codes
:param links:
:return None:
"""
# codes list in order to log the response
downloadable_links =
# Use the ThreadPoolExecutor to run concurrent processes
with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
# set the size to know how many links are left -- do this to make sure the threads dont bug out
size = len(links)
# map(function,iterable)
for i in executor.map(check_links_urllib3_helper, links):
print("Links Left: ", size)
downloadable_links.append(i)
size -= 1
return downloadable_links









share|improve this question



























    0















    I'm building a class that allows me to mine a set of links off a site, and then i use concurrent.futures in order to check the validity of the links.



    When I run the function multithreaded_link_checking, it runs fine.



    However, when its inside a class, it returns this error:

    Traceback (most recent call last):
    File "datahandler.py", line 236, in <module>
    data.multithreaded_link_checking(links)
    File "datahandler.py", line 209, in multithreaded_link_checking
    with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
    File "/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 128, in __init__
    if max_workers <= 0:
    TypeError: '<=' not supported between instances of 'list' and 'int'


    My code is below:



    class DataHandler:

    def __init__(self, url, file=None,):
    self.file = file
    self.url = url

    def get_links_a_on_page(self):
    """
    Gets a links from the igm website
    returns list -> links
    """
    # TODO: Check if URL is correct
    print('checking site')
    site = requests.get(self.url)
    soup = BeautifulSoup(site.text, 'html.parser')
    print("URL received, cleaning links.")

    # Find all the href on self.url
    links = [a_link['href'] for a_link in soup.find_all("a", href=True)]

    for n, i in enumerate(links):
    clean_link = re.search("http:(.*)", i)
    links[n] = clean_link.group(0)
    print("Cleaning URL")
    return links

    def get_links_from_csv(self):
    """

    :return list -> links:
    """

    # TODO: Check if File is CSV
    # TODO: Check if File has links
    links =
    try:
    with open(self.file, newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
    for row in spamreader:
    links.append(', '.join(row))
    except FileNotFoundError:
    print("File not found")
    return links

    def check_links_urllib3_helper(link, return_links=True):
    """
    Checks the response code of the url
    :param site_url:
    :return response code:
    """
    if return_links is True:
    # Initialize urllib manager -- highly efficient
    http = urllib3.PoolManager()
    # HEAD to get header values -- much faster
    r = http.request("HEAD", link)
    if r.status == 200:
    return link
    if return_links is False:
    # Initialize urllib manager -- highly efficient
    http = urllib3.PoolManager()
    # HEAD to get header values -- much faster
    r = http.request("HEAD", link)
    return r.status


    def multithreaded_link_checking(links, max_workers=99):
    """
    multithreaded operation to review a set of links and identifies working links vs. 404 codes
    :param links:
    :return None:
    """
    # codes list in order to log the response
    downloadable_links =
    # Use the ThreadPoolExecutor to run concurrent processes
    with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
    # set the size to know how many links are left -- do this to make sure the threads dont bug out
    size = len(links)
    # map(function,iterable)
    for i in executor.map(check_links_urllib3_helper, links):
    print("Links Left: ", size)
    downloadable_links.append(i)
    size -= 1
    return downloadable_links









    share|improve this question

























      0












      0








      0








      I'm building a class that allows me to mine a set of links off a site, and then i use concurrent.futures in order to check the validity of the links.



      When I run the function multithreaded_link_checking, it runs fine.



      However, when its inside a class, it returns this error:

      Traceback (most recent call last):
      File "datahandler.py", line 236, in <module>
      data.multithreaded_link_checking(links)
      File "datahandler.py", line 209, in multithreaded_link_checking
      with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
      File "/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 128, in __init__
      if max_workers <= 0:
      TypeError: '<=' not supported between instances of 'list' and 'int'


      My code is below:



      class DataHandler:

      def __init__(self, url, file=None,):
      self.file = file
      self.url = url

      def get_links_a_on_page(self):
      """
      Gets a links from the igm website
      returns list -> links
      """
      # TODO: Check if URL is correct
      print('checking site')
      site = requests.get(self.url)
      soup = BeautifulSoup(site.text, 'html.parser')
      print("URL received, cleaning links.")

      # Find all the href on self.url
      links = [a_link['href'] for a_link in soup.find_all("a", href=True)]

      for n, i in enumerate(links):
      clean_link = re.search("http:(.*)", i)
      links[n] = clean_link.group(0)
      print("Cleaning URL")
      return links

      def get_links_from_csv(self):
      """

      :return list -> links:
      """

      # TODO: Check if File is CSV
      # TODO: Check if File has links
      links =
      try:
      with open(self.file, newline='') as csvfile:
      spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
      for row in spamreader:
      links.append(', '.join(row))
      except FileNotFoundError:
      print("File not found")
      return links

      def check_links_urllib3_helper(link, return_links=True):
      """
      Checks the response code of the url
      :param site_url:
      :return response code:
      """
      if return_links is True:
      # Initialize urllib manager -- highly efficient
      http = urllib3.PoolManager()
      # HEAD to get header values -- much faster
      r = http.request("HEAD", link)
      if r.status == 200:
      return link
      if return_links is False:
      # Initialize urllib manager -- highly efficient
      http = urllib3.PoolManager()
      # HEAD to get header values -- much faster
      r = http.request("HEAD", link)
      return r.status


      def multithreaded_link_checking(links, max_workers=99):
      """
      multithreaded operation to review a set of links and identifies working links vs. 404 codes
      :param links:
      :return None:
      """
      # codes list in order to log the response
      downloadable_links =
      # Use the ThreadPoolExecutor to run concurrent processes
      with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
      # set the size to know how many links are left -- do this to make sure the threads dont bug out
      size = len(links)
      # map(function,iterable)
      for i in executor.map(check_links_urllib3_helper, links):
      print("Links Left: ", size)
      downloadable_links.append(i)
      size -= 1
      return downloadable_links









      share|improve this question














      I'm building a class that allows me to mine a set of links off a site, and then i use concurrent.futures in order to check the validity of the links.



      When I run the function multithreaded_link_checking, it runs fine.



      However, when its inside a class, it returns this error:

      Traceback (most recent call last):
      File "datahandler.py", line 236, in <module>
      data.multithreaded_link_checking(links)
      File "datahandler.py", line 209, in multithreaded_link_checking
      with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
      File "/anaconda3/lib/python3.7/concurrent/futures/thread.py", line 128, in __init__
      if max_workers <= 0:
      TypeError: '<=' not supported between instances of 'list' and 'int'


      My code is below:



      class DataHandler:

      def __init__(self, url, file=None,):
      self.file = file
      self.url = url

      def get_links_a_on_page(self):
      """
      Gets a links from the igm website
      returns list -> links
      """
      # TODO: Check if URL is correct
      print('checking site')
      site = requests.get(self.url)
      soup = BeautifulSoup(site.text, 'html.parser')
      print("URL received, cleaning links.")

      # Find all the href on self.url
      links = [a_link['href'] for a_link in soup.find_all("a", href=True)]

      for n, i in enumerate(links):
      clean_link = re.search("http:(.*)", i)
      links[n] = clean_link.group(0)
      print("Cleaning URL")
      return links

      def get_links_from_csv(self):
      """

      :return list -> links:
      """

      # TODO: Check if File is CSV
      # TODO: Check if File has links
      links =
      try:
      with open(self.file, newline='') as csvfile:
      spamreader = csv.reader(csvfile, delimiter=' ', quotechar='|')
      for row in spamreader:
      links.append(', '.join(row))
      except FileNotFoundError:
      print("File not found")
      return links

      def check_links_urllib3_helper(link, return_links=True):
      """
      Checks the response code of the url
      :param site_url:
      :return response code:
      """
      if return_links is True:
      # Initialize urllib manager -- highly efficient
      http = urllib3.PoolManager()
      # HEAD to get header values -- much faster
      r = http.request("HEAD", link)
      if r.status == 200:
      return link
      if return_links is False:
      # Initialize urllib manager -- highly efficient
      http = urllib3.PoolManager()
      # HEAD to get header values -- much faster
      r = http.request("HEAD", link)
      return r.status


      def multithreaded_link_checking(links, max_workers=99):
      """
      multithreaded operation to review a set of links and identifies working links vs. 404 codes
      :param links:
      :return None:
      """
      # codes list in order to log the response
      downloadable_links =
      # Use the ThreadPoolExecutor to run concurrent processes
      with concurrent.futures.ThreadPoolExecutor(max_workers) as executor:
      # set the size to know how many links are left -- do this to make sure the threads dont bug out
      size = len(links)
      # map(function,iterable)
      for i in executor.map(check_links_urllib3_helper, links):
      print("Links Left: ", size)
      downloadable_links.append(i)
      size -= 1
      return downloadable_links






      python python-3.x multithreading concurrency






      share|improve this question













      share|improve this question











      share|improve this question




      share|improve this question










      asked Nov 23 '18 at 2:09









      Rizwan QaiserRizwan Qaiser

      11




      11
























          1 Answer
          1






          active

          oldest

          votes


















          0














          fixed.



          Needed to add self as a positional argument to multithreaded_link_checking()






          share|improve this answer























            Your Answer






            StackExchange.ifUsing("editor", function () {
            StackExchange.using("externalEditor", function () {
            StackExchange.using("snippets", function () {
            StackExchange.snippets.init();
            });
            });
            }, "code-snippets");

            StackExchange.ready(function() {
            var channelOptions = {
            tags: "".split(" "),
            id: "1"
            };
            initTagRenderer("".split(" "), "".split(" "), channelOptions);

            StackExchange.using("externalEditor", function() {
            // Have to fire editor after snippets, if snippets enabled
            if (StackExchange.settings.snippets.snippetsEnabled) {
            StackExchange.using("snippets", function() {
            createEditor();
            });
            }
            else {
            createEditor();
            }
            });

            function createEditor() {
            StackExchange.prepareEditor({
            heartbeatType: 'answer',
            autoActivateHeartbeat: false,
            convertImagesToLinks: true,
            noModals: true,
            showLowRepImageUploadWarning: true,
            reputationToPostImages: 10,
            bindNavPrevention: true,
            postfix: "",
            imageUploader: {
            brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
            contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
            allowUrls: true
            },
            onDemand: true,
            discardSelector: ".discard-answer"
            ,immediatelyShowMarkdownHelp:true
            });


            }
            });














            draft saved

            draft discarded


















            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53439878%2ftypeerror-when-running-concurrent-futures-inside-a-class%23new-answer', 'question_page');
            }
            );

            Post as a guest















            Required, but never shown

























            1 Answer
            1






            active

            oldest

            votes








            1 Answer
            1






            active

            oldest

            votes









            active

            oldest

            votes






            active

            oldest

            votes









            0














            fixed.



            Needed to add self as a positional argument to multithreaded_link_checking()






            share|improve this answer




























              0














              fixed.



              Needed to add self as a positional argument to multithreaded_link_checking()






              share|improve this answer


























                0












                0








                0







                fixed.



                Needed to add self as a positional argument to multithreaded_link_checking()






                share|improve this answer













                fixed.



                Needed to add self as a positional argument to multithreaded_link_checking()







                share|improve this answer












                share|improve this answer



                share|improve this answer










                answered Nov 23 '18 at 2:19









                Rizwan QaiserRizwan Qaiser

                11




                11
































                    draft saved

                    draft discarded




















































                    Thanks for contributing an answer to Stack Overflow!


                    • Please be sure to answer the question. Provide details and share your research!

                    But avoid



                    • Asking for help, clarification, or responding to other answers.

                    • Making statements based on opinion; back them up with references or personal experience.


                    To learn more, see our tips on writing great answers.




                    draft saved


                    draft discarded














                    StackExchange.ready(
                    function () {
                    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53439878%2ftypeerror-when-running-concurrent-futures-inside-a-class%23new-answer', 'question_page');
                    }
                    );

                    Post as a guest















                    Required, but never shown





















































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown

































                    Required, but never shown














                    Required, but never shown












                    Required, but never shown







                    Required, but never shown







                    Popular posts from this blog

                    Costa Masnaga

                    Fotorealismo

                    Sidney Franklin