Python requests remove website from further checking if keyword found

Issue

I want to remove a website from the rest of the checking process once my "keyword" has been found on it, so that it is not checked again and again.
How can I do this? I am still a beginner; my whole script is included at the bottom. Thanks.

If the keyword "google" is found on the website currently being checked, I want to remove this website from further checking.

    # Excerpt from connect(): when the fetched page contains the keyword,
    # print the hit and append "server,para1,para2" to log.txt.
    if "google" in r2.text:
            print (bcolors.OKGREEN + "Parameters Found : " +server+ "/" + para1 + "/" + para2 + bcolors.ENDC)
            client = server + "," + para1 + "," + para2 + "\n"
            f = open('log.txt', 'a')
            f.write(client)
            f.close()
            

My whole Script

import os
import sys
from threading import Thread, BoundedSemaphore
from datetime import datetime
import optparse
import requests
import urllib3

# Run the shell's `color` command; presumably this is here to switch on ANSI
# escape handling in the Windows console - TODO confirm.
os.system("color")
# Turn off urllib3 warnings; connect() issues its requests with verify=False.
requests.urllib3.disable_warnings()

# Presumably the intended cap on simultaneous connections, with a semaphore
# sized to it.
# NOTE(review): connection_lock is never acquired or released in this script.
maxConnections = 10
connection_lock = BoundedSemaphore(maxConnections)
# Time of day at which the module was loaded; generate_tests() prints it as
# "Time Started".
time = datetime.now().time()

class bcolors:
    """ANSI escape sequences used to colour and style console output."""

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours (SGR codes 91-95).
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'

    # Reset: switches every attribute set above back off.
    ENDC = '\033[0m'


def connect(server, para1, para2):
    """Probe one URL and log it when the response body contains "google".

    The URL is ``server + para1 + para2``. A PUT is sent first, then a GET.
    When the GET response text contains "google", the hit is printed and
    appended to ``log.txt`` as a ``server,para1,para2`` line.

    Args:
        server: URL prefix of the site to test.
        para1: First string appended to ``server``.
        para2: Second string appended after ``para1``.
    """
    url = server + para1 + para2
    try:
        r = requests.request('put', url, timeout=30, verify=False,
                             headers={'Content-Type': 'application/octet-stream'})
        r.close()
    except Exception as e:
        # Best effort: a failed PUT is reported but the GET is still attempted.
        print(e)

    try:
        r2 = requests.request('get', url, verify=False, timeout=30, headers={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'})
    except requests.RequestException as e:
        # A timeout or unreachable host is reported like a failed PUT instead
        # of ending the worker thread with an unhandled traceback.
        print(e)
        return
    r2.close()

    if "google" not in r2.text:
        return

    print(bcolors.OKGREEN + "Parameters Found : " + server + "/" + para1 + "/" + para2 + bcolors.ENDC)
    client = server + "," + para1 + "," + para2 + "\n"
    # The context manager closes the log even if the write fails.
    with open('log.txt', 'a') as f:
        f.write(client)


def generate_tests(hosts, paras1, paras2):
    """Start one ``connect`` thread per (para1, para2, host) combination.

    Every entry has leading/trailing CR and LF characters stripped before
    use. A status banner is printed before each thread is started; the
    threads are not joined.

    Args:
        hosts: Iterable of host lines.
        paras1: Iterable of first-parameter lines (outermost loop).
        paras2: Iterable of second-parameter lines.
    """
    def show(label, value):
        # One bold label followed by its value in blue.
        print (bcolors.BOLD + label + bcolors.OKBLUE + value + bcolors.ENDC)

    attempts = 0
    for first in paras1:
        first = first.strip('\n\r')
        for second in paras2:
            second = second.strip('\n\r')
            for raw_host in hosts:
                server = raw_host.strip('\n\r')
                attempts += 1
                rule = bcolors.OKGREEN + "=" * 60 + bcolors.ENDC
                print (rule)
                show("Website: ", server + first + second)
                show("Parameter1: ", first)
                show("Parameter2: ", second)
                show("Attempts: ", str(attempts))
                show("Time Started: ", str(time))
                show("Time now: ", str(datetime.now().time()))
                print (rule)
                worker = Thread(target=connect, args=(server, first, second))
                worker.start()

def read_test_files(hostsfile, paras1file, paras2file):
    """Read the three input files and hand their lines to ``generate_tests``.

    Args:
        hostsfile: Path of the file listing the hosts, one per line.
        paras1file: Path of the file listing the first parameters.
        paras2file: Path of the file listing the second parameters.
    """
    # Each file is closed as soon as it has been read instead of being left
    # open for the garbage collector to clean up.
    with open(hostsfile, 'r') as f:
        hosts = f.readlines()
    with open(paras1file, 'r') as f:
        paras1 = f.readlines()
    with open(paras2file, 'r') as f:
        paras2 = f.readlines()
    generate_tests(hosts, paras1, paras2)


def main():
    """Parse the command line and start the tests.

    All of ``-H``, ``-U`` and ``-P`` must be given. Otherwise the usage
    string is printed and the process exits with status 0.
    """
    parser = optparse.OptionParser('usage python test.py -H <hosts file> -U <para1 file> -P <para2 file>')
    parser.add_option('-H', dest='hostsfile', help="specify host file to test")
    parser.add_option('-U', dest='paras1file', help="specify possible parameters1")
    parser.add_option('-P', dest='paras2file', help="specify possible parameters2")

    (options, args) = parser.parse_args()

    if not (options.hostsfile and options.paras1file and options.paras2file):
        print (parser.usage)
        # sys.exit() rather than the bare exit() helper, which is injected by
        # the `site` module and is not guaranteed to exist in every runtime.
        sys.exit(0)

    read_test_files(options.hostsfile, options.paras1file, options.paras2file)




# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

Solution

You can keep a whitelist (a set of the websites on which the keyword has already been found) and check it before starting each thread, skipping any website that is already in it.

In the code below, the whitelist is held in memory only, so it is lost when the program exits.

If you want it to persist between runs, you can save it to a file: read the file each time the program starts and write it back each time the program ends.

import os
import sys
from threading import Thread, BoundedSemaphore
from datetime import datetime
import optparse
import requests
import urllib3

# Run the shell's `color` command; presumably this is here to switch on ANSI
# escape handling in the Windows console - TODO confirm.
os.system("color")
# Turn off urllib3 warnings; connect() issues its requests with verify=False.
requests.urllib3.disable_warnings()

# Presumably the intended cap on simultaneous connections, with a semaphore
# sized to it.
# NOTE(review): connection_lock is never acquired or released in this script.
maxConnections = 10
connection_lock = BoundedSemaphore(maxConnections)
# Time of day at which the module was loaded; generate_tests() prints it as
# "Time Started".
time = datetime.now().time()

# Servers on which connect() has found the keyword; generate_tests() skips them.
white_list = set()


class bcolors:
    """ANSI escape sequences for colouring and styling terminal text."""

    # Text attributes.
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

    # Foreground colours (SGR codes 91-95).
    FAIL = '\033[91m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    OKBLUE = '\033[94m'
    HEADER = '\033[95m'

    # Reset: switches every attribute set above back off.
    ENDC = '\033[0m'


def connect(server, para1, para2):
    """Probe one URL and record the server when the body contains "google".

    The URL is ``server + para1 + para2``. A PUT is sent first, then a GET.
    When the GET response text contains "google", ``server`` is added to
    ``white_list``, the hit is printed, and a ``server,para1,para2`` line is
    appended to ``log.txt``.

    Args:
        server: URL prefix of the site to test.
        para1: First string appended to ``server``.
        para2: Second string appended after ``para1``.
    """
    url = server + para1 + para2
    try:
        r = requests.request('put', url, timeout=30, verify=False,
                             headers={'Content-Type': 'application/octet-stream'})
        r.close()
    except Exception as e:
        # Best effort: a failed PUT is reported but the GET is still attempted.
        print(e)

    try:
        r2 = requests.request('get', url, verify=False, timeout=30, headers={
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'})
    except requests.RequestException as e:
        # A timeout or unreachable host is reported like a failed PUT instead
        # of ending the worker thread with an unhandled traceback.
        print(e)
        return
    r2.close()

    if "google" not in r2.text:
        return

    # Remember the server so generate_tests() stops scheduling it.
    white_list.add(server)
    print(bcolors.OKGREEN + "Parameters Found : " + server + "/" + para1 + "/" + para2 + bcolors.ENDC)
    client = server + "," + para1 + "," + para2 + "\n"
    # The context manager closes the log even if the write fails.
    with open('log.txt', 'a') as f:
        f.write(client)


def generate_tests(hosts, paras1, paras2):
    """Test every (para1, para2) pair against each host not yet whitelisted.

    For one parameter pair the hosts are probed concurrently, at most
    ``maxConnections`` at a time. All threads of a pair are joined before
    the next pair starts, so ``white_list`` is up to date by the time a host
    is considered again. Joining each thread right after starting it would
    run every request strictly one after another.

    A host listed more than once in ``hosts`` may be probed more than once
    for the same pair.

    Args:
        hosts: Iterable of host lines.
        paras1: Iterable of first-parameter lines (outermost loop).
        paras2: Iterable of second-parameter lines.
    """
    def wait_for(threads):
        # Block until every started thread has finished, then empty the list.
        for thread in threads:
            thread.join()
        del threads[:]

    i = 0
    for para1 in paras1:
        para1 = para1.strip('\n\r')
        for para2 in paras2:
            para2 = para2.strip('\n\r')
            running = []
            for host in hosts:
                server = host.strip('\n\r')
                # Entries in white_list were stored after the same strip(),
                # so the stripped value is the one to compare.
                if server in white_list:
                    continue
                print(bcolors.OKGREEN + "=" * 60 + bcolors.ENDC)
                print(bcolors.BOLD + "Website: " + bcolors.OKBLUE + server + para1 + para2 + bcolors.ENDC)
                print(bcolors.BOLD + "Parameter1: " + bcolors.OKBLUE + para1 + bcolors.ENDC)
                print(bcolors.BOLD + "Parameter2: " + bcolors.OKBLUE + para2 + bcolors.ENDC)
                i += 1
                print(bcolors.BOLD + "Attempts: " + bcolors.OKBLUE + str(i) + bcolors.ENDC)
                print(bcolors.BOLD + "Time Started: " + bcolors.OKBLUE + str(time) + bcolors.ENDC)
                print(bcolors.BOLD + "Time now: " + bcolors.OKBLUE + str(datetime.now().time()) + bcolors.ENDC)
                print(bcolors.OKGREEN + "=" * 60 + bcolors.ENDC)
                t = Thread(target=connect, args=(server, para1, para2))
                t.start()
                running.append(t)
                # Keep the number of in-flight requests bounded.
                if len(running) >= maxConnections:
                    wait_for(running)
            # Finish this pair completely before moving on, so hits found
            # here are skipped in every later pair.
            wait_for(running)


def read_test_files(hostsfile, paras1file, paras2file):
    """Read the three input files and hand their lines to ``generate_tests``.

    Args:
        hostsfile: Path of the file listing the hosts, one per line.
        paras1file: Path of the file listing the first parameters.
        paras2file: Path of the file listing the second parameters.
    """
    # Each file is closed as soon as it has been read instead of being left
    # open for the garbage collector to clean up.
    with open(hostsfile, 'r') as f:
        hosts = f.readlines()
    with open(paras1file, 'r') as f:
        paras1 = f.readlines()
    with open(paras2file, 'r') as f:
        paras2 = f.readlines()
    generate_tests(hosts, paras1, paras2)


def main():
    """Parse the command line and start the tests.

    All of ``-H``, ``-U`` and ``-P`` must be given. Otherwise the usage
    string is printed and the process exits with status 0.
    """
    parser = optparse.OptionParser('usage python test.py -H <hosts file> -U <para1 file> -P <para2 file>')
    parser.add_option('-H', dest='hostsfile', help="specify host file to test")
    parser.add_option('-U', dest='paras1file', help="specify possible parameters1")
    parser.add_option('-P', dest='paras2file', help="specify possible parameters2")

    (options, args) = parser.parse_args()

    if not (options.hostsfile and options.paras1file and options.paras2file):
        print(parser.usage)
        # sys.exit() rather than the bare exit() helper, which is injected by
        # the `site` module and is not guaranteed to exist in every runtime.
        sys.exit(0)

    read_test_files(options.hostsfile, options.paras1file, options.paras2file)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

Answered By – pppig

This answer was collected from Stack Overflow and is licensed under CC BY-SA 2.5, CC BY-SA 3.0 and CC BY-SA 4.0.

Leave a Reply

(*) Required, Your email will not be published