2020-06-19 20:43:08 +03:00
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
2025-04-14 19:05:04 +03:00
|
|
|
|
(c) 2025 gSpot (https://github.com/gSpotx2f/ruantiblock_openwrt)
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
Python >= 3.6
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
from contextlib import contextmanager
|
2025-09-15 18:25:20 +03:00
|
|
|
|
from ipaddress import (IPv4Address, IPv4Network, summarize_address_range,
|
|
|
|
|
|
AddressValueError, NetmaskValueError)
|
2020-06-19 20:43:08 +03:00
|
|
|
|
import os
|
|
|
|
|
|
import re
|
|
|
|
|
|
import socket
|
|
|
|
|
|
import ssl
|
|
|
|
|
|
import sys
|
|
|
|
|
|
from urllib import request
|
2025-01-21 23:33:51 +03:00
|
|
|
|
#import zlib
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Config:
    """Shared configuration holder for all parser/writer classes.

    All settings live as *class* attributes so every subclass sees the same
    state.  They are populated in two steps:

    1. ``load_environ_config()`` copies whitelisted environment variables
       (see ``environ_list``) onto the class.
    2. The various ``load_*`` helpers read optional filter/exclusion files
       into the ``*_PATTERNS`` / ``*_ITEMS`` containers below.
    """

    # Environment variables recognized as configuration keys; anything not
    # listed here is ignored by load_environ_config().
    environ_list = [
        "BLLIST_SOURCE",
        "BLLIST_MODE",
        "BLLIST_ALT_NSLOOKUP",
        "BLLIST_ALT_DNS_ADDR",
        "BLLIST_ENABLE_IDN",
        "BLLIST_GR_EXCLUDED_SLD_FILE",
        "BLLIST_GR_EXCLUDED_SLD_MASKS_FILE",
        "BLLIST_FQDN_FILTER",
        "BLLIST_FQDN_FILTER_TYPE",
        "BLLIST_FQDN_FILTER_FILE",
        "BLLIST_IP_FILTER",
        "BLLIST_IP_FILTER_TYPE",
        "BLLIST_IP_FILTER_FILE",
        "BLLIST_SD_LIMIT",
        "BLLIST_IP_LIMIT",
        "BLLIST_GR_EXCLUDED_NETS_FILE",
        "BLLIST_MIN_ENTRIES",
        "BLLIST_STRIP_WWW",
        "NFT_TABLE",
        "NFT_TABLE_DNSMASQ",
        "NFTSET_CIDR",
        "NFTSET_IP",
        "NFTSET_DNSMASQ",
        "NFTSET_CIDR_STRING_MAIN",
        "NFTSET_IP_STRING_MAIN",
        "DNSMASQ_DATA_FILE",
        "IP_DATA_FILE",
        "UPDATE_STATUS_FILE",
        "RBL_ALL_URL",
        "RBL_IP_URL",
        "RBL_DPI_URL",
        "ZI_ALL_URL",
        "AF_IP_FULL_URL",
        "AF_IP_URL",
        "AF_NET_URL",
        "AF_FQDN_URL",
        "FZ_URL",
        "DL_IPSET_URL",
        "DL_DMASK_URL",
        "DL_STAT_URL",
        "RBL_ENCODING",
        "ZI_ENCODING",
        "AF_ENCODING",
        "FZ_ENCODING",
        "BLLIST_SUMMARIZE_IP",
        "BLLIST_SUMMARIZE_CIDR",
        "BLLIST_FQDN_EXCLUDED_ENABLE",
        "BLLIST_FQDN_EXCLUDED_FILE",
        "BLLIST_IP_EXCLUDED_ENABLE",
        "BLLIST_IP_EXCLUDED_FILE",
        "BLLIST_CIDR_EXCLUDED_ENABLE",
        "BLLIST_CIDR_EXCLUDED_FILE",
        "BLLIST_ORG_EXCLUDED_ENABLE",
        "BLLIST_ORG_EXCLUDED_FILE",
    ]

    # Filter/exclusion containers filled by the load_* helpers below.
    # NOTE: these are deliberately shared, mutable class-level containers —
    # loaders mutate them in place rather than rebinding.
    BLLIST_FQDN_FILTER_PATTERNS = []
    BLLIST_IP_FILTER_PATTERNS = []
    BLLIST_GR_EXCLUDED_SLD_PATTERNS = set()
    BLLIST_GR_EXCLUDED_SLD_MASKS_PATTERNS = []
    BLLIST_GR_EXCLUDED_NETS_PATTERNS = set()
    BLLIST_FQDN_EXCLUDED_ITEMS = set()
    BLLIST_IP_EXCLUDED_ITEMS = set()
    BLLIST_CIDR_EXCLUDED_ITEMS = []
    BLLIST_ORG_EXCLUDED_ITEMS = set()

    @classmethod
    def _load_config(cls, cfg_dict):
        """Copy cfg_dict onto the class, coercing values.

        Space-separated values for keys in ``config_arrays`` become lists
        (``config_sets`` is currently empty but kept for symmetry); other
        values become ``int`` when possible, otherwise a de-quoted string.
        """

        def normalize_string(string):
            # Values may arrive shell-quoted; strip all double quotes.
            return string.replace('"', '')

        config_sets = set()
        # Keys whose value is a space-separated list of URLs.
        config_arrays = {
            "RBL_ALL_URL",
            "RBL_IP_URL",
            "RBL_DPI_URL",
            "ZI_ALL_URL",
            "AF_IP_FULL_URL",
            "AF_IP_URL",
            "AF_NET_URL",
            "AF_FQDN_URL",
            "FZ_URL",
            "DL_IPSET_URL",
            "DL_DMASK_URL",
            "DL_STAT_URL",
        }
        try:
            for k, v in cfg_dict.items():
                if k in config_sets:
                    value = {normalize_string(i) for i in v.split(" ")}
                elif k in config_arrays:
                    value = [normalize_string(i) for i in v.split(" ")]
                else:
                    try:
                        value = int(v)
                    except ValueError:
                        value = normalize_string(v)
                setattr(cls, k, value)
        # NOTE(review): any failure silently aborts the remaining keys —
        # presumably deliberate best-effort loading; confirm before changing.
        except Exception:
            pass

    @classmethod
    def load_environ_config(cls):
        """Load configuration from environment variables listed in environ_list."""
        cls._load_config({
            k: v for k, v in os.environ.items()
            if k in cls.environ_list
        })

    @classmethod
    def _load_filter(cls, file_path, filter_patterns, is_array=False, func=None):
        """Read non-empty, non-comment lines of file_path into filter_patterns.

        ``filter_patterns`` is mutated in place: appended to when
        ``is_array`` is true, ``add``-ed otherwise (a set).  If ``func`` is
        given, each stripped line is transformed by it first and ``None``
        results are skipped.  A missing/unreadable file is silently ignored.
        """
        try:
            with open(file_path, "rt") as file_handler:
                for line in file_handler:
                    # Skip blank lines and '#'-comments.
                    if line and not re.match(r"(^#|^$)", line):
                        value = line.strip()
                        if func:
                            value = func(value)
                            if value is None:
                                continue
                        if is_array:
                            filter_patterns.append(value)
                        else:
                            filter_patterns.add(value)
        except OSError:
            pass

    @classmethod
    def load_fqdn_filter(cls, file_path=None):
        """Load FQDN filter patterns (regex strings, compiled later in run())."""
        if cls.BLLIST_FQDN_FILTER:
            cls._load_filter(file_path or cls.BLLIST_FQDN_FILTER_FILE,
                             cls.BLLIST_FQDN_FILTER_PATTERNS, is_array=True)

    @classmethod
    def load_ip_filter(cls, file_path=None):
        """Load IP filter patterns (regex strings, compiled later in run())."""
        if cls.BLLIST_IP_FILTER:
            cls._load_filter(file_path or cls.BLLIST_IP_FILTER_FILE,
                             cls.BLLIST_IP_FILTER_PATTERNS, is_array=True)

    @classmethod
    def load_gr_excluded_sld(cls, file_path=None):
        """Load second-level domains excluded from SLD-grouping (exact matches)."""
        if cls.BLLIST_GR_EXCLUDED_SLD_FILE:
            cls._load_filter(file_path or cls.BLLIST_GR_EXCLUDED_SLD_FILE,
                             cls.BLLIST_GR_EXCLUDED_SLD_PATTERNS)

    @classmethod
    def load_gr_excluded_sld_masks(cls, file_path=None):
        """Load regex masks of SLDs excluded from grouping (see check_sld_masks)."""
        if cls.BLLIST_GR_EXCLUDED_SLD_MASKS_FILE:
            cls._load_filter(file_path or cls.BLLIST_GR_EXCLUDED_SLD_MASKS_FILE,
                             cls.BLLIST_GR_EXCLUDED_SLD_MASKS_PATTERNS, is_array=True)

    @classmethod
    def load_gr_excluded_nets(cls, file_path=None):
        """Load /24 subnet prefixes excluded from subnet-grouping."""
        if cls.BLLIST_GR_EXCLUDED_NETS_FILE:
            cls._load_filter(file_path or cls.BLLIST_GR_EXCLUDED_NETS_FILE,
                             cls.BLLIST_GR_EXCLUDED_NETS_PATTERNS)

    @classmethod
    def load_fqdn_excluded(cls, file_path=None):
        """Load domains that must never appear in the output."""
        if cls.BLLIST_FQDN_EXCLUDED_ENABLE:
            cls._load_filter(file_path or cls.BLLIST_FQDN_EXCLUDED_FILE,
                             cls.BLLIST_FQDN_EXCLUDED_ITEMS)

    @classmethod
    def load_ip_excluded(cls, file_path=None):
        """Load IP addresses that must never appear in the output."""
        if cls.BLLIST_IP_EXCLUDED_ENABLE:
            cls._load_filter(file_path or cls.BLLIST_IP_EXCLUDED_FILE,
                             cls.BLLIST_IP_EXCLUDED_ITEMS)

    @staticmethod
    def makeIPv4Network(s):
        """Return IPv4Network(s), or None if s is not a valid network string."""
        net = None
        try:
            net = IPv4Network(s)
        except (AddressValueError, NetmaskValueError):
            pass
        return net

    @classmethod
    def load_cidr_excluded(cls, file_path=None):
        """Load excluded networks as IPv4Network objects (invalid lines skipped)."""
        if cls.BLLIST_CIDR_EXCLUDED_ENABLE:
            cls._load_filter(file_path or cls.BLLIST_CIDR_EXCLUDED_FILE,
                             cls.BLLIST_CIDR_EXCLUDED_ITEMS, is_array=True,
                             func=cls.makeIPv4Network)

    @classmethod
    def load_org_excluded(cls, file_path=None):
        """Load organization names whose entries must be skipped entirely."""
        if cls.BLLIST_ORG_EXCLUDED_ENABLE:
            cls._load_filter(file_path or cls.BLLIST_ORG_EXCLUDED_FILE,
                             cls.BLLIST_ORG_EXCLUDED_ITEMS)

    @staticmethod
    def _check_filter(string, filter_patterns, reverse=False):
        """Return True when string matches (or, with reverse, does NOT match)
        any compiled pattern in filter_patterns.

        ``reverse`` inverts the verdict, implementing the *_FILTER_TYPE
        whitelist/blacklist switch.
        """
        if filter_patterns and string:
            for pattern in filter_patterns:
                if pattern and pattern.search(string):
                    return not reverse
        return reverse

    def check_sld_masks(self, sld):
        """Return True if sld fully matches any excluded-SLD regex mask."""
        if self.BLLIST_GR_EXCLUDED_SLD_MASKS_PATTERNS:
            for pattern in self.BLLIST_GR_EXCLUDED_SLD_MASKS_PATTERNS:
                if re.fullmatch(pattern, sld):
                    return True
        return False

    def check_cidr_overlap(self, ip):
        """Return True if ip (address or CIDR string) overlaps an excluded network.

        Unparseable values return False (they are kept, not dropped).
        """
        if self.BLLIST_CIDR_EXCLUDED_ITEMS:
            try:
                ip_obj = IPv4Network(ip)
            except (AddressValueError, NetmaskValueError):
                pass
            else:
                for net in self.BLLIST_CIDR_EXCLUDED_ITEMS:
                    if net.overlaps(ip_obj):
                        return True
        return False
|
|
|
|
|
|
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
class ParserError(Exception):
    """Base error for blacklist parsing/connection failures.

    Carries an optional human-readable ``reason`` describing what went wrong.
    """

    def __init__(self, reason=None):
        super().__init__(reason)
        self.reason = reason

    def __str__(self):
        # Bug fix: the original returned self.reason directly, which raises
        # "TypeError: __str__ returned non-string (type NoneType)" whenever
        # the exception is created without a reason and then printed.
        return self.reason if self.reason is not None else ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FieldValueError(ParserError):
    """Raised when an entry field is unusable: the value looks like an IP
    where an FQDN is expected, fails the FQDN pattern, or needs IDN
    conversion while IDN support is disabled."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BlackListParser(Config):
    """Base class for all blacklist source parsers.

    Downloads one or more URLs as a byte stream, re-aligns chunks on a
    record separator, decodes records, and dispatches their fields through
    ``ip_value_processing`` / ``fqdn_value_processing`` into the shared
    result containers (``ip_dict``, ``cidr_set``, ``fqdn_dict``, ...).
    Subclasses override ``parser_func`` (and usually ``__init__``).
    """

    def __init__(self):
        # group(1) of ip_pattern is the first three octets incl. trailing dot
        # (the "/24 prefix" used as subnet key throughout).
        self.ip_pattern = re.compile(r"(([0-9]{1,3}[.]){3})[0-9]{1,3}")
        self.cidr_pattern = re.compile(r"([0-9]{1,3}[.]){3}[0-9]{1,3}/[0-9]{1,2}")
        # group(2) of fqdn_pattern is the second-level domain (SLD).
        self.fqdn_pattern = re.compile(
            r"([а-яёa-z0-9_.*-]*?)([а-яёa-z0-9_-]+[.][а-яёa-z0-9-]+)", re.U)
        self.www_pattern = re.compile(r"^www[0-9]?[.]")
        # Detects Cyrillic characters that need IDN (punycode) conversion.
        self.cyr_pattern = re.compile(r"[а-яё]", re.U)
        self.cidr_set = set()
        # ip -> its /24 prefix; used later for per-subnet aggregation.
        self.ip_dict = {}
        # /24 prefix -> number of collected IPs in it.
        self.ip_subnet_dict = {}
        # fqdn -> its SLD; used later for per-SLD aggregation.
        self.fqdn_dict = {}
        # SLD -> number of collected FQDNs under it.
        self.sld_dict = {}
        self.cidr_count = 0
        self.ip_count = 0
        self.output_fqdn_count = 0
        # Set True to skip TLS certificate verification.
        self.ssl_unverified = False
        self.send_headers_dict = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:144.0) Gecko/20100101 Firefox/144.0",
        }
        ### Proxies (ex.: self.proxies = {"http": "http://192.168.0.1:8080", "https": "http://192.168.0.1:8080"})
        self.proxies = None
        self.connect_timeout = None
        self.data_chunk = 2048
        self.url = ["http://127.0.0.1"]
        # Record delimiter in the raw stream; converted to bytes in run().
        self.records_separator = "\n"
        self.default_site_encoding = "utf-8"
        self.site_encoding = self.default_site_encoding
        # Carry-over buffer: bytes after the last separator of a chunk.
        self.rest = bytes()
        # HTTP status codes seen during the run (checked in run()).
        self.http_codes = set()

    @staticmethod
    def _compile_filter_patterns(filters_seq):
        """Compile the non-empty string patterns of filters_seq into a set of regexes."""
        return {
            re.compile(i, re.U)
            for i in filters_seq
            if i and type(i) == str
        }

    @contextmanager
    def _make_connection(self,
                         url,
                         method="GET",
                         postData=None,
                         send_headers_dict=None,
                         timeout=None):
        """Context manager yielding (conn_object, http_code, received_headers).

        Connection errors are reported to stderr and yield (None, None, None);
        exceptions raised by the caller's body are re-raised as ParserError.
        The connection is always closed on exit.
        """
        conn_object = http_code = received_headers = None
        req_object = request.Request(url,
                                     data=postData,
                                     headers=send_headers_dict,
                                     method=method)
        opener_args = [request.ProxyHandler(self.proxies)]
        if self.ssl_unverified:
            # Deliberately disables certificate verification (private API).
            opener_args.append(request.HTTPSHandler(context=ssl._create_unverified_context()))
        try:
            conn_object = request.build_opener(*opener_args).open(
                req_object,
                timeout=(
                    timeout if type(timeout) == int else socket._GLOBAL_DEFAULT_TIMEOUT
                )
            )
            http_code, received_headers = conn_object.status, conn_object.getheaders()
        except Exception as exception_object:
            print(f" Connection error! {exception_object} ( {url} )",
                  file=sys.stderr)
        try:
            yield (conn_object, http_code, received_headers)
        except Exception as exception_object:
            raise ParserError(f"Parser error! {exception_object} ( {url} )")
        finally:
            if conn_object:
                conn_object.close()

    def _download_data(self, url):
        """Generator of raw byte chunks from url; the final yield is None (EOF sentinel)."""
        with self._make_connection(
            url,
            send_headers_dict=self.send_headers_dict,
            timeout=self.connect_timeout
        ) as conn_params:
            conn_object, http_code, _ = conn_params
            self.http_codes.add(http_code)
            if http_code == 200:
                while True:
                    chunk = conn_object.read(self.data_chunk)
                    # An empty read becomes None so downstream can flush.
                    yield (chunk or None)
                    if not chunk:
                        break

    def prepare_data(self, url):
        """Pass-through hook; subclasses may override to decompress/transform chunks."""
        for chunk in self._download_data(url):
            yield chunk

    def _align_chunk(self, url):
        """Re-chunk the stream on records_separator boundaries.

        Bytes after the last separator are kept in self.rest and prepended
        to the next chunk; the None EOF sentinel flushes the remainder.
        """
        for chunk in self.prepare_data(url):
            if chunk is None:
                yield self.rest
                continue
            data, _, self.rest = (self.rest + chunk).rpartition(self.records_separator)
            yield data

    def _split_entries(self, url):
        """Yield decoded records; records that fail to decode are skipped."""
        for chunk in self._align_chunk(url):
            for entry in chunk.split(self.records_separator):
                try:
                    yield entry.decode(
                        self.site_encoding or self.default_site_encoding)
                except UnicodeError:
                    pass

    def _get_subnet(self, ip_addr):
        """Return the /24 prefix of ip_addr ('a.b.c.') or None if not an IPv4 address."""
        regexp_obj = self.ip_pattern.fullmatch(ip_addr)
        return regexp_obj.group(1) if regexp_obj else None

    def ip_value_processing(self, value):
        """Classify value as IP or CIDR and record it, honoring exclusions,
        filters and the per-/24 BLLIST_IP_LIMIT cap."""
        if self.BLLIST_IP_EXCLUDED_ENABLE and value in self.BLLIST_IP_EXCLUDED_ITEMS:
            return
        if self.BLLIST_IP_FILTER and self._check_filter(
                value, self.BLLIST_IP_FILTER_PATTERNS, self.BLLIST_IP_FILTER_TYPE):
            return
        if self.ip_pattern.fullmatch(value) and value not in self.ip_dict:
            subnet = self._get_subnet(value)
            # Excluded-from-grouping subnets bypass the IP limit entirely.
            if subnet in self.BLLIST_GR_EXCLUDED_NETS_PATTERNS or (
                not self.BLLIST_IP_LIMIT or (
                    subnet not in self.ip_subnet_dict or self.ip_subnet_dict[subnet] < self.BLLIST_IP_LIMIT
                )
            ):
                self.ip_dict[value] = subnet
                self.ip_subnet_dict[subnet] = (self.ip_subnet_dict.get(subnet) or 0) + 1
        elif self.cidr_pattern.fullmatch(value):
            self.cidr_set.add(value)

    def _convert_to_punycode(self, string):
        """Convert a Cyrillic domain to punycode; raise FieldValueError if IDN is disabled."""
        if self.cyr_pattern.search(string):
            if self.BLLIST_ENABLE_IDN:
                try:
                    string = string.encode("idna").decode(
                        self.site_encoding or self.default_site_encoding)
                except UnicodeError:
                    pass
            else:
                raise FieldValueError()
        return string

    def _get_sld(self, fqdn):
        """Return the second-level domain of fqdn, or None if it doesn't match."""
        regexp_obj = self.fqdn_pattern.fullmatch(fqdn)
        return regexp_obj.group(2) if regexp_obj else None

    def fqdn_value_processing(self, value):
        """Normalize and record a domain name.

        Raises FieldValueError when value is actually an IP address or not a
        valid FQDN (callers typically fall back to IP processing then).
        """
        if self.ip_pattern.fullmatch(value):
            raise FieldValueError()
        value = value.strip("*.").lower()
        if self.BLLIST_STRIP_WWW:
            value = self.www_pattern.sub("", value)
        if self.BLLIST_FQDN_EXCLUDED_ENABLE and value in self.BLLIST_FQDN_EXCLUDED_ITEMS:
            return
        if not self.BLLIST_FQDN_FILTER or (
            self.BLLIST_FQDN_FILTER and not self._check_filter(
                value, self.BLLIST_FQDN_FILTER_PATTERNS, self.BLLIST_FQDN_FILTER_TYPE)
        ):
            if self.fqdn_pattern.fullmatch(value):
                value = self._convert_to_punycode(value)
                sld = self._get_sld(value)
                # Excluded/masked SLDs bypass the per-SLD cap.
                if (sld in self.BLLIST_GR_EXCLUDED_SLD_PATTERNS or self.check_sld_masks(sld)) or (
                    not self.BLLIST_SD_LIMIT or (
                        sld not in self.sld_dict or self.sld_dict[sld] < self.BLLIST_SD_LIMIT
                    )
                ):
                    self.sld_dict[sld] = (self.sld_dict.get(sld) or 0) + 1
                    self.fqdn_dict[value] = sld
            else:
                raise FieldValueError()

    def org_value_processing(self, value):
        """Return True if value is an excluded organization name, else None."""
        if not value:
            return
        if self.BLLIST_ORG_EXCLUDED_ENABLE and value in self.BLLIST_ORG_EXCLUDED_ITEMS:
            return True

    def parser_func(self):
        """Must be overridden by a subclass"""
        raise NotImplementedError()

    def _group_ip_ranges(self):
        """Summarize contiguous IPs into CIDRs (when enabled) and count remaining IPs."""
        if self.BLLIST_SUMMARIZE_IP:
            for i in Summarize.summarize_ip_ranges(self.ip_dict, True):
                self.cidr_set.add(i.with_prefixlen)
        self.ip_count = len(self.ip_dict)

    def _group_cidr_ranges(self):
        """Summarize adjacent /24 networks (when enabled) and count CIDRs."""
        if self.BLLIST_SUMMARIZE_CIDR:
            for i in Summarize.summarize_nets(self.cidr_set):
                self.cidr_set.add(i.with_prefixlen)
        self.cidr_count = len(self.cidr_set)

    def run(self):
        """Execute the parser. Returns 0 on success, 2 on too-few entries or
        any non-200 HTTP status, 1 otherwise."""
        ret_value = 1
        # Compile string patterns once per run (instance attrs shadow class ones).
        self.BLLIST_FQDN_FILTER_PATTERNS = self._compile_filter_patterns(self.BLLIST_FQDN_FILTER_PATTERNS)
        self.BLLIST_IP_FILTER_PATTERNS = self._compile_filter_patterns(self.BLLIST_IP_FILTER_PATTERNS)
        # The stream is bytes, so the separator must be too.
        self.records_separator = bytes(self.records_separator, "utf-8")
        self.parser_func()
        if (len(self.ip_dict) + len(self.cidr_set) + len(self.fqdn_dict)) >= self.BLLIST_MIN_ENTRIES:
            ret_value = 0
        else:
            ret_value = 2
        for i in self.http_codes:
            if i != 200:
                ret_value = 2
                break
        # Reset stream state so the instance could run again.
        self.rest = bytes()
        self.http_codes = set()
        return ret_value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Summarize:
    """Utilities that collapse runs of consecutive IPs / adjacent /24 nets
    into larger CIDR blocks.

    HOSTS_LIMIT / NETS_LIMIT are minimum run lengths required before a run
    is summarized (0 means "any run of 2+").
    """

    HOSTS_LIMIT = 0
    NETS_LIMIT = 0

    @staticmethod
    def _sort_ip_func(e):
        # Numeric (not lexicographic) ordering of dotted-quad strings.
        return IPv4Address(e)

    @classmethod
    def _group_ip_ranges(cls, ip_list, raw_list=None):
        """Yield (start, end) IPv4Address pairs of consecutive runs in the
        sorted ip_list; when raw_list is given, the summarized addresses are
        removed from it in place."""

        def remove_items(start, end):
            for ip in range(int(start), int(end) + 1):
                raw_list.remove(str(IPv4Address(ip)))

        start = end = None
        hosts = 1
        for ip in ip_list:
            ip_obj = IPv4Address(ip)
            if end and (end + 1) == ip_obj:
                hosts += 1
            else:
                # Run broken: flush the previous run if long enough.
                if hosts > 1 and hosts >= cls.HOSTS_LIMIT:
                    if raw_list:
                        remove_items(start, end)
                    yield start, end
                start = ip_obj
                hosts = 1
            end = ip_obj
        else:
            # for/else: flush the final run after the loop completes.
            if hosts > 1 and hosts >= cls.HOSTS_LIMIT:
                if raw_list:
                    remove_items(start, end)
                yield start, end

    @classmethod
    def summarize_ip_ranges(cls, ip_list, modify_raw_list=False):
        """Yield CIDR networks (prefix < 32) covering consecutive-IP runs.

        With modify_raw_list=True the grouped plain IPs are removed from
        ip_list, and any /32 produced by the summarizer is re-added to it.
        """
        for s, e in cls._group_ip_ranges(sorted(ip_list, key=cls._sort_ip_func),
                                         modify_raw_list and ip_list):
            for i in summarize_address_range(s, e):
                if i.prefixlen == 32:
                    # A lone /32 goes back into the plain-IP container.
                    if modify_raw_list:
                        if type(ip_list) == set:
                            ip_list.add(i.network_address)
                        else:
                            ip_list.append(i.network_address)
                else:
                    yield i

    @staticmethod
    def _sort_net_func(e):
        # Numeric ordering of CIDR strings.
        return IPv4Network(e)

    @classmethod
    def _group_nets(cls, cidr_list, raw_list=None):
        """Yield summarize_address_range generators for runs of adjacent /24
        networks inside the same /16; grouped /24 strings are removed from
        raw_list in place. Networks with other prefix lengths are ignored."""

        def remove_items(start, end):
            # Step 256: one removal per /24 network in the run.
            for ip in range(int(start), int(end) + 1, 256):
                raw_list.remove(str(IPv4Address(ip)) + "/24")

        start = end = curr_super_net = None
        nets = 1
        for net in cidr_list:
            net_obj = IPv4Network(net)
            prefix_len = net_obj.prefixlen
            if prefix_len == 24:
                address = net_obj.network_address
                super_net = net_obj.supernet(new_prefix=16)
                # Runs never cross a /16 boundary.
                if end and super_net == curr_super_net and (end + 256) == address:
                    nets += 1
                else:
                    if nets > 1 and nets >= cls.NETS_LIMIT:
                        if raw_list:
                            remove_items(start, end)
                        # end + 255 = broadcast address of the last /24.
                        yield summarize_address_range(IPv4Address(start), IPv4Address(end + 255))
                    start = address
                    curr_super_net = super_net
                    nets = 1
                end = address
        else:
            # for/else: flush the final run.
            if nets > 1 and nets >= cls.NETS_LIMIT:
                if raw_list:
                    remove_items(start, end)
                yield summarize_address_range(IPv4Address(start), IPv4Address(end + 255))

    @classmethod
    def summarize_nets(cls, cidr_list):
        """Yield summarized networks for adjacent /24 runs in cidr_list,
        removing the originals from cidr_list in place."""
        for i in cls._group_nets(sorted(cidr_list, key=cls._sort_net_func), cidr_list):
            for j in i:
                yield j
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OptimizeConfig(Config):
    """Merges the results of several parsers and post-processes them:
    CIDR exclusions, subdomain deduplication, SLD/subnet aggregation and
    range summarization. Final results end up in cidr_set / ip_dict /
    fqdn_dict (the latter two become plain sets after optimization)."""

    def __init__(self, parsers_list):
        self.parsers_list = parsers_list
        self.cidr_set = set()
        self.ip_dict = {}
        self.ip_subnet_dict = {}
        self.fqdn_dict = {}
        self.sld_dict = {}
        self.cidr_count = 0
        self.ip_count = 0
        self.output_fqdn_count = 0

    def _exclude_nets(self):
        """Drop IPs and CIDRs that overlap any excluded network."""
        if self.BLLIST_CIDR_EXCLUDED_ENABLE:
            ip_dict = {}
            for ip, subnet in self.ip_dict.items():
                if not self.check_cidr_overlap(ip):
                    ip_dict[ip] = subnet
            self.ip_dict = ip_dict
            cidr_set = set()
            for net in self.cidr_set:
                if not self.check_cidr_overlap(net):
                    cidr_set.add(net)
            self.cidr_set = cidr_set

    def _remove_subdomains(self):
        """Remove every FQDN whose parent domain is also present (per-SLD buckets)."""
        tld_dict = {}
        for fqdn, sld in self.fqdn_dict.items():
            tld_dict.setdefault(sld, [])
            tld_dict[sld].append(fqdn)
        for v in tld_dict.values():
            for i in v:
                if i in self.fqdn_dict:
                    for j in v:
                        if (j != i) and j.endswith("." + i):
                            self.fqdn_dict.pop(j, None)

    def _optimize_fqdn_dict(self):
        """Collapse SLDs that reached BLLIST_SD_LIMIT to the bare SLD;
        replaces fqdn_dict with the final output set and counts entries."""
        optimized_set = set()
        for fqdn, sld in self.fqdn_dict.items():
            if sld and (fqdn == sld or sld not in self.fqdn_dict) and self.sld_dict.get(sld):
                # Masked/excluded SLDs are never collapsed.
                if (not self.check_sld_masks(sld) and (
                    self.BLLIST_SD_LIMIT and sld not in self.BLLIST_GR_EXCLUDED_SLD_PATTERNS
                )) and (self.sld_dict[sld] >= self.BLLIST_SD_LIMIT):
                    record_value = sld
                    # Delete so the SLD is emitted only once.
                    del(self.sld_dict[sld])
                else:
                    record_value = fqdn
                optimized_set.add(record_value)
                self.output_fqdn_count += 1
        self.fqdn_dict = optimized_set

    def _optimize_ip_dict(self):
        """Collapse /24 subnets that reached BLLIST_IP_LIMIT into a single
        'a.b.c.0/24' CIDR; replaces ip_dict with the final output set."""
        optimized_set = set()
        for ip_addr, subnet in self.ip_dict.items():
            if subnet in self.ip_subnet_dict:
                if subnet not in self.BLLIST_GR_EXCLUDED_NETS_PATTERNS and (
                    self.BLLIST_IP_LIMIT and self.ip_subnet_dict[subnet] >= self.BLLIST_IP_LIMIT
                ):
                    # subnet already ends with '.', so this forms 'a.b.c.0/24'.
                    self.cidr_set.add(f"{subnet}0/24")
                    del(self.ip_subnet_dict[subnet])
                else:
                    optimized_set.add(ip_addr)
                    self.ip_count += 1
        self.ip_dict = optimized_set

    def _group_ip_ranges(self):
        """Summarize consecutive IPs into CIDRs (when enabled); recount IPs."""
        if self.BLLIST_SUMMARIZE_IP:
            for i in Summarize.summarize_ip_ranges(self.ip_dict, True):
                self.cidr_set.add(i.with_prefixlen)
        self.ip_count = len(self.ip_dict)

    def _group_cidr_ranges(self):
        """Summarize adjacent /24 networks (when enabled); recount CIDRs."""
        if self.BLLIST_SUMMARIZE_CIDR:
            for i in Summarize.summarize_nets(self.cidr_set):
                self.cidr_set.add(i.with_prefixlen)
        self.cidr_count = len(self.cidr_set)

    def optimize(self):
        """Merge all parsers' results and run the optimization pipeline in order."""
        for i in self.parsers_list:
            self.cidr_set |= i.cidr_set
            self.ip_dict.update(i.ip_dict)
            self.ip_subnet_dict.update(i.ip_subnet_dict)
            self.fqdn_dict.update(i.fqdn_dict)
            self.sld_dict.update(i.sld_dict)
        self._exclude_nets()
        self._remove_subdomains()
        self._optimize_fqdn_dict()
        self._optimize_ip_dict()
        self._group_ip_ranges()
        self._group_cidr_ranges()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WriteConfigFiles(Config):
    """Writes the optimized blacklist data out as nftables commands,
    dnsmasq directives and a plain-text status line."""

    def __init__(self):
        # -1 lets open() choose its default buffering policy for bulk writes.
        self.write_buffer = -1

    def write_ipset_config(self, ip_dict, cidr_set):
        """Write nft commands that flush and repopulate the CIDR and IP sets."""
        with open(self.IP_DATA_FILE, "wt", buffering=self.write_buffer) as out:
            for set_name in (self.NFTSET_CIDR, self.NFTSET_IP):
                out.write(f"flush set {self.NFT_TABLE} {set_name}\n")
            out.write(f"table {self.NFT_TABLE} {{\n{self.NFTSET_CIDR_STRING_MAIN}")
            if cidr_set:
                out.write("elements={")
                out.write("".join(f"{net}," for net in cidr_set))
                out.write("};")
            out.write(f"}}\n{self.NFTSET_IP_STRING_MAIN}")
            if ip_dict:
                out.write("elements={")
                out.write("".join(f"{addr}," for addr in ip_dict))
                out.write("};")
            out.write("}\n}\n")

    def write_dnsmasq_config(self, fqdn_dict):
        """Write a server=/nftset= (or nftset=-only) pair of lines per domain."""
        nftset_ref = f"{self.NFT_TABLE_DNSMASQ}#{self.NFTSET_DNSMASQ}"
        with open(self.DNSMASQ_DATA_FILE, "wt", buffering=self.write_buffer) as out:
            for fqdn in fqdn_dict:
                if self.BLLIST_ALT_NSLOOKUP:
                    out.write(f"server=/{fqdn}/{self.BLLIST_ALT_DNS_ADDR}\n"
                              f"nftset=/{fqdn}/{nftset_ref}\n")
                else:
                    out.write(f"nftset=/{fqdn}/{nftset_ref}\n")

    def write_update_status_file(self, ip_count, cidr_count, fqdn_count):
        """Write the three entry counters as a single space-separated line."""
        with open(self.UPDATE_STATUS_FILE, "wt") as out:
            out.write(f"{cidr_count} {ip_count} {fqdn_count}")
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-13 18:14:06 +03:00
|
|
|
|
class RblHybrid(BlackListParser):
    """Parser for the JSON registry feed: records domains when present and
    falls back to the record's IP list when the domain field is empty or
    invalid. Base class for RblFQDN / RblIp."""

    def __init__(self):
        super().__init__()
        self.url = self.RBL_ALL_URL
        # Each JSON record in the stream starts with this key, so it doubles
        # as the record separator.
        self.records_separator = '{"appearDate": '
        self.ips_separator = ", "
        # group(1): domain list contents; group(2): ip list contents.
        self.entry_regexp = re.compile(r'"domains": \["?(.*?)"?\].*?"ips": \[([a-f0-9/.:", ]*)\]')
        # group(1): organization name (checked against the org exclusion list).
        self.org_value_regexp = re.compile(r'"name": "(.*?)"')

    def parser_func(self):
        """Process every record of every configured URL."""
        for url in self.url:
            for entry in self._split_entries(url):
                org = self.org_value_regexp.search(entry)
                # Skip the whole record when its organization is excluded.
                if not org or not self.org_value_processing(org.group(1)):
                    res = self.entry_regexp.search(entry)
                    if not res:
                        continue
                    ip_string = res.group(2).replace('"', "")
                    fqdn_string = res.group(1)
                    if fqdn_string:
                        try:
                            self.fqdn_value_processing(fqdn_string)
                        except FieldValueError:
                            # Domain field unusable: fall back to the IPs.
                            for i in ip_string.split(self.ips_separator):
                                self.ip_value_processing(i)
                    else:
                        # No domain at all: record the IPs.
                        for i in ip_string.split(self.ips_separator):
                            self.ip_value_processing(i)
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
2024-03-14 22:20:11 +03:00
|
|
|
|
class RblDPI(BlackListParser):
    """Parser for the rublacklist DPI registry: domain lists only."""

    def __init__(self):
        super().__init__()
        self.url = self.RBL_DPI_URL
        # the DPI list may legitimately be short; disable the size sanity check
        self.BLLIST_MIN_ENTRIES = 0
        self.records_separator = '{"domains"'
        self.entry_regexp = re.compile(r': \[(.*?)\]')

    def parser_func(self):
        """Extract every domain of each record and register the acceptable ones."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                found = self.entry_regexp.search(record)
                if found is None:
                    continue
                domains_field = found.group(1)
                if not domains_field:
                    continue
                for raw_domain in domains_field.split(', "'):
                    try:
                        self.fqdn_value_processing(raw_domain.strip('"'))
                    except FieldValueError:
                        pass
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-13 18:14:06 +03:00
|
|
|
|
class RblFQDN(RblHybrid):
    """rublacklist parser collecting domain names only (no IP fallback)."""

    def parser_func(self):
        """Register the FQDN of every record whose organization is not excluded."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                org_match = self.org_value_regexp.search(record)
                if org_match and self.org_value_processing(org_match.group(1)):
                    continue  # organization matched the exclusion filter
                found = self.entry_regexp.search(record)
                if found is None:
                    continue
                domain = found.group(1)
                if not domain:
                    continue
                try:
                    self.fqdn_value_processing(domain)
                except FieldValueError:
                    pass
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-13 18:14:06 +03:00
|
|
|
|
class RblIp(RblHybrid):
    """rublacklist parser collecting IP addresses only."""

    def parser_func(self):
        """Register every IP of every record whose organization is not excluded."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                org_match = self.org_value_regexp.search(record)
                if org_match and self.org_value_processing(org_match.group(1)):
                    continue  # organization matched the exclusion filter
                found = self.entry_regexp.search(record)
                if found is None:
                    continue
                addresses = found.group(2).replace('"', "")
                if not addresses:
                    continue
                for addr in addresses.split(self.ips_separator):
                    self.ip_value_processing(addr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ZiHybrid(BlackListParser):
    """
    Parser for the zapret-info CSV dump.

    CSV fields used: [0] = '|'-joined IPs, [1] = FQDN, [3] = organization.
    Registers the FQDN when possible and falls back to the record's IPs
    when the FQDN is rejected or absent.
    """

    def __init__(self):
        super().__init__()
        self.url = self.ZI_ALL_URL
        self.site_encoding = self.ZI_ENCODING
        self.fields_separator = ";"
        self.ips_separator = "|"
        # self.decomp_obj = zlib.decompressobj(wbits=47)

    # def prepare_data(self, url):
    #     """
    #     for https://raw.githubusercontent.com/zapret-info/z-i/master/dump.csv.gz
    #     """
    #     for chunk in self._download_data(url):
    #         if chunk:
    #             data = self.decomp_obj.decompress(chunk)
    #             yield data

    def _process_ips_field(self, ips_field):
        """Feed every address of the '|'-joined ips field to ip_value_processing()."""
        for item in ips_field.split(self.ips_separator):
            self.ip_value_processing(item)

    def parser_func(self):
        """
        Iterate all source URLs and their CSV records.

        Records whose organization matches the exclusion filter are skipped;
        records with too few fields raise IndexError and are ignored.
        """
        for url in self.url:
            for entry in self._split_entries(url):
                entry_list = entry.split(self.fields_separator)
                try:
                    if not entry_list[3] or not self.org_value_processing(entry_list[3]):
                        if entry_list[1]:
                            try:
                                self.fqdn_value_processing(entry_list[1])
                            except FieldValueError:
                                # FQDN rejected: fall back to the record's IPs
                                self._process_ips_field(entry_list[0])
                        else:
                            # record carries no FQDN: use its IPs
                            self._process_ips_field(entry_list[0])
                except IndexError:
                    # malformed record with too few fields
                    pass
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-13 18:14:06 +03:00
|
|
|
|
class ZiFQDN(ZiHybrid):
    """zapret-info parser collecting domain names only."""

    def parser_func(self):
        """Register the FQDN (field 1) of each record not excluded by org (field 3)."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                fields = record.split(self.fields_separator)
                try:
                    if fields[3] and self.org_value_processing(fields[3]):
                        continue  # organization matched the exclusion filter
                    if fields[1]:
                        try:
                            self.fqdn_value_processing(fields[1])
                        except FieldValueError:
                            pass
                except IndexError:
                    # malformed record with too few fields
                    pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ZiIp(ZiHybrid):
    """zapret-info parser collecting IP addresses/subnets only."""

    def parser_func(self):
        """Register every IP (field 0) of each record not excluded by org (field 3)."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                fields = record.split(self.fields_separator)
                try:
                    if fields[3] and self.org_value_processing(fields[3]):
                        continue  # organization matched the exclusion filter
                    for addr in fields[0].split(self.ips_separator):
                        self.ip_value_processing(addr)
                except IndexError:
                    # malformed record with too few fields
                    pass
|
|
|
|
|
|
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
class AfFQDN(BlackListParser):
    """antifilter domain-list parser: one FQDN per entry."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.url = self.AF_FQDN_URL

    def parser_func(self):
        """Register each entry as an FQDN, silently skipping rejected values."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                try:
                    self.fqdn_value_processing(record)
                except FieldValueError:
                    pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AfIpFull(BlackListParser):
    """antifilter full IP list parser: single hosts are written as x.x.x.x/32."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.url = self.AF_IP_FULL_URL
        # matches a trailing /32 host mask for removal
        self.entry_regexp = re.compile(r"/32$")

    def parser_func(self):
        """Register each entry as an IP, stripping any trailing /32 mask first."""
        strip_mask = self.entry_regexp.sub
        for src_url in self.url:
            for record in self._split_entries(src_url):
                self.ip_value_processing(strip_mask("", record))
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AfIp(BlackListParser):
    """antifilter plain IP list parser: one address per entry."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.url = self.AF_IP_URL

    def parser_func(self):
        """Register every entry of every source URL as an IP value."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                self.ip_value_processing(record)
|
|
|
|
|
|
|
|
|
|
|
|
|
2025-11-13 18:14:06 +03:00
|
|
|
|
class AfNet(BlackListParser):
    """antifilter subnet list parser: one CIDR network per entry."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # the subnet list may legitimately be short; disable the size sanity check
        self.BLLIST_MIN_ENTRIES = 0
        self.url = self.AF_NET_URL

    def parser_func(self):
        """Register every entry of every source URL as an IP/CIDR value."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                self.ip_value_processing(record)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FzHybrid(BlackListParser):
    """
    Parser for the fz XML register.

    For each record with a non-empty <domain>, registers the FQDN and
    additionally every <ip> and <ipSubnet> value; records without a
    domain are skipped entirely.
    """

    def __init__(self):
        super().__init__()
        self.url = self.FZ_URL
        self.site_encoding = self.FZ_ENCODING
        self.records_separator = "</content>"
        self.fqdn_value_regexp = re.compile(r"<domain><\!\[CDATA\[(.*?)\]\]></domain>", re.U)
        self.ip_value_regexp = re.compile(r"<ip>(.*?)</ip>")
        self.cidr_value_regexp = re.compile(r"<ipSubnet>(.*?)</ipSubnet>")
        self.org_value_regexp = re.compile(r'org="(.*?)"')

    def parser_func(self):
        """Walk every record of each source URL; see the class docstring."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                org_match = self.org_value_regexp.search(record)
                if org_match and self.org_value_processing(org_match.group(1)):
                    continue  # organization matched the exclusion filter
                domain_match = self.fqdn_value_regexp.search(record)
                if domain_match is None or not domain_match.group(1):
                    continue  # records without a domain are skipped entirely
                try:
                    self.fqdn_value_processing(domain_match.group(1))
                except FieldValueError:
                    pass
                for ip_match in self.ip_value_regexp.finditer(record):
                    if ip_match.group(1):
                        self.ip_value_processing(ip_match.group(1))
                for net_match in self.cidr_value_regexp.finditer(record):
                    if net_match.group(1):
                        self.ip_value_processing(net_match.group(1))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FzFQDN(FzHybrid):
    """fz register parser collecting domain names only."""

    def parser_func(self):
        """Register the <domain> value of each record not excluded by org."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                org_match = self.org_value_regexp.search(record)
                if org_match and self.org_value_processing(org_match.group(1)):
                    continue  # organization matched the exclusion filter
                domain_match = self.fqdn_value_regexp.search(record)
                if not (domain_match and domain_match.group(1)):
                    continue
                try:
                    self.fqdn_value_processing(domain_match.group(1))
                except FieldValueError:
                    pass
|
2024-03-21 00:06:43 +03:00
|
|
|
|
|
|
|
|
|
|
|
2025-11-13 18:14:06 +03:00
|
|
|
|
class FzIp(FzHybrid):
    """fz register parser collecting IP addresses and subnets only."""

    def parser_func(self):
        """Register every <ip> and <ipSubnet> of each record not excluded by org."""
        for src_url in self.url:
            for record in self._split_entries(src_url):
                org_match = self.org_value_regexp.search(record)
                if org_match and self.org_value_processing(org_match.group(1)):
                    continue  # organization matched the exclusion filter
                for ip_match in self.ip_value_regexp.finditer(record):
                    if ip_match.group(1):
                        self.ip_value_processing(ip_match.group(1))
                for net_match in self.cidr_value_regexp.finditer(record):
                    if net_match.group(1):
                        self.ip_value_processing(net_match.group(1))
|
2020-06-19 20:43:08 +03:00
|
|
|
|
|
2022-12-25 19:45:07 +03:00
|
|
|
|
|
2024-09-23 00:52:58 +03:00
|
|
|
|
class Ra(BlackListParser):
    """Downloader for ready-made ruantiblock config files (no local parsing)."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.url_ipset = self.DL_IPSET_URL
        self.url_dnsmasq = self.DL_DMASK_URL
        self.url_stat = self.DL_STAT_URL

    def download_config(self, url, cfg_file):
        """
        Stream `url` (a one-element URL list) into `cfg_file`.

        The output file is created lazily on the first non-empty chunk, so
        an unreachable source does not truncate an existing config.  If the
        file cannot be opened the download is abandoned (best-effort).
        """
        self.url = url
        file_handler = None
        try:
            for chunk in self._download_data(self.url[0]):
                if not chunk:
                    continue
                if file_handler is None:
                    try:
                        file_handler = open(cfg_file, "wb", buffering=-1)
                    except Exception:
                        break
                file_handler.write(chunk)
        finally:
            # close even when _download_data() or write() raises mid-stream;
            # the previous version leaked the handle on such errors
            if file_handler:
                file_handler.close()

    def run(self):
        """
        Fetch all three config files.

        Returns 0 on success or 2 when any recorded HTTP status differs
        from 200; the shared http_codes set is reset afterwards.
        """
        ret_value = 0
        self.download_config(self.url_ipset, self.IP_DATA_FILE)
        self.download_config(self.url_dnsmasq, self.DNSMASQ_DATA_FILE)
        self.download_config(self.url_stat, self.UPDATE_STATUS_FILE)
        if any(code != 200 for code in self.http_codes):
            ret_value = 2
        self.http_codes = set()
        return ret_value
|
2022-05-05 18:28:32 +03:00
|
|
|
|
|
2022-12-25 19:45:07 +03:00
|
|
|
|
|
2020-06-19 20:43:08 +03:00
|
|
|
|
if __name__ == "__main__":
    # Load configuration from the environment and all filter/exclusion files.
    Config.load_environ_config()
    Config.load_fqdn_filter()
    Config.load_ip_filter()
    Config.load_gr_excluded_sld()
    Config.load_gr_excluded_sld_masks()
    Config.load_gr_excluded_nets()
    Config.load_fqdn_excluded()
    Config.load_ip_excluded()
    Config.load_cidr_excluded()
    Config.load_org_excluded()
    # Parser classes keyed by blacklist mode, then by data source.
    parsers_dict = {
        "ip": {
            "rublacklist": [RblIp],
            "zapret-info": [ZiIp],
            "antifilter": [AfIpFull, AfNet],
            "fz": [FzIp],
            "ruantiblock": [Ra],
        },
        "fqdn": {
            "rublacklist": [RblHybrid, RblDPI],
            "zapret-info": [ZiHybrid],
            "antifilter": [AfFQDN, AfNet],
            "fz": [FzHybrid],
            "ruantiblock": [Ra],
        },
        "fqdn-only": {
            "rublacklist": [RblFQDN, RblDPI],
            "zapret-info": [ZiFQDN],
            "antifilter": [AfFQDN],
            "fz": [FzFQDN],
            "ruantiblock": [Ra],
        },
    }
    try:
        parser_classes = parsers_dict[Config.BLLIST_MODE][Config.BLLIST_SOURCE]
    except KeyError:
        print("Wrong configuration! (Config.BLLIST_MODE, Config.BLLIST_SOURCE)",
              file=sys.stderr)
        sys.exit(1)
    parser_instances = [cls() for cls in parser_classes]
    ret_list = [parser.run() for parser in parser_instances]
    # Write output files only when every parser succeeded; the "ruantiblock"
    # source downloads ready-made configs, so nothing needs to be written.
    if sum(ret_list) == 0 and Config.BLLIST_SOURCE != "ruantiblock":
        oc_obj = OptimizeConfig(parser_instances)
        oc_obj.optimize()
        write_cfg_obj = WriteConfigFiles()
        write_cfg_obj.write_dnsmasq_config(oc_obj.fqdn_dict)
        write_cfg_obj.write_ipset_config(oc_obj.ip_dict, oc_obj.cidr_set)
        write_cfg_obj.write_update_status_file(
            oc_obj.ip_count, oc_obj.cidr_count, oc_obj.output_fqdn_count)
    # Exit status: 1 dominates, then 2, otherwise 0.
    sys.exit(1 if 1 in ret_list else (2 if 2 in ret_list else 0))
|