utils.py 4.74 KB
Newer Older
Alexandre's avatar
Alexandre committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# utils.py

import sys
import re
import socket
import urllib.parse

try:
    # https://github.com/drkjam/netaddr/
    import netaddr
except ImportError as e:
    print("Error: missing module")
    print(e)
    sys.exit(1)

import homer

# Do not change these
# Pattern applied by is_valid_hostname() (via re.search) to the canonicalized
# name. The three alternatives are, in order: a name starting with a digit or
# lower-case letter, a run of digits and colons, and a single digit or dot.
# NOTE(review): "|" binds more loosely than the anchors, so "^" only applies
# to the first alternative and "$" only to the last one. With re.search() the
# middle alternative can therefore match anywhere in the string -- confirm
# this permissiveness is intended before relying on it for strict validation.
re_host = re.compile(r'^([0-9a-z][0-9a-z-\.]*)|([0-9:]+)|([0-9\.])$')

21
22
def dump_data(data, text="data"):
    """Print a debugging dump of *data* on standard output.

    Four lines are printed: the length of *data*, its default
    representation labelled with *text*, then each element formatted as
    two hexadecimal digits and as eight binary digits.

    data -- a sized iterable of integers (bytes, bytearray...)
    text -- label printed in front of the raw representation
    """
    # Pad the 'hex:' and 'bin:' labels so that their colon lines up with
    # the colon that follows *text* (no padding for labels of 4 chars or less).
    pref = ' ' * (len(text) - 4)
    print('length:', len(data))
    print(f'{text}: ', data)
    print(pref, 'hex:', " ".join(format(c, '02x') for c in data))
    print(pref, 'bin:', " ".join(format(c, '08b') for c in data))

Alexandre's avatar
Alexandre committed
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def is_valid_hostname(name):
    """Check that *name* looks like a host name or an IP address literal.

    The name is first canonicalized with canonicalize() and the result is
    searched with the re_host pattern.

    Return the match object (truthy) when the pattern matches, None
    otherwise. A name that cannot be IDNA-encoded (two consecutive dots
    for instance) is reported as invalid instead of raising UnicodeError.
    """
    try:
        name = canonicalize(name)
    except UnicodeError:
        # The idna codec rejects the name: it cannot be a valid host name.
        return None
    return re_host.search(name)

def canonicalize(hostname):
    """Return the canonical form of *hostname*.

    The name is lower-cased, converted to its ASCII (IDNA) form and a
    single trailing dot, if any, is removed. An empty string is returned
    unchanged.

    Raises UnicodeError when the idna codec rejects the name.
    """
    result = hostname.lower()
    # TODO handle properly the case where it fails with UnicodeError
    # (two consecutive dots for instance) to get a custom exception
    result = result.encode('idna').decode()
    # endswith() is safe on an empty string, unlike indexing the last
    # character.
    if result.endswith('.'):
        result = result[:-1]
    return result

def is_valid_ip_address(addr):
    """Tell whether *addr* is a valid IP address.

    Return a tuple (True, version) where version is the IP version
    reported by netaddr when the address is valid, (False, None)
    otherwise.
    """
    try:
        baddr = netaddr.IPAddress(addr)
    except (netaddr.core.AddrFormatError, ValueError):
        # netaddr signals most malformed addresses with AddrFormatError,
        # but raises ValueError for strings carrying a prefix or netmask
        # ("192.0.2.1/24"): neither is a plain IP address.
        return (False, None)
    return (True, baddr.version)

def is_valid_url(url):
    """Return True if *url* is an https URL with a non-empty network location.

    This is a very poor validation: many errors (for instance whitespace,
    IPv6 address literals without brackets...) are ignored.
    """
    try:
        parsed = urllib.parse.urlparse(url)
    except ValueError:
        # urlparse() refuses the URL outright.
        return False
    uses_https = parsed.scheme == "https"
    return uses_https and parsed.netloc != ""

def _get_certificate_san(x509cert):
    san = ""
    ext_count = x509cert.get_extension_count()
    for i in range(0, ext_count):
        ext = x509cert.get_extension(i)
        if "subjectAltName" in str(ext.get_short_name()):
            san = str(ext)
    return san

# Try one possible name. Names must be already canonicalized.
def _match_hostname(hostname, possibleMatch):
    if possibleMatch.startswith("*."): # Wildcard
        base = possibleMatch[1:] # Skip the star
        # RFC 6125 says that we MAY accept left-most labels with
        # wildcards included (foo*bar). We don't do it here.
        try:
            (first, rest) = hostname.split(".", maxsplit=1)
        except ValueError: # One-label name
            rest = hostname
        if rest == base[1:]:
            return True
        if hostname == base[1:]:
            return True
        return False
    else:
        return hostname == possibleMatch

# Try all the names in the certificate
def validate_hostname(hostname, cert):
    """Tell whether *hostname* is covered by one of the names of *cert*.

    hostname -- host name or IP address literal we connected to
    cert -- certificate object providing get_extension_count(),
            get_extension() and get_subject() (presumably a pyOpenSSL
            X509 object -- confirm against the callers)

    The Subject Alternative Names are tried first: "DNS:" entries when
    *hostname* is a name, "IP Address:" entries when it is an address.
    The Common Name of the subject is only tried afterwards. Return True
    on the first match, False when nothing matches.

    Raises UnicodeError if *hostname* itself cannot be canonicalized.
    """
    # Complete specification is in RFC 6125. It is long and
    # complicated and I'm not sure we do it perfectly.
    (is_addr, _) = is_valid_ip_address(hostname)
    hostname = canonicalize(hostname)
    dns_prefix = "DNS:"
    ip_prefix = "IP Address:"
    for alt_name in _get_certificate_san(cert).split(", "):
        if alt_name.startswith(dns_prefix) and not is_addr:
            # Slice the prefix off instead of splitting on it, so a value
            # that happens to contain the prefix again cannot break the
            # unpacking.
            try:
                base = canonicalize(alt_name[len(dns_prefix):])
            except UnicodeError:
                continue # Ignore names that cannot be IDNA-encoded, like broken IP addresses below.
            if _match_hostname(hostname, base):
                return True
        elif alt_name.startswith(ip_prefix) and is_addr:
            host_i = netaddr.IPAddress(hostname)
            base = alt_name[len(ip_prefix):]
            if base.endswith("\n"):
                base = base[:-1]
            try:
                base_i = netaddr.IPAddress(base)
            except netaddr.core.AddrFormatError:
                continue # Ignore broken IP addresses in certificates. Are we too liberal?
            if host_i == base_i:
                return True
        else:
            pass # Ignore unknown alternative name types. May be
                 # accept URI alternative names for DoH,
    # According to RFC 6125, we MUST NOT try the Common Name before the Subject Alternative Names.
    cn = cert.get_subject().commonName
    if cn is None:
        # No Common Name in the subject (presumably how the certificate
        # library reports a missing attribute): nothing left to try.
        return False
    try:
        cn = canonicalize(cn)
    except UnicodeError:
        # A Common Name that is not encodable as a host name cannot match.
        return False
    return _match_hostname(hostname, cn)

def get_addrfamily(addr, forceIPv4=False, forceIPv6=False):
    """Return the socket address family to use for *addr*.

    The result is socket.AF_INET when IPv4 is forced or *addr* is an IPv4
    literal, socket.AF_INET6 when IPv6 is forced or *addr* is an IPv6
    literal, and 0 otherwise.

    Raises homer.FamilyException when the forced family contradicts a
    literal IP address.
    """
    _is_ip, version = is_valid_ip_address(addr)

    # These consistency checks between the IP version and the command
    # line options might need to land somewhere else.
    if forceIPv4 and version == 6:
        raise homer.FamilyException("You cannot force IPv4 with a litteral IPv6 address (%s)" % addr)
    if forceIPv6 and version == 4:
        raise homer.FamilyException("You cannot force IPv6 with a litteral IPv4 address (%s)" % addr)

    if forceIPv4 or version == 4:
        return socket.AF_INET
    if forceIPv6 or version == 6:
        return socket.AF_INET6
    return 0

def check_ip_address(addr, forceIPv4=False, forceIPv6=False):
    """Delegate to get_addrfamily() with the same arguments and return its result."""
    family = get_addrfamily(addr, forceIPv4=forceIPv4, forceIPv6=forceIPv6)
    return family