Commit b41db117 authored by Jost Rossel
Browse files

Add destructive mutator class

parent 74c311f4
"""Takes a string containing XML and mutates it."""
import io
import random
import re
from copy import deepcopy
from lxml import etree
from .mutator import Mutator
class DestructiveMutator(Mutator):
    """Mutator whose mutation functions destroy the XML syntax.

    The XML text is split into a flat list of string items (tags and the
    text between them, see ``_to_list``). Every mutation edits that list and
    the XML string is rebuilt by joining the items. When an input no longer
    contains anything that looks like a tag, the mutator falls back to the
    last input that parsed as XML (``_fallback_xml``).

    Mutation methods are named ``additive_*``, ``subtractive_*`` or
    ``neutral_*``. NOTE(review): the ``*_function_pool`` attributes used in
    ``_exec_mutation`` are not defined here; presumably ``Mutator`` builds
    them from these name prefixes -- confirm before renaming any method.
    """

    def __init__(
        self,
        seed,
        number_of_mutations_between: tuple[int, int] = None,
        number_of_mutations: int = None,
    ) -> None:
        """Create an empty mutator; call ``init`` to load an XML document.

        All three arguments are forwarded unchanged to ``Mutator.__init__``
        together with this instance's class.
        """
        super().__init__(self.__class__, seed, number_of_mutations_between, number_of_mutations)
        # Items of the split XML and their count (kept in sync by the mutators).
        self.elements: list[str] = []
        self.number_of_elements = 0
        # Last input that lxml parsed successfully; used when an input has no tags.
        self._fallback_xml: str = ""
        self.xml: str = ""
        # Length threshold deciding between additive and subtractive mutations.
        self.target_size: int = 0

    def init(self, input_xml: bytearray):
        """Initialize the mutator from an XML bytearray.

        The input is decoded as UTF-8 and split into items. If lxml can parse
        it, it also becomes the new fallback document and its length becomes
        the target size.

        Raises:
            UnicodeDecodeError: if ``input_xml`` is not valid UTF-8.
            ValueError: if neither the input nor the fallback contains a tag
                (raised by ``_to_list``).
        """
        self.xml = input_xml.decode(encoding="utf-8")
        try:
            etree.parse(io.BytesIO(input_xml))
        except etree.ParseError:
            # Unparsable input is still mutated; only the fallback is left untouched.
            pass
        else:
            # Strings are immutable, so no copy is needed to keep the fallback safe.
            self._fallback_xml = self.xml
            self.target_size = len(self._fallback_xml)
        self.elements = self._to_list()
        self.number_of_elements = len(self.elements)

    def mutate(self, max_size) -> bytearray:
        """Apply a batch of mutations and return the result as a bytearray.

        ``max_size`` is the maximal length (in bytes) of the returned
        bytearray; half of it is used as the target size that steers the
        choice between additive and subtractive mutations. The number of
        mutations executed is obtained from ``self.number_of_mutations()``.
        """
        self.target_size = max_size // 2
        for _ in range(self.number_of_mutations()):
            self._exec_mutation()
        # Trim from the end until the UTF-8 encoding fits. Unary minus binds
        # tighter than ``//``, so the step is floor(-max_size / 8), i.e. at
        # least one character for any positive max_size. Only ``self.xml`` is
        # trimmed; ``self.elements`` keeps the untrimmed items.
        while len(self.xml.encode("utf-8")) > max_size:
            self.xml = self.xml[: (-max_size) // 8]
        return bytearray(self.xml, encoding="utf-8")

    def _exec_mutation(self) -> None:
        """Run one randomly chosen mutation and rebuild ``self.xml``.

        At or below the target size an additive mutation is chosen, above it
        a subtractive one; without a target size any mutation may be chosen.
        """
        if self.target_size is None:
            function_pool = self.complete_function_pool
        elif len(self.xml) <= self.target_size:
            function_pool = self.additive_function_pool
        else:
            function_pool = self.subtractive_function_pool
        func = random.choice(function_pool)
        # The pool entries are invoked with the instance as explicit argument.
        func(self)
        self.xml = "".join(self.elements)

    @staticmethod
    def _split_on_tags(text: str) -> list[str]:
        """Split ``text`` (newlines removed) around every ``<...>`` span, keeping the spans."""
        return re.split(r"(<.*?>)", text.replace("\n", ""))

    def _to_list(self) -> list[str]:
        """Separate the XML into a "line by line" list of items.

        This does not happen perfectly (e.g. with CDATA) but that doesn't
        really matter for fuzzing.

        Example:
            xml:    '<xml a="b">f00</xml>'
            result: ['<xml a="b">', 'f00', '</xml>']

        If the current XML contains no tag at all, ``self.xml`` is reset to
        the fallback document and that is split instead.

        Raises:
            ValueError: if the fallback document contains no tag either.
        """
        pieces = self._split_on_tags(self.xml)
        if len(pieces) < 2:
            self.xml = self._fallback_xml
            pieces = self._split_on_tags(self.xml)
            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if len(pieces) < 2:
                raise ValueError("neither the input nor the fallback XML contains a tag")
        return [piece for piece in pieces if piece != ""]

    def _random_index(self):
        """Return a random valid index into ``self.elements``, or ``None`` if it is empty."""
        if self.number_of_elements < 1:
            return None
        return random.randint(0, self.number_of_elements - 1)

    def _replace_random_char(self, make_replacement) -> None:
        """Replace one random character of a random item with ``make_replacement()``.

        Items that are empty or whitespace-only are left untouched. The
        replacement is generated only after both positions have been drawn.
        """
        index = self._random_index()
        if index is None:
            return
        item = self.elements[index]
        if item.strip() == "":
            return
        position = random.randint(0, len(item) - 1)
        self.elements[index] = item[:position] + make_replacement() + item[position + 1 :]

    def neutral_switch_items(self) -> None:
        """Switches two random items in the list."""
        if self.number_of_elements < 2:
            return
        index1, index2 = random.sample(range(self.number_of_elements), 2)
        self.elements[index1], self.elements[index2] = self.elements[index2], self.elements[index1]

    def subtractive_drop_item(self) -> None:
        """Drops a random item from the list (never shrinks below two items)."""
        if self.number_of_elements < 3:
            return
        index = random.randint(0, self.number_of_elements - 1)
        self.elements.pop(index)
        self.number_of_elements -= 1

    def additive_duplicate_item(self) -> None:
        """Duplicates a random item at a random position in the list."""
        if self.number_of_elements < 2:
            # Too few items to sample two indices: double the list instead.
            # Recount rather than increment so an empty list stays at zero.
            self.elements += self.elements
            self.number_of_elements = len(self.elements)
            return
        index1, index2 = random.sample(range(self.number_of_elements), 2)
        self.elements.insert(index2, self.elements[index1])
        self.number_of_elements += 1

    def neutral_reverse_item(self) -> None:
        """Reverses the item itself (the characters)."""
        index = self._random_index()
        if index is None:
            return
        self.elements[index] = self.elements[index][::-1]

    def subtractive_drop_startend_char_of_item(self) -> None:
        """Drops first and last character from a random item."""
        index = self._random_index()
        if index is None:
            return
        self.elements[index] = self.elements[index][1:-1]

    def subtractive_remove_special_char_of_item(self) -> None:
        """Removes every occurrence of one random special character from a random item."""
        index = self._random_index()
        if index is None:
            return
        # "<" and ">" appear twice in the choice string, so they are picked more often.
        char = random.choice("<<&>>\"'/")
        self.elements[index] = self.elements[index].replace(char, "")

    def subtractive_drop_char_of_item(self) -> None:
        """Drops a random character from a random item (blank items are skipped)."""
        index1 = self._random_index()
        if index1 is None:
            return
        item = self.elements[index1]
        if item.strip() == "":
            return
        index2 = random.randint(0, len(item) - 1)
        self.elements[index1] = item[:index2] + item[index2 + 1 :]

    def additive_add_special_char_to_item(self) -> None:
        """Adds a special XML character (or a space) at the start and end of a random item."""
        index = self._random_index()
        if index is None:
            return
        char1, char2 = random.choices("<&>\"'/ ", k=2)
        self.elements[index] = char1 + self.elements[index] + char2

    def additive_add_tags_to_item(self) -> None:
        """Wraps the first word of a random item in ``<...>`` and the last in ``</...>``.

        For an item without spaces both wrappers are applied to the same word.
        """
        index = self._random_index()
        if index is None:
            return
        element = self.elements[index].split(" ")
        element[0] = "<" + element[0] + ">"
        element[-1] = "</" + element[-1] + ">"
        self.elements[index] = " ".join(element)

    def subtractive_strip_elements(self) -> None:
        """Strips all elements off their leading/trailing whitespace."""
        self.elements = [element.strip() for element in self.elements]

    def neutral_replace_char_with_utf8(self) -> None:
        """Replaces a random character of a random item with ``self.rcg.random_utf8_string()``.

        NOTE(review): ``self.rcg`` is not defined in this class; presumably it
        is provided by ``Mutator`` -- confirm.
        """
        self._replace_random_char(self.rcg.random_utf8_string)

    def neutral_replace_char_with_ascii(self) -> None:
        """Replaces a random character of a random item with ``self.rcg.random_ascii_string()``.

        NOTE(review): ``self.rcg`` is not defined in this class; presumably it
        is provided by ``Mutator`` -- confirm.
        """
        self._replace_random_char(self.rcg.random_ascii_string)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment