# User:Bagatelle/python
#
# From A KoL Wiki
import re
import datetime

# directory path; not referenced in the visible part of this file (presumably
# the working directory for file I/O done elsewhere -- verify)
# NOTE: this is a raw string, so the trailing "\\" is two literal backslashes
# (a raw string literal cannot end in a single backslash)
Dir = r"c:\personal\python\qa\\"

################################################################################

# utility function that needs to be defined prior to pattern definitions
# return a lowercase abbreviation of class given a title-cased plural of it
# return "1" if not in dictionary (all classes)
def ClassAbbrev(InMatch):
  if InMatch.group(1):
    return {
      "Seal Clubbers": "sc", "Turtle Tamers": "tt", "Pastamancers": "pm",
      "Saucerors": "s", "Disco Bandits": "db", "Accordion Thieves": "at"
    }[InMatch.group(1)]
  else:
    return "1"

# duplicated nuisance text
# one non-capturing alternation of the boilerplate that can follow an item
# description; the description patterns append it as their terminator
FooterText = "(?:" + "|".join([
  r"Type:",
  r"Selling Price:",
  r"Cannot be discarded",
  r"Cannot be traded",
  r"Free pull from Hagnk's",
  r"</blockquote>",
  r"\(Meat Pasting component\)",
  r"\(Meatsmithing component\)",
  r"\(Cocktailcrafting ingredient\)",
  r"\(Cooking ingredient\)",
  r"\(Jewelrymaking component\)"
]) + ")"

# pop-up parsing patterns
# see Parser() for documentation
# each entry is [field name, regular expression, optional replacement]; the
# replacement, where given, is ClassAbbrev, which turns a class name into its
# abbreviation
# Parser() compiles each expression with Compile(), i.e. case-insensitively
# and with "." also matching newlines
PatsKoL = [
  ["haiku", r"<img [^>]+?><blockquote>"],
  [
    "image",
    r"(?:<blockquote>)?<img src=\"http://images\.kingdomofloathing\.com/" +
      r"(.*?/[^/]*?\.gif)"
  ],
  ["name", r"<div [^>]+?><center><img .+?><br><b>(.+?)</b></center><p>"],
  ["desc", r"<p><blockquote>(.+?)<br>(?:<br>)?" + FooterText],
  ["haikuname", r"<img .+?><blockquote><b>(.+?)</b><br>"],
  [
    "haikudesc",
    # negative lookbehind: do not start at the <p><blockquote> that "desc"
    # matches (a lookahead in this position could never fail, because the
    # text at that point has to be "<blockquote>")
    r"(?<!<p>)<blockquote><b>(?:.+?)</b><br>(.+?)<br><br>" + FooterText
  ],
  ["paste", r"<br>\(Meat Pasting component\)"],
  ["smith", r"<br>\(Meatsmithing component\)"],
  ["cocktail", r"<br>\(Cocktailcrafting ingredient\)"],
  ["cook", r"<br>\(Cooking ingredient\)"],
  ["jewelry", r"<br>\(Jewelrymaking component\)"],
  ["type", r"<br>Type: <b>(.+?)</b>"],
  ["alsocombat", r"<br>Type: <b>.+?</b><br>\(can also be used in combat\)"],
  # "power" captures the number, "powertype" the label when it is not "Power"
  [
    "power",
    r"<br>(?:Power|Damage Reduction|Capacity): <b>(\d+?(?: per level)?)</b>"
  ],
  [
    "powertype",
    r"<br>(Damage Reduction|Capacity): <b>\d+?(?: per level)?</b>"
  ],
  # same line parsed twice: once for the stat name, once for the amount
  ["stat", r"<br>(Muscle|Mysticality|Moxie) Required: <b>\d+?</b>"],
  ["statreq", r"<br>(?:Muscle|Mysticality|Moxie) Required: <b>(\d+?)</b>"],
  ["level", r"<br>Level required: <b>(\d+)</b>"],
  ["outfit", r"<br>Outfit: <b>(.+?)</b>\."],
  ["autosell", r"<br>Selling Price: <b>(\d+) Meat\.</b>"],
  ["cost", r"<br>Cost: <b>(\d+?) Meat</b>"],
  ["notrade", r"<br>Cannot be traded"],
  ["nodiscard", r"<br>Cannot be discarded"],
  ["gift", r"<br><b>Gift Item</b>"],
  ["quest", r"<br><b>Quest Item</b>"],
  # the enchantment text itself, without a trailing class restriction...
  [
    "enchantment",
    r"<center>Enchantment:<br><b><font color=blue>(.+?)"
      r"(?:(?:<br>)+\(Bonus for (?:Seal Clubbers|Turtle Tamers|Pastamancers|"
      r"Saucerors|Disco Bandits|Accordion Thieves) only\))?(?:<br>)*"
      r"</font></b></center>"
  ],
  # ...and the class restriction on its own, abbreviated by ClassAbbrev
  [
    "enchclass",
    r"(?:<br\s*/?><br\s*/?>)"
      r"\(Bonus for (Seal Clubbers|Turtle Tamers|Pastamancers|"
      r"Saucerors|Disco Bandits|Accordion Thieves) only\)</font></b></center>",
    ClassAbbrev
  ],
  [
    "critical",
    r"<br><b>NOTE:</b> If you wear multiple items that increase your Critical "
      r"Hit multiplier, only the highest multiplier applies\."
  ],
  [
    "nohardcore",
    r"<br><b>NOTE:</b> This item cannot be equipped while in Hardcore\."
  ],
  [
    "mpreduce",
    r"<br><b>NOTE:</b> Items that reduce the MP cost of skills will not do so"
      r" by more than 3 points, in total\."
  ],
  [
    "limit",
    r"<br><b>NOTE:</b> You may not equip more than one of this item at a time\."
  ],
  ["hagnk", r"<br>Free pull from Hagnk's"],
  [
    "spelltype",
    r"<br><b>NOTE:</b> This item only works for (.+?) Spells\."
  ],
  [
    "class",
    r"<p><font color=blue><b>Only "
      r"(Seal Clubbers|Turtle Tamers|Pastamancers|"
      r"Saucerors|Disco Bandits|Accordion Thieves) "
      r"may use this item\.</b></font>",
    ClassAbbrev
  ],
  # the class group is optional here, so ClassAbbrev can yield "1"
  [
    "grantskill",
    r"<br><b>NOTE:</b> This item grants a skill(?: that can only be used by "
      r"(Seal Clubbers|Turtle Tamers|Pastamancers|"
      r"Saucerors|Disco Bandits|Accordion Thieves))?\.",
    ClassAbbrev
  ],
  [
    "lounge",
    r"<br><b>NOTE:</b> When used, this item will be installed in your "
      r"Clan Hall's VIP Lounge, and will be usable by anybody in your Clan "
      r"with VIP Lounge access\."
  ],
  # any other blue note; the lookahead excludes the one "class" already takes
  [
    "bluenote",
    r"<p><font color=blue><b>"
      r"(?!Only "
      r"(?:Seal Clubbers|Turtle Tamers|Pastamancers|"
      r"Saucerors|Disco Bandits|Accordion Thieves) "
      r"may use this item\.)"
      r"(.+?)</b></font>"
  ]
]

# patterns that rewrite one enchantment line into the form {{item}} expects
# each entry is [name, regular expression, replacement template];
# StandardiseEnchantmentText() feeds them to Parser() one line at a time
# the <font> tag around element names is matched but not kept; the element
# name moves into an {{element}} template call
PatsEnchantmentClean = [
  [
    "eltdmg",
    r"^\+(\d+) (?:<font color=\"?(?:blue|red|blueviolet|gray|green)\"?>)"
      r"(Cold|Hot|Sleaze|Spooky|Stench) Damage(?:</font>)$",
    r"+\1 {{element|\2}}"
  ],
  [
    "eltspelldmg",
    r"^\+(\d+) (?:Damage )?to "
      r"(?:<font color=\"?(?:blue|red|blueviolet|gray|green)\"?>)"
      r"(Cold|Hot|Sleaze|Spooky|Stench) Spells(?:</font>)$",
    r"+\1 Damage to {{element|\2|Spells}}"
  ],
  [
    "eltpassive",
    # the "-N" half of a damage range is optional, so a single value such as
    # "Deals 5 ..." is accepted as well as "Deals 5-10 ..."
    r"Deals (\d+(?:\-\d+)?) "
      r"(?:<font color=\"?(?:blue|red|blueviolet|gray|green)\"?>)"
      r"(Cold|Hot|Sleaze|Spooky|Stench) Damage</font> to attackers",
    r"Deals \1 {{element|\2}} to attackers"
  ],
  ["intrinsic", r"^Intrinsic effect: (.+)$", r"Intrinsic effect: [[\1]]"]
]

# replacement callback for re.sub(): gives back the text of the first group
# that captured something non-empty, or "" when no group did
def ReturnFirstGroup(Match):
  return next((Text for Text in Match.groups() if Text), "")

# pattern order matters for stuff like "underwater only" tags
# each entry is [enchantment type, regular expression, optional replacement];
# ParseEnchantment() hands the list to Parser(), which stops at the first
# pattern that matches a line
# the type names are the keys that MechanicsLinks refers to
# many patterns accept the amount on either side of the label ("+5 Muscle" or
# "Muscle +5"); ReturnFirstGroup then picks whichever group captured it
PatsEnchantmentParse = [
  [
    "mus", r"^(?:((?:\+|\-)\s*\d+)\s+)Muscle$|^Muscle(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  [
    "muspct",
    r"^(?:((?:\+|\-)\s*\d+%)\s+)Muscle$|"
      r"^Muscle(?:\s+((?:\+|\-)\s*\d+%))$",
    ReturnFirstGroup
  ],
  [
    "mys",
    r"^(?:((?:\+|\-)\s*\d+)\s+)Mysticality$|"
      r"^Mysticality(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  [
    "myspct",
    r"^(?:((?:\+|\-)\s*\d+%)\s+)Mysticality$|"
      r"^Mysticality(?:\s+((?:\+|\-)\s*\d+%))$",
    ReturnFirstGroup
  ],
  [
    "mox",
    r"^(?:((?:\+|\-)\s*\d+)\s+)Moxie$|"
      r"^Moxie(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  [
    "moxpct",
    r"^(?:((?:\+|\-)\s*\d+%)\s+)Moxie$|"
      r"^Moxie(?:\s+((?:\+|\-)\s*\d+%))$",
    ReturnFirstGroup
  ],
  ["hp", r"^Maximum HP(?:\s+((?:\+|\-)\s*\d+))$"],
  ["mp", r"^Maximum MP(?:\s+((?:\+|\-)\s*\d+))$"],
  ["hpmp", r"^Maximum HP/MP(?:\s+((?:\+|\-)\s*\d+))$"],
  [
    "weapon",
    r"^(?:((?:\+|\-)\s*\d+)\s+)Weapon Damage$|"
      r"^Weapon Damage(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  ["weaponpct", r"^Weapon Damage(?:\s+((?:\+|\-)\s*\d+%))$"],
  [
    "ranged",
    r"^(?:((?:\+|\-)\s*\d+)\s+)Ranged Damage$|"
      r"^Ranged Damage(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  ["rangedpct", r"^Ranged Damage(?:\s+((?:\+|\-)\s*\d+%))$"],
  [
    "spell",
    r"^(?:((?:\+|\-)\s*\d+)\s+)Spell Damage$|"
      r"^Spell Damage(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  [
    "spellpct",
    r"^(?:((?:\+|\-)\s*\d+%)\s+)Spell Damage$|"
      r"^Spell Damage(?:\s+((?:\+|\-)\s*\d+%))$",
    ReturnFirstGroup
  ],
  [
    "init",
    r"^(?:((?:\+|\-)\s*\d+%)\s+)Combat Initiative$|"
      r"^Combat Initiative(?:\s+((?:\+|\-)\s*\d+%))$",
    ReturnFirstGroup
  ],
  ["da", r"^Damage Absorption(?:\s+((?:\+|\-)\s*\d+))$"],
  ["dr", r"^Damage Reduction:(?:\s+((?:\+|\-)?\s*\d+))$"],
  [
    "fumble",
    # all pieces are raw strings: "\d" in a plain string literal is an
    # invalid escape sequence
    r"^(Never) Fumble$|^(Reduced chance) of fumbling$|"
      r"^([\d.]+x) chance of Fumble$",
    ReturnFirstGroup
  ],
  ["meat", r"^(?:((?:\+|\-)\s*\d+%)\s*)Meat from Monsters$"],
  ["ml", r"^(?:((?:\+|\-)\s*\d+)\s*)to Monster Level$"],
  ["crit", r"^(?:((?:\+|\-)?\s*\d+(?:x|%))\s*)chance of Critical Hit$"],
  ["mpreduce", r"^(?:((?:\+|\-)?\s*\d+)\s*)MP to use Skills$"],
  ["hobo", r"^(?:((?:\+|\-)?\s*\d+)\s*)Hobo Power$"],
  [
    "rollover",
    r"^(?:((?:\+|\-)?\s*\d+)\s*)Adventure\(s\) per day when equipped\.?$"
  ],
  ["pvp", r"^(?:((?:\+|\-)?\s*\d+)\s*)PvP fight\(s\) per day when equipped\.?$"],
  [
    "items", r"^(?:((?:\+|\-)\s*\d+%)\s*)(.+)\s*Drops? from (.+)( \(.+\))?$",
    # amount, what drops, from what, and the parenthetical when there is one,
    # separated by single spaces
    lambda X: (
      X.group(1) + " " + X.group(2) +
      ((X.group(3) is not None) * " ") + (X.group(3) or "") +
      ((X.group(4) is not None) * " ") + (X.group(4) or "")
    ).strip()
  ],
  [
    "allattributes",
    r"^All Attributes(?:\s+((?:\+|\-)\s*\d+))$",
    ReturnFirstGroup
  ],
  [
    "allattributespct",
    r"^All Attributes(?:\s+((?:\+|\-)\s*\d+%))$",
    ReturnFirstGroup
  ],
  ["fam", r"^(?:((?:\+|\-)\s*\d+)\s*)(?:to )?Familiar Weight$"],
  ["songs", r"^Allows you to keep (\d+) songs in your head instead of 3\.$"],
  [
    "mpregen", r"^Regenerate ([\d-]*) MP per adventure( \(.+\))?$",
    lambda X: (X.group(1) + " " + (X.group(2) or "")).strip()
  ],
  ["hpregen", r"^Regenerate ([\d-]*) HP per adventure$"],
  ["hpmpregen", r"^Regenerate ([\d-]*) HP and(?: \1)? MP per adventure$"],
  [
    "stats",
    r"^(?:((?:\+|\-)?\s*.+)\s*)(Muscle|Mysticality|Moxie)?"
      r"\s*Stat(?:s|\(s\)) Per Fight$",
    r"\1 \2"
  ],
  ["combatfreq", r"^Monsters will be (more|less) attracted to you\.?$"],
  ["intrinsic", r"Intrinsic effect: \[\[(.*)\]\]"],

  [
    "res",
    r"^(Slight|So-So|Serious|Stupendous|Superhuman|Sublime) "
      r"(?:Resistance to (All Elements)|"
      r"(Cold|Hot|Sleaze|Spooky|Stench) Resistance)$",
    lambda X: X.group(1) + " " + (X.group(2) or X.group(3))
  ],
  [
    "slimeres",
    r"^(Slight|So-So|Serious|Stupendous|Superhuman|Sublime) Slime Resistance$"
  ],
  # the {{element|...}} forms below are what PatsEnchantmentClean produces
  [
    "elt",
    r"^(?:((?:\+|\-)?\s*\d+)\s*)\{\{element\|"
      r"(Cold|Hot|Sleaze|Spooky|Stench)\}\}$",
    r"\1 \2"
  ],
  [
    "eltspell",
    r"^(?:((?:\+|\-)?\s*\d+)\s*)(?:Damage )?to \{\{element\|"
      r"(Cold|Hot|Sleaze|Spooky|Stench)\|Spells\}\}$",
    r"\1 \2"
  ],
  [
    "tunespell",
    r"^All Spells Cast Are (Cold|Hot|Sleazy|Spooky|Stinky)$",
    # map the adjective in the text onto the element name used elsewhere
    lambda X: dict(
      Cold = "Cold", Hot = "Hot", Sleazy = "Sleaze", Spooky = "Spooky",
      Stinky = "Stench"
    )[X.group(1)]
  ],
  [
    "damagevs", r"^(?:((?:\+|\-)?\s*\d+)\s*)Damage vs\. (.+)$", r"\1 \2"
  ],
  [
    "passive",
    r"^Deals ([\d-]*) \{\{element\|(Cold|Hot|Sleaze|Spooky|Stench)\}\} "
      r"to attackers$|"
      r"^Deals ([\d-]*) damage to attackers$|"
      r"^Damages Attacking Opponent\s*(\(.+\))?$",
    # elemental amount, plain amount, parenthetical, or "some" as a last resort
    lambda X:
      (X.group(1) and X.group(1) + " " + X.group(2)) or
      X.group(3) or
      X.group(4) or
      "some"
  ],
  [
    "weaken",
    r"Successful hit weakens opponent\.",
  ],
  [
    "dbcombat",
    r"(?:((?:\+|\-)?\s*\d+)\s*) damage to Disco Bandit Combat Skills"
  ],
  ["diver", r"Makes you a better diver"],
  [
    "oncritical",
    r"^On Critical: (.+)\s*$|^(.+) on Critical Hit$|^Critical Hits (.+)$",
    ReturnFirstGroup
  ]
]

# songs has no mech page
# intrinsic has no mech page
# dbcombat has no mech page
# list of [enchantment types, mechanics page title]; a page is linked when an
# item has at least one of the listed enchantment types
# written as (page title, space-separated types) pairs and expanded below
MechanicsLinks = [
  [Types.split(), Title] for Title, Types in (
    ("Muscle Modifiers", "mus muspct allattributes allattributespct"),
    ("Mysticality Modifiers", "mys myspct allattributes allattributespct"),
    ("Moxie Modifiers", "mox moxpct allattributes allattributespct"),
    ("HP Increasers", "hp hpmp"),
    ("MP Increasers", "mp hpmp"),
    ("Bonus Weapon Damage", "weapon weaponpct elt damagevs"),
    ("Bonus Ranged Damage", "ranged rangedpct"),
    ("Bonus Spell Damage", "spell spellpct eltspell"),
    ("Combat Initiative", "init"),
    ("Damage Absorption", "da"),
    ("Damage Reduction", "dr"),
    ("Fumble Chance", "fumble"),
    ("Meat from Monsters", "meat"),
    ("Items from Monsters", "items"),
    ("Monster Level", "ml weaken"),
    ("Critical Hit Chance", "crit"),
    ("Skill MP Cost Modifiers", "mpreduce"),
    ("Hobo Power", "hobo"),
    ("Extra Rollover Adventures", "rollover"),
    ("Extra PvP Fights", "pvp"),
    ("Familiar Weight", "fam"),
    ("HP Restorers", "hpregen hpmpregen"),
    ("MP Restorers", "mpregen hpmpregen"),
    ("Stat Gains from Fights", "stats"),
    ("Combat Frequency", "combatfreq"),
    ("Elemental Resistance", "res"),
    ("Slime Resistance", "slimeres"),
    ("Elemental Spell Damage", "tunespell"),
    ("Passive Damage", "passive"),
    ("Critical Hit", "oncritical"),
    ("Underwater adventuring", "diver")
  )
]

# plurals of weapons > 1 instance, for use in categories
# keyed by the singular weapon type; the value is the plural form
WeaponPlurals = {
  "blowgun": "blowguns",
  "rifle": "rifles",
  "yoyo": "yoyos",
  "banjo": "banjos",
  "bow": "bows",
  "horn": "horns",
  "saucepan": "saucepans",
  "whistle": "whistles",
  "accordion": "accordions",
  "boomerang": "boomerangs",
  "flute": "flutes",
  "umbrella": "umbrellas",
  "drum": "drums",
  "guitar": "guitars",
  "pistol": "pistols",
  "axe": "axes",
  "chefstaff": "chefstaves",
  "knife": "knives",
  "spear": "spears",
  "flail": "flails",
  "polearm": "polearms",
  "whip": "whips",
  "utensil": "utensils",
  "crossbow": "crossbows",
  "staff": "staves",
  "club": "clubs",
  "sword": "swords",
  "sling": "slingshots"
}

# style of the replacement text nudge
# inline CSS declarations (double-size, bold text); not referenced in the
# visible part of this file
NudgeStyle = "font-size:200%; font-weight:bold;"

################################################################################

# for lazy typists
# compiles Pat so that matching ignores letter case and "." also matches
# newlines
def Compile(Pat):
  Flags = re.IGNORECASE | re.DOTALL
  return re.compile(Pat, Flags)

# find next place in given text where "parens" are matched
# Open and Close are literal delimiter texts (matched case-insensitively,
# since Compile() sets re.I); the text is scanned left to right, counting both
# returns the index just past the delimiter at which the two counts first
# become equal, or None if that never happens (e.g. no delimiter in the text)
# NOTE: only the counts are compared; a closer seen before its opener is not
# treated specially
def FindBalancePos(InStr, Open, Close):
  # group 1 captures an opener, group 2 a closer
  Pat = Compile("(" + re.escape(Open) + ")|(" + re.escape(Close) + ")")
  NumOpen = 0
  NumClose = 0
  for Match in Pat.finditer(InStr):
    if Match.group(1):
      NumOpen += 1
    else:
      NumClose += 1
    if NumOpen == NumClose:
      # the captured delimiter is the whole match, so this is its end
      return Match.end()
  return None

# merge one more parsed slice into the record of what has been parsed so far
# Old is a list of [start, end] pairs in ascending order, Add a single such
# pair; the result is a new list in which Add has been inserted and fused with
# every stored pair it overlaps or touches
# e.g. with [[1, 5], [10, 12], [15, 19]] already "used", adding [11, 14]
# gives [[1, 5], [10, 14], [15, 19]]
def CombineRanges(Old, Add):
  if not Old:
    return [Add]
  Total = len(Old)
  # first stored pair that does not end before the addition starts; all pairs
  # ahead of it are untouched by the addition
  First = next(
    (Idx for Idx, Span in enumerate(Old) if Add[0] <= Span[1]), Total
  )
  Result = list(Old[:First])
  # work on a copy so the caller's pair is never modified
  Merged = [Add[0], Add[1]]
  Idx = First
  while Idx < Total:
    Low, High = Old[Idx][0], Old[Idx][1]
    Overlaps = (
      Low <= Merged[0] <= High or
      Low <= Merged[1] <= High or
      Merged[0] <= Low <= Merged[1] or
      Merged[0] <= High <= Merged[1]
    )
    if not Overlaps:
      break
    # swallow the stored pair and keep looking at the next one
    Merged = [min(Merged[0], Low), max(Merged[1], High)]
    Idx += 1
  Result.append(Merged)
  # whatever was not swallowed follows unchanged
  Result.extend(Old[Idx:])
  return Result

# work out which parts of a string are still unparsed: the gaps around and
# between the parsed slices
# R is a list of [start, end] pairs (slice delimiters of the parsed areas, in
# ascending order); Length is the total length of the input text
# returns a list of [start, end] pairs
def ComplementRange(R, Length):
  if not R:
    return [[0, Length]]
  Gaps = []
  # text ahead of the first parsed slice
  if R[0][0] > 0:
    Gaps.append([0, R[0][0]])
  # space between each parsed slice and the one after it
  for Left, Right in zip(R, R[1:]):
    Gaps.append([Left[1], Right[0]])
  # text after the last parsed slice
  if R[-1][1] < Length:
    Gaps.append([R[-1][1], Length])
  return Gaps

## function that returns a list with two components
## the second list component is a list of strings from the input text that
## weren't parsed
## the first is a dictionary of parsed fields from the HTML source
## call:
## *InText is the HTML source.
## *Fields is a list of lists instructing how each field is extracted from the
##  source.
## **The first component of each inner list is the field's name (text); the
##   output dictionary uses this as a key.
## **The second is a RE, possibly with groups, identifying how to extract
##   from the HTML.
## ***If it has groups, it will return the first group as the key value on
##    a match as a default; if it has no groups, it returns "1" on a match; on
##    a non-match, it returns a blank string. The defaults can be overridden
##    by the third component.
## **The third list component is optional, to override the default extraction
##   of field values. It performs RE replacement according to the RE pattern
##   (input previously) and the substitution string/function (the third list
##   component).
## e.g., Fields could be:
## [
##   [
##     "req",
##     "<br>(Muscle|Mysticality|Moxie) Required: <b>(\d+?)</b>",
##     r"\1 \2"
##   ],
##   [
##     "class",
##     "<p><font color=blue><b>Only "
##       "(Seal Clubbers|Turtle Tamers|Pastamancers|"
##       "Saucerors|Disco Bandits|Accordion Thieves) "
##       "may use this item.</b></font>",
##     lambda X: {
##       "Seal Clubbers": "SC", "Turtle Tamers": "TT", "Pastamancers": "P",
##       "Saucerors": "S", "Disco Bandits": "DB", "Accordion Thieves": "AT"
##     }[X.group(1)]
##   ]
## ]
## *SingleParse is true/false, indicating whether the parser should stop once
##  the first pattern matches. Used for parsing enchantments one at a time;
##  no need to try every pattern once a match is found.
def Parser(InText, Fields, SingleParse = True):
  Item = {}
  Parsed = []
  for Field in Fields:
    Name, Pat = Field[0], Compile(Field[1])
    # how the field value is derived from the match: the caller's override,
    # else "1" for a pattern without groups, else the first group
    if len(Field) > 2:
      Extract = Field[2]
    elif Pat.groups == 0:
      Extract = "1"
    else:
      Extract = r"\1"
    Match = Pat.search(InText)
    if not Match:
      # when every field is wanted, a miss is recorded as a blank string
      if not SingleParse:
        Item[Name] = ""
      continue
    # remember which slice of the input this field accounted for
    Parsed = CombineRanges(Parsed, [Match.start(0), Match.end(0)])
    Item[Name] = Pat.sub(Extract, Match.group(0)).strip()
    if SingleParse:
      break
  # done parsing requested fields; see which bits of the input weren't parsed
  # can safely strip out remaining HTML tags and leading/trailing spaces
  PatTag = Compile(r"</?.+?>")
  Leftovers = [
    PatTag.sub("", InText[Start:End]).strip()
    for Start, End in ComplementRange(Parsed, len(InText))
  ]
  # pieces that were nothing but tags and whitespace are not reported
  return [Item, [Text for Text in Leftovers if Text != ""]]

# clean desc field
# turns the HTML of an item description into wiki text: whitespace and
# <br>/<p> tags are normalised into line breaks, <b>/<i> become wiki quote
# markup, and HTML comments are put back as they were in the input
# Body is the description HTML; returns the cleaned text
def CleanDescKoL(Body):
  # save to retain comments
  Body = Body.strip()
  OrigBody = Body
  # an embedded smithing-plan table is reduced to a {{plans}} template call
  PatPlan = Compile(
    r"This detailed set of plans will teach you how to smith a fancy new "
      r"item:\n"
      r"<p><center><table style='border: 1px solid black;' cellpadding=5>"
      r"<tr><td align=center><img style='vertical-align: middle' "
      r"class=hand src='http://images\.kingdomofloathing\.com/itemimages/"
      r".+?\.gif' onclick='descitem\(.+?\)'><br><b>"
      r"(.+?)</b></td></tr></table>"
  )
  Body = PatPlan.sub(r"{{plans|\1}}", Body)
  Body = re.sub(r"\s+", " ", Body) # multispace -> space
  Body = re.sub(r"(?is)\s*<\s*br\s*/?>\s*", "<br />", Body) # standard <br />
  Body = re.sub(r"(?is)\s*</p>\s*", "", Body) # strip closing </p>
  Body = re.sub(r"(?is)(\s*<p>\s*){1,}", "<p>", Body) # multiple p to single
  Body = re.sub(r"(?is)(</?)super>", r"\1sup>", Body) # <super> -> <sup>
  # small fonts: these can be nested, but none so far, so let's ignore it...
  Body = re.sub(
    r"(?is)<font size\s*=\s*\"?1\"?>(.+?)</font>", r"<small>\1</small>", Body
  )
  # handle whitespace/tag weirdness
  # whitespace just inside a tag is moved outside it; repeated until stable
  PatOpen = Compile(r"(<(?!/)[^>]*?>)\s+")
  PatClose = Compile(r"\s+(</[^>]*?>)")
  while True:
    Temp = PatOpen.sub(r" \1", Body)
    Temp = PatClose.sub(r"\1 ", Temp)
    if Temp == Body:
      break
    Body = Temp
  # swap order of open/close <i/b> with <br />s
  PatIB = Compile(r"(.*?)(<(?:b|i)>)(<br />)(.*?)")
  PatIB2 = Compile(r"(.*?)(<br />)(</(?:b|i)>)(.*?)")
  while True:
    Temp = Body
    Body = PatIB.sub(r"\1\3\2\4", Body)
    Body = PatIB2.sub(r"\1\3\2\4", Body)
    if Temp == Body: break
  Body = re.sub(r"(?is)<br />\s*<p>", "<p>", Body) # break-p renders to p
  Body = re.sub(r"(?is)(\s*<p>\s*){1,}", "<p>", Body) # multiple p to single
  Body = re.sub(r"(?is)\s*<p>\s*", r"<br /><br />", Body) # <p> -> two breaks
  # break-comment -> comment-break
  PatBreakComment = Compile(r"(<br />)\s*(<!--.*?-->)")
  while True:
    Temp = Body
    Body = re.sub(PatBreakComment, r"\2\1", Body)
    if Temp == Body: break
  # strip breaks from beginning/end
  while True:
    Temp = Body
    Body = re.sub(r"(?is)^<br />|<br />$", "", Body)
    if Temp == Body: break
  # collapse multiple <br />s into linebreaks
  Body = re.sub(
    r"((?:<br />){2,})", lambda x: x.group(1).count("<br />") * "\n", Body
  )
  Body = re.sub(r"<br />", "<br />\n", Body)
  Body = re.sub(r" *\n *", "\n", Body)
  # balance <i/b/small> for breaks
  # a tag left open at the end of a line is closed there and reopened at the
  # start of the next non-empty line
  NewBody = ""
  NumI = NumB = NumS = 0
  PatNowiki = Compile(r"^([\[\]*#;:{}])(.*)")
  for Line in Body.split("\n"):
    if (NumB or NumI or NumS) and Line != "":
      Line = NumB * "<b>" + NumI * "<i>" + NumS * "<small>" + Line
      NumI = NumB = NumS = 0
    for Tag in ["i", "b", "small"]:
      if Line.count("<" + Tag + ">") > Line.count("</" + Tag + ">"):
        BR = Line.endswith("<br />")
        # the closing tag goes in front of a trailing break, if there is one;
        # without a break the whole line is kept (a slice ending at -0 would
        # be empty and throw the line's text away)
        Stem = Line[:-6] if BR else Line
        Line = Stem + "</" + Tag + ">" + BR * "<br />"
        NumB += (Tag == "b")
        NumI += (Tag == "i")
        NumS += (Tag == "small")
    # NOTE(review): this substitution puts back exactly what it matched, so it
    # changes nothing; presumably a line starting with wiki markup was meant
    # to be escaped here -- confirm the intended replacement
    Line = PatNowiki.sub(r"\1\2", Line)
    NewBody += Line + "\n"

  Body = NewBody.strip()
  # swap syntax
  Body = re.sub(r"</?b>", r"'''", Body)
  Body = re.sub(r"</?i>", r"''", Body)
  # reset comments
  # the Nth comment in the cleaned text is replaced by the Nth comment of the
  # original text, together with the newlines that preceded it there
  PatComment = Compile(r"(<!--.*?-->)")
  PatComment2 = Compile(r"(\n*<!--.*?-->)")
  New = PatComment.finditer(Body)
  Orig = PatComment2.findall(OrigBody)
  NewBody = ""
  Prev = 0
  for N, Match in enumerate(New):
    NewBody += Body[Prev:Match.start()] + Orig[N]
    Prev = Match.end()
  Body = NewBody + Body[Prev:]
  return Body

# clean up enchantment text for {{item}}
# this is not a complete enchantment parser
# InText holds the enchantment lines separated by <br>; the cleaned lines are
# returned joined by <br />
def StandardiseEnchantmentText(InText):
  # nothing to clean: hand the empty input straight back
  if not InText:
    return InText
  CleanedList = []
  # process each line separately
  for Enchantment in InText.split("<br>"):
    Fields, Leftover = Parser(Enchantment, PatsEnchantmentClean)
    if Leftover:
      # if unparsed components, just use the text as-is
      Cleaned = Leftover[0]
    elif Fields:
      # should be only one
      Cleaned = list(Fields.values())[0]
    else:
      Cleaned = ""
    CleanedList.append(Cleaned)
  return "<br />".join(CleanedList)

# parse enchantments into types so we know what See Also links are needed
# not everything is caught, as there are many exceptional cases
# reads Item["enchantment"] (lines separated by <br />)
# returns None when there is no enchantment text, otherwise a two-item list:
# a dictionary mapping enchantment type to the list of values found for it,
# and a list of the texts that no pattern accounted for
def ParseEnchantment(Item):
  Enchantments = Item["enchantment"]
  if not Enchantments:
    return None
  PatParen = Compile(r"^\([^()]+\)$")
  Lines = [X for X in Enchantments.split("<br />") if X != ""]
  # if next is parenthetical, append to previous
  NewEnchantments = []
  Pos = 0
  while Pos < len(Lines):
    if Pos + 1 < len(Lines) and PatParen.search(Lines[Pos + 1]):
      NewEnchantments.append(Lines[Pos] + " " + Lines[Pos + 1])
      Pos += 2
    else:
      NewEnchantments.append(Lines[Pos])
      Pos += 1
  # parsing
  EnchantmentDict = {}
  Unparsed = []
  for Enchantment in NewEnchantments:
    Fields, Leftover = Parser(Enchantment, PatsEnchantmentParse)
    if Leftover:
      Unparsed.append(Leftover[0])
    elif Fields:
      # should be only one
      Name, Value = list(Fields.items())[0]
      EnchantmentDict.setdefault(Name, []).append(Value)
    # a line with neither a match nor leftover text (e.g. only whitespace)
    # carries no information and is skipped
  return [EnchantmentDict, Unparsed]

# clean up parser output
# works on the Item dictionary in place and returns nothing
def GeneralCleanupKoL(Item):
  # switch in haiku format if applicable: the haiku fields take the place of
  # the regular name and description, then disappear
  if Item["haiku"]:
    for Key in ("name", "desc"):
      Item[Key] = Item["haiku" + Key]
    for Key in ("haikuname", "haikudesc"):
      del Item[Key]
  Item["desc"] = CleanDescKoL(Item["desc"])
  Item["enchantment"] = StandardiseEnchantmentText(Item["enchantment"])
  Item["parsedenchantment"] = ParseEnchantment(Item)
  # suppressed level requirements
  if not Item["level"] and Item["type"] in ("food", "beverage", "booze"):
    Item["level"] = "1"
  # weapon types and subtypes
  Weapon = Compile(
    r"^(ranged )?weapon \((\d)-handed (.*)\)$"
  ).search(Item["type"])
  if Weapon:
    # a group that did not take part in the match counts as blank
    # (group 1, when present, keeps the space that follows "ranged")
    Range, Hands, WeaponType = [Weapon.group(N) or "" for N in (1, 2, 3)]
  else:
    Range = Hands = WeaponType = ""
  Item["weapon"] = "1" if Weapon else ""
  Item["weaponrange"] = Range
  Item["weaponhands"] = Hands
  Item["weapontype"] = WeaponType
  # a weapon that is not marked as ranged gets its range from its subtype
  if Weapon and not Range:
    if WeaponType in ("saucepan", "utensil", "chefstaff"):
      Item["weaponrange"] = "mysticality"
    else:
      Item["weaponrange"] = "melee"

# month names in calendar order, for the Mr. Store date stamp
_MonthNames = [
  "January", "February", "March", "April", "May", "June",
  "July", "August", "September", "October", "November", "December"
]

# build the "Month Year" stamp used for the Mr. Store notes: the month of the
# given date, or the following month from the 20th onwards
def _MrStoreDateString(Today):
  # datetime.date.month runs 1-12; shift it to a 0-based index into the names
  MonthIndex = Today.month - 1
  Year = Today.year
  if Today.day >= 20:
    MonthIndex = (MonthIndex + 1) % 12
    # rolled over from December into January of the next year
    Year += (MonthIndex == 0)
  return _MonthNames[MonthIndex] + " " + str(Year)

# convert parsed item into a Wiki article
# Item is the dictionary of parsed fields (the keys read here include those
# that GeneralCleanupKoL() adds); ItemID and Desc are id texts, from which
# everything but digits and "-" (ItemID) or every "?" (Desc) is removed
# returns the article skeleton as one string; texts such as "COMMENT" or
# "USE TEXT" in it are placeholders
def ConvertItem(Item, ItemID = "", Desc = ""):
  ItemID = re.sub(r"(?is)[^\d\-]", "", ItemID).strip()
  Desc = re.sub(r"\?", "", Desc).strip()
  Usable = Item["type"].find("usable") > -1
  Potion = Item["type"] == "potion"
  # forced to a bool: Item["alsocombat"] is a string, and Combat is added to
  # the other flags below when counting the kinds of use
  Combat = Item["type"].find("combat") > -1 or bool(Item["alsocombat"])
  Familiar = Item["type"] == "familiar"
  Food = Item["type"] in ["food", "beverage"]
  Booze = Item["type"] == ("booze")
  Spleen = Usable and Item["level"] and not Item["class"]

  # calculate a range of dates to automatically input Mr. Store dates
  DateString = _MrStoreDateString(datetime.date.today())

  OutStr = "{{NeedsSpading|COMMENT}}\n{{NeedsSpading|COMMENT}}"
  # {{item}}
  OutStr += "\n{{item|\nitemid=" + ItemID + "|\ndescid=" + Desc
  for X in [
    "desc", "paste", "smith", "cocktail", "cook", "jewelry",
    "type", "power", "powertype"
  ]:
    if Item[X]: OutStr += "|\n" + X + "=" + Item[X]
  if Item["stat"] or Item["statreq"]:
    # plain (not raw) strings, so that "\n" is a line break like everywhere
    # else in the template
    OutStr += "|\nstat=" + Item["stat"] + "|\nstatreq=" + Item["statreq"]
  for X in ["level", "alsocombat", "outfit"]:
    if Item[X]: OutStr += "|\n" + X + "=" + Item[X]
  if Item["nodiscard"]: OutStr += "|\nautosell=0"
  for X in ["autosell", "cost", "notrade", "gift", "quest", "enchantment"]:
    if Item[X]: OutStr += "|\n" + X + "=" + Item[X]
  for X in [
    "critical", "nohardcore", "mpreduce", "limit", "hagnk", "class",
    "spelltype", "grantskill", "lounge", "bluenote", "haiku"
  ]:
    if Item[X]: OutStr += "|\n" + X + "=" + Item[X]
  # other standard sections
  OutStr += "}}"
  OutStr += "\n\n==Recipe=="
  OutStr += \
    "\n{| class=\"recipe\"" + \
    "\n|- class=\"row1\"" + \
    "\n! {{cocktail}}" + \
    "\n| [[ingredient 1]]" + \
    "\n| [[ingredient 2]]" + \
    "\n|- class=\"row2\"" + \
    "\n! {{equals}}" + \
    "\n| colspan=\"2\" | [[product]]" + \
    "\n|}"
  OutStr += "\n\n==Obtained From==" + \
    "\n;[[Location]]\n:[[Monster]]\n:''[[Non-Combat]]''"
  if Potion or Usable or Combat or Familiar or Food or Booze or Spleen:
    # NOTE(review): this branch is kept as found; for a Spleen item it emits
    # the heading and the closing "}}" without the opening "{{useitem|", and
    # it is only reached for usable/combat items -- confirm what is intended
    if Usable or Combat:
      if Spleen:
        OutStr += "\n\n==When Used=="
      else:
        OutStr += "\n\n==When Consumed=="
        OutStr += "\n{{useitem|\ntext=CONSUME TEXT"
        OutStr += "|\nadv=|\nmus=gain |\nmys=gain |\nmox=gain |"
        OutStr += \
          "\nposteffect={{acquireEffect|effect=EFFECT|duration=?}}|\ntype="
      if Food: OutStr += "food"
      if Booze: OutStr += "booze"
      if Spleen: OutStr += "spleen"
      OutStr += "|\nlimiter=?<!-- when the limiter is found, the " + \
        "value in the category tag needs to be filled out too -->}}"
    if (Potion or (not Spleen and Usable) or Combat or Familiar):
      OutStr += "\n\n==When Used=="
      # with more than one kind of use, each gets its own bullet heading
      MultiUses = ((Usable + Potion + Combat + Familiar) > 1)
      if Familiar:
        OutStr += MultiUses * "\n*''From inventory:''" + \
          "\n{{useitem|\ntext=USE TEXT|\ntype=familiar}}"
      if Combat:
        OutStr += MultiUses * "\n*''In combat:''" + \
          "\n{{useitem|\ntext=USE TEXT|\ntype=combat}}"
      if Usable or Potion:
        OutStr += MultiUses * "\n*''From inventory:''" + \
          "\n{{useitem|\ntext=USE TEXT" + \
          "|\neffect={{acquireEffect|effect=EFFECT|duration=?}}}}"

  if Item["paste"] or Item["smith"] or \
     Item["cook"] or Item["cocktail"] or Item["jewelry"]:
    OutStr += "\n\n==Uses==\n*[[]]"
  OutStr += "\n\n==Notes=="
  if Item["type"] == "familiar":
    OutStr += "\n*Becomes a [[Some Familiar]]."
  OutStr += "\n*" + DateString + \
    "'s special of the month from [[Mr. Store]]." + \
    "\n*Its [[Mr. Store]] description was:\n*:DESCRIPTION"

  # See Also
  ParsedEnchantments = Item["parsedenchantment"]
  if ParsedEnchantments:
    ParsedEnchantments = ParsedEnchantments[0]
    if len(ParsedEnchantments):
      OutStr += "\n\n==See Also=="
      # a shield whose power is Damage Reduction also links to that page
      ShieldDR = (
        Item["type"] == "off-hand item (shield)" and
        Item["power"] and Item["powertype"] == "Damage Reduction"
      )
      # one link per mechanics page that covers any of the item's enchantment
      # types; the "dr" test looks at the entry's list of types
      for MechanicsLink in MechanicsLinks:
        if set(MechanicsLink[0]) & set(ParsedEnchantments.keys()) or \
           ("dr" in MechanicsLink[0] and ShieldDR):
          OutStr += "\n*[[" + MechanicsLink[1] + "]]"
      if "elt" in ParsedEnchantments:
        # prismatic: every one of the five elements appears among the
        # elemental damage enchantments
        Prismatic = True
        for Element in ["Cold", "Hot", "Sleaze", "Spooky", "Stench"]:
          PatElt = Compile(r"\b" + Element + r"\b")
          Matched = [X for X in ParsedEnchantments["elt"] if PatElt.search(X)]
          if not len(Matched):
            Prismatic = False
            break
        if Prismatic:
          OutStr += "\n*[[Prismatic Damage]]"

  # collection/category
  if not Item["quest"]:
    OutStr += "\n\n==Collection==\n<collection>" + ItemID + "</collection>"
  OutStr += \
    "\n\n{{iotm|duration=" + DateString + "|before=" + "PREVIOUS IOTM|after=}}"
  if Food:
    OutStr += \
      "\n\n[[Category:Food (By Fullness)|?, " + Item["name"].lower() + "]]"
  if Booze:
    OutStr += \
      "\n\n[[Category:Booze (By Drunkenness)|?, " + Item["name"].lower() + "]]"
  if Spleen:
    OutStr += "\n\n[[Category:Spleentacular Items (By Spleen Damage)|?, " + \
      Item["name"].lower() + "]]"
  if Item["weapon"]:
    # the parsed range can end in a space ("ranged "); trim it so the
    # category name is spaced normally, then capitalise the first letter
    RangeType = Item["weaponrange"].strip()
    RangeType = RangeType[:1].upper() + RangeType[1:] + " Weapons"
    OutStr += "\n\n[[Category:" + RangeType + "]]"
    OutStr += "\n[[Category:" + Item["weaponhands"] + "-Handed Weapons]]"
    OtherType = \
      WeaponPlurals.get(Item["weapontype"], "other weapons").title()
    OutStr += "\n[[Category:" + OtherType + "]]"
  return OutStr