User:Skillsbot/source

From Discworld MUD Wiki
Jump to: navigation, search
#!/usr/bin/perl

# This script is run without arguments.
# To run it, you need to:
#  - Have perl of version 5.10 or later.
#  - Have the 'wget' tool.
#  - Paste the output of your 'skills stats' command from the MUD into the
#    a file with the INPUT_FILENAME below.
#  - Manually fill out $password below with the actual password for the
#    account.

use strict;
use 5.010_000;
use URI::Escape;

use constant INPUT_FILENAME => "skills.dat";
use constant RESULT_FILENAME => "results.xml";
use constant COOKIE_FILENAME => "cookie.txt";
use constant C_CON => "C";
use constant C_DEX => "D";
use constant C_INT => "I";
use constant C_STR => "S";
use constant C_WIS => "W";
use constant C_NONE => "-";
use constant API_URL => "{{SERVER}}/w/api.php";
use constant USERNAME => "Skillsbot";
use constant APIHIGHLIMITS_QUERYCAP => 500;
use constant EDIT_SUMMARY => "Automatically created article";

my $password = <!--Fill this out yourself!-->;

my @columns;
my %skills;
my %children;
my %abbreviations;
my %chunk_names;
my %last_names;
my $max_columns = 0;
my $num_skills = 0;

my $m_con = C_CON;
my $m_dex = C_DEX;
my $m_int = C_INT;
my $m_str = C_STR;
my $m_wis = C_WIS;

my $login_token;
my $login_userid;
my $login_sessionid;
my $login_cookieprefix;

sub parse_file;
sub create_skills;
sub make_unique_shortnames;
sub get_shortname;
sub dump_results;
sub http_login;
sub http_logout;
sub remove_existing;
sub create_skill_articles;
sub create_abbreviations;

# Main execution
&parse_file(INPUT_FILENAME);
&create_skills();
&make_unique_shortnames();

&http_login($password);
sleep(1);
&remove_existing(\%skills);
sleep(1);
&remove_existing(\%abbreviations);
sleep(1);
&dump_results();
sleep(1);
&create_skill_articles();
sleep(1);
&create_abbreviations();
sleep(1);
&http_logout();

sub parse_file
{
  my ($filename) = @_;
  my $column;
  my $skill;
  my $nest_string;
  my $name;
  my $stats;
  my $nest_level;
  
  # Open the input file.  This should contain a dump of the output of your 
  # 'skills stats' command (strip out any 'more prompts').
  open (INFILE, $filename) or die "Could not open ".INPUT_FILENAME."\n";

  while ($_ = <INFILE>)
  {
    chomp;
    $column = 0;
    while ($_ ne "")
    {
      # Parse the skill.  This gets the '| ' bits in front (tells us the
      # nesting level), the skill name and its stat dependencies.  It 
      # deliberately only gets one skill at a time - this lets us keep track of 
      # the column count.
      $skill = $_;
      if ($skill =~ /(?<nest>([|] )*)(?<name>[a-z-]+)\.*[ ]+(?<stats>[CDSIW-]+)[ ]*/)
      {
        $_ = $';
        $nest_string = $+{nest};
        $name = $+{name};
        $stats = $+{stats};
        
        # Convert the nest string into a nesting level
        $nest_level = 0;
        while($nest_string ne "")
        {
          $nest_level++;
          $nest_string =~ s/[|] //;
        }
        push(@{$columns[$column]}, [$name, $nest_level, $stats]);
        $column++;
        $num_skills++;
        if ($column > $max_columns)
        {
          $max_columns = $column;
        }
      }
      else
      {
        last;
      }
    }  
  }

  close(INFILE);
}

sub create_skills
{
  my $i;
  my $j;
  my $k;
  my $name;
  my $full_name;
  my $nest_level;
  my $abbr_name;
  my $stats;
  my @tree;
  my $con;
  my $dex;
  my $int;
  my $str;
  my $wis;
  my $parent;
  my $chunk_name;
  
  # Iterate over each entry.  Because of the iteration ordering below, we do
  # all the entries in the first column, then all the entries in the second
  # column, and so on.  This gives us the 'correct' skill ordering -
  # specifically, such that all children follow their parents.  
  for ($i = 0; $i < $max_columns; $i++)
  {
    for ($j = 0; $j <= $#{$columns[$i]}; $j++)
    {
      $name = $columns[$i][$j][0];
      $nest_level = $columns[$i][$j][1];
      $stats = $columns[$i][$j][2];
      
      # First, we want to convert this skill into its full name - to do so,
      # we save off the name at each tree level when it is encountered, and
      # make use of the property that the last encountered skill at a given
      # tree level must be the direct ancestor of the current skill.     
      #
      # We also want the 2.2.2.2 form of each name, which we get using the 
      # same technique.
      #
      # We're also interested in storing the parent of each node - we'll use
      # it later when we're determining the abbreviations for elements.
      $tree[$nest_level] = $name;
      $full_name = $tree[0];
      $abbr_name = substr($tree[0], 0, 2);
      if ($nest_level > 0)
      {
        $parent = $tree[0];
        for ($k = 1; $k <= $nest_level; $k++)
        {
          $full_name = join('.', $full_name, $tree[$k]);
          $abbr_name = join('.', $abbr_name, substr($tree[$k], 0, 2));
          if ($k < $nest_level)
          {
            $parent = $full_name;
          }
        }
      }
      else
      {
        $parent = "";
      }
      
      # Put this skill in the children hash entry belonging to its parent.
      push(@{$children{$parent}}, $full_name);
      
      # Put this skill in the abbreviations hash entry belonging to the 
      # 2.2.2.2 name, and to its last name.
      push(@{$abbreviations{$abbr_name}}, $full_name);
      push(@{$abbreviations{$name}}, $full_name);
      
      # Save off this skill's last name for convenience.
      $last_names{$full_name} = $name;
      
      # Get the 'chunk name' for this element - this is the first two
      # characters of its final text portion.  This is used later when
      # we're sorting out the unique shortnames.
      $chunk_name = substr($name, 0, 2);
      $chunk_names{$full_name} = $chunk_name;
      
      # Now convert the stats, by simply counting up the occurrences.  Note
      # that there's a special case for certain tree roots that have no 
      # associated skills (such as people.trading).
      if ($stats eq C_NONE)
      {
        $con = C_NONE;
        $dex = C_NONE;
        $int = C_NONE;
        $str = C_NONE;
        $wis = C_NONE;
      }
      else
      {
        $con = (($stats =~ s/$m_con//g) or 0);
        $dex = (($stats =~ s/$m_dex//g) or 0);
        $int = (($stats =~ s/$m_int//g) or 0);
        $str = (($stats =~ s/$m_str//g) or 0);
        $wis = (($stats =~ s/$m_wis//g) or 0);
      }            
      
      print $full_name." (".$parent.") [".$con." ".$dex." ".$int." ".$str." ".$wis."]\n";
      @skills{$full_name} = [$con, $dex, $int, $str, $wis];
    } 
  }
}

sub make_unique_shortnames
{
  my $parent;
  my $i;
  my $j;
  my $sibling_a;
  my $sibling_b;
  my $len;
  
  # Iterate over all the parents.
  foreach $parent (keys %children)
  {
    # Each entry in the children array corresponds to a direct child of this
    # parent.  In order for shortnames to be unique, the chunk name for each
    # element must be unique amongst its direct siblings.  This is what we
    # enforce here.
    for ($i = 0; $i <= $#{@{children{$parent}}}; $i++)
    {
      $sibling_a = $children{$parent}[$i];
      for ($j = $i + 1; $j <= $#{@{children{$parent}}}; $j++)
      {
        $sibling_b = $children{$parent}[$j];
        $len = length($chunk_names{$sibling_a});
        while ($chunk_names{$sibling_a} eq $chunk_names{$sibling_b})
        {
          $len++;
          $chunk_names{$sibling_a} = substr($last_names{$sibling_a}, 0, $len);
          $chunk_names{$sibling_b} = substr($last_names{$sibling_b}, 0, $len);
        }        
      }     
    }    
  }
}

sub get_shortname
{
  my ($full_name) = @_;
  my $shortname;
  my @name_segments;
  my $cur_ancestor;
  my $i;
  
  # Split the full name up.
  @name_segments = split(/\./, $full_name);
  
  # Create the shortname as the concatenation of the chunk names of each of 
  # this skill's ancestors.
  $cur_ancestor = $name_segments[0];
  $shortname = $chunk_names{$cur_ancestor};
  for ($i = 1; $i <= $#name_segments; $i++)
  {
    $cur_ancestor = join('.', $cur_ancestor, $name_segments[$i]);
    $shortname = join('.', $shortname, $chunk_names{$cur_ancestor});
  }
  
  return $shortname;        
}

sub dump_results
{
  my $skill;
  my $abbreviation;  
  my $j;
  
  # Dump each skill, its unique shortname and its stat dependencies
  print "SKILLS:\n";
  foreach $skill (sort keys %skills)
  {
    print $skill." (".&get_shortname($skill)."): ".$skills{$skill}[0]." ".$skills{$skill}[1]." ".$skills{$skill}[2]." ".$skills{$skill}[3]." ".$skills{$skill}[4]."\n";
  }
  
  # Now dump all the abbreviations, highlighting those which are ambiguous
  print "\nABBREVIATIONS:\n";
  foreach $abbreviation (sort keys %abbreviations)
  {
    if ($#{@{abbreviations{$abbreviation}}} > 0)
    {
      print "* ";
    }
    print $abbreviation." ->";
  
    for ($j = 0; $j <= $#{@{abbreviations{$abbreviation}}}; $j++)
    {
      print " ".$abbreviations{$abbreviation}[$j];
    }
    print "\n";
  }        
}

sub http_login
{
  my ($password) = @_;
  my $post_data;
  my $command;
  
  # Build the POST data
  $post_data = "lgname=".USERNAME."&lgpassword=".uri_escape($password);
  
  # Execute login
  print "Logging in... ";
  $command = "wget -q --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."?action=login&format=xml\"";
  `$command`;
  
  open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
  $_ = <RESULTS>;
  close(RESULTS);
  
  # Check the login was successful
  if (!(/login result=\"Success\"/))
  {
    print "FAILED\n";
    print $_;
    die; 
  }
  
  print "OK\n";
  
  # Save off the tokens
  /lguserid=\"(?<userid>[^\"]+)\".*lgtoken=\"(?<token>[^\"]+)\" cookieprefix=\"(?<cp>[^\"]+)\" sessionid=\"(?<sessionid>[^\"]+)\"/;  
  $login_userid = $+{userid};
  $login_token = $+{token};
  $login_cookieprefix = $+{cp};
  $login_sessionid = $+{sessionid};
  
  print "Userid:        ".$login_userid."\n";
  print "Token:         ".$login_token."\n";
  print "Cookie prefix: ".$login_cookieprefix."\n";
  print "Session ID:    ".$login_sessionid."\n";  
}

sub http_logout
{
  my ($password) = @_;
  my $post_data;
  my $command;
  
  # Execute logout
  print "Logging out... ";
  $command = "wget -q --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=logout&format=xml\"";
  `$command`;
  
  # There aren't any useful results to speak of, and there's little we can do 
  # if the logout fails, so just end here.
  print "OK\n";
}

sub remove_existing
{
  my ($href) = @_;
  my $key;
  my @queries;
  my $query_num = 0;
  my $keys_in_query = 0;  
  my $query;
  my $command;
  
  # Build up lists of all the titles we want to query.
  foreach $key (sort keys %$href)
  {
    # Add each element into a query.  Note that there's a (high) limit on the 
    # number we can query with each http request, hence we may need to send
    # multiple batches
    if ($keys_in_query == 0)
    {
      $queries[$query_num] = $key;
    }
    else
    {
      $queries[$query_num] = join('|', $queries[$query_num], $key);
    }
    $keys_in_query++;
    if ($keys_in_query >= APIHIGHLIMITS_QUERYCAP)
    {
      $query_num++;
      $keys_in_query = 0;
    }    
  }
  
  # Now send in the queries.
  foreach $query (@queries)
  {
    print "Querying: ".$query."\n\n";
    $command = "wget -q --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&titles=".$query."\"";
    `$command`;    
    
    open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
    $_ = <RESULTS>;
    close(RESULTS);
    
    # Parse the results.  We're looking for pages that _aren't_ marked as 
    # missing.  Since those are the pages that already exist, erase them from
    # our local list of pages to deal with.
    while (/title=\"(?<key>[^\"]+)\" \/>/)
    {
      $key = lc($+{key});
      $_ = $';
      delete $$href{$key};
    }    
  }  
}

sub create_skill_articles
{
  my $skill;
  my $command;
  my $edit_token;
  my $article_text;
  my $post_data;
  my $is_branch = 0;
  my $childlevel = 0;
  my @elements;
  my $child;
  my $child_string;
  my $child_num;
    
  # Get an edit token
  $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&prop=info&intoken=edit&titles=Main%20Page\"";
  `$command`;    
    
  open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
  $_ = <RESULTS>;
  close(RESULTS);
    
  # Parse the results to get the edit token.
  if (/edittoken=\"(?<token>[^\"]+)\"/)
  {
    $edit_token = $+{token};
  } 
  else
  {
    print "Could not find edit token:\n";
    print $_;
    die;
  }   
  
  foreach $skill (sort keys %skills)
  {  
  
    # Determine whether this skill is a leaf or a branch.
    if (exists $children{$skill})
    {
      # Branch
      $is_branch = 1;
      @elements = split(/\./, $skill);
      $childlevel = $#elements + 2;
    }
    else
    {
      # Leaf
      $is_branch = 0;
    }        
      
    # Build the article data
    if ($is_branch == 1)
    {
      $article_text = "{{Infonav skill\n".
                      " |shortform=".&get_shortname($skill)."\n".
                      " |con=".$skills{$skill}[0]."\n".
                      " |dex=".$skills{$skill}[1]."\n".
                      " |int=".$skills{$skill}[2]."\n".
                      " |str=".$skills{$skill}[3]."\n".
                      " |wis=".$skills{$skill}[4]."\n".
                      " |childlevel=".$childlevel."\n";
      $child_num = 1;
      for ($child_num = 0; $child_num <= $#{@{children{$skill}}}; $child_num++)
      {
        $child = $children{$skill}[$child_num];
        @elements = split(/\./, $child);
        $child_string = " |child".($child_num + 1)."=".$elements[$#elements]."\n";
        $article_text = join('', $article_text, $child_string);        
      }
      
      $article_text = join('', 
                           $article_text,               
                           "}}\n".
                           "\n".
                           "{{skillsbot-stub}}\n".
                           "\n".
                           "[[Category:Skills]]\n");
    }
    else
    {
      $article_text = "{{Infonav skill\n".
                      " |shortform=".&get_shortname($skill)."\n".
                      " |con=".$skills{$skill}[0]."\n".
                      " |dex=".$skills{$skill}[1]."\n".
                      " |int=".$skills{$skill}[2]."\n".
                      " |str=".$skills{$skill}[3]."\n".
                      " |wis=".$skills{$skill}[4]."\n".
                      "}}\n".
                      "\n".
                      "{{skillsbot-stub}}\n".
                      "\n".
                      "[[Category:Skills]]\n";
    }
   
    # Build the POST data.     
    $post_data = "action=edit".
                 "&format=xml".
                 "&title=".uri_escape(ucfirst($skill)).
                 "&summary=".uri_escape(EDIT_SUMMARY).
                 "&bot".
                 "&createonly".
                 "&text=".uri_escape($article_text).
                 "&token=".uri_escape($edit_token);
                 
    # Execute the command, check it completed OK.             
    print "Creating: ".$skill." ... ";
    $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."\"";
    `$command`;    
    
    open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
    $_ = <RESULTS>;
    close(RESULTS);
    
    # Check the login was successful
    if (!(/result=\"Success\"/))
    {
      print "FAILED\n";
      print $_;
      die; 
    }
    print "OK\n";
  }
}

sub create_abbreviations
{
  my $abbr;
  my $command;
  my $edit_token;
  my $article_text;
  my $post_data;
  my $is_disambig = 0;
  my $child;
  my $child_string;
  my $child_num;
    
  # Get an edit token
  $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&prop=info&intoken=edit&titles=Main%20Page\"";
  `$command`;    
    
  open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
  $_ = <RESULTS>;
  close(RESULTS);
    
  # Parse the results to get the edit token.
  if (/edittoken=\"(?<token>[^\"]+)\"/)
  {
    $edit_token = $+{token};
  } 
  else
  {
    print "Could not find edit token:\n";
    print $_;
    die;
  }   
  
  foreach $abbr (sort keys %abbreviations)
  {  
    # Determine whether this it to be a redirect or disambiguation.
    if ($#{@{abbreviations{$abbr}}} > 0)
    {
      # Disambiguation
      $is_disambig = 1;
    }
    else 
    {
      # Redirect
      $is_disambig = 0;
    }
      
    # Build the article data
    if ($is_disambig == 1)
    {
      $article_text = "{{disambig}}\n".
                      "\n".
                      "'''".$abbr."''' may refer to:\n";
      for ($child_num = 0; $child_num <= $#{@{abbreviations{$abbr}}}; $child_num++)
      {
        $child = $abbreviations{$abbr}[$child_num];
        $child_string = "* [[".$child."]]\n";
        $article_text = join('', $article_text, $child_string);        
      }      
    }
    else
    {
      $child = $abbreviations{$abbr}[0];
      $article_text = "#REDIRECT [[".$child."]]\n";
    }
   
    # Build the POST data.     
    $post_data = "action=edit".
                 "&format=xml".
                 "&title=".uri_escape(ucfirst($abbr)).
                 "&summary=".uri_escape(EDIT_SUMMARY).
                 "&bot".
                 "&createonly".
                 "&text=".uri_escape($article_text).
                 "&token=".uri_escape($edit_token);
                 
    # Execute the command, check it completed OK.             
    print "Creating: ".$abbr." ... ";
    $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."\"";
    `$command`;    
    
    open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
    $_ = <RESULTS>;
    close(RESULTS);
    
    # Check the login was successful
    if (!(/result=\"Success\"/))
    {
      print "FAILED\n";
      print $_;
      die; 
    }
    print "OK\n";
  }
}