User:Skillsbot/source
#!/usr/bin/perl
# This script is run without arguments.
# To run it, you need to:
# - Have Perl version 5.10 or later.
# - Have the 'wget' tool.
# - Paste the output of your 'skills stats' command from the MUD into
#   a file with the INPUT_FILENAME below.
# - Manually fill out $password below with the actual password for the
#   account.

use strict;
use 5.010_000;
use URI::Escape;

use constant INPUT_FILENAME  => "skills.dat";
use constant RESULT_FILENAME => "results.xml";
use constant COOKIE_FILENAME => "cookie.txt";

use constant C_CON  => "C";
use constant C_DEX  => "D";
use constant C_INT  => "I";
use constant C_STR  => "S";
use constant C_WIS  => "W";
use constant C_NONE => "-";

use constant API_URL  => "{{SERVER}}/w/api.php";
use constant USERNAME => "Skillsbot";
use constant APIHIGHLIMITS_QUERYCAP => 500;
use constant EDIT_SUMMARY => "Automatically created article";

my $password = <!--Fill this out yourself!-->;

my @columns;
my %skills;
my %children;
my %abbreviations;
my %chunk_names;
my %last_names;
my $max_columns = 0;
my $num_skills = 0;
my $m_con = C_CON;
my $m_dex = C_DEX;
my $m_int = C_INT;
my $m_str = C_STR;
my $m_wis = C_WIS;
my $login_token;
my $login_userid;
my $login_sessionid;
my $login_cookieprefix;

sub parse_file;
sub create_skills;
sub make_unique_shortnames;
sub get_shortname;
sub dump_results;
sub http_login;
sub http_logout;
sub remove_existing;
sub create_skill_articles;
sub create_abbreviations;

# Main execution
&parse_file(INPUT_FILENAME);
&create_skills();
&make_unique_shortnames();
&http_login($password);
sleep(1);
&remove_existing(\%skills);
sleep(1);
&remove_existing(\%abbreviations);
sleep(1);
&dump_results();
sleep(1);
&create_skill_articles();
sleep(1);
&create_abbreviations();
sleep(1);
&http_logout();

sub parse_file {
    my ($filename) = @_;
    my $column;
    my $skill;
    my $nest_string;
    my $name;
    my $stats;
    my $nest_level;

    # Open the input file. This should contain a dump of the output of your
    # 'skills stats' command (strip out any 'more' prompts).
    open (INFILE, $filename) or die "Could not open ".INPUT_FILENAME."\n";

    while ($_ = <INFILE>) {
        chomp;
        $column = 0;

        while ($_ ne "") {
            # Parse the skill. This gets the '| ' bits in front (tells us the
            # nesting level), the skill name and its stat dependencies. It
            # deliberately only gets one skill at a time - this lets us keep
            # track of the column count.
            $skill = $_;

            if ($skill =~ /(?<nest>([|] )*)(?<name>[a-z-]+)\.*[ ]+(?<stats>[CDSIW-]+)[ ]*/) {
                $_ = $';
                $nest_string = $+{nest};
                $name = $+{name};
                $stats = $+{stats};

                # Convert the nest string into a nesting level
                $nest_level = 0;
                while ($nest_string ne "") {
                    $nest_level++;
                    $nest_string =~ s/[|] //;
                }

                push(@{$columns[$column]}, [$name, $nest_level, $stats]);
                $column++;
                $num_skills++;

                if ($column > $max_columns) {
                    $max_columns = $column;
                }
            }
            else {
                last;
            }
        }
    }

    close(INFILE);
}
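
# Illustrative only (not taken from the original source): a hypothetical
# fragment of the kind of 'skills stats' dump the regex above is written to
# accept. The real MUD output may differ in column widths and spacing.
#
#   adventuring.............. -        covert................... -
#   | direction.............. IIW      | casing................. I
#   | | area................. IIW      | | person............... I
#
# Each "| " prefix adds one nesting level, the run of letters (C/D/S/I/W) at
# the end of an entry gives its stat dependencies (counted up later), and a
# lone "-" marks an entry with no dependencies of its own.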
sub create_skills {
    my $i;
    my $j;
    my $k;
    my $name;
    my $full_name;
    my $nest_level;
    my $abbr_name;
    my $stats;
    my @tree;
    my $con;
    my $dex;
    my $int;
    my $str;
    my $wis;
    my $parent;
    my $chunk_name;

    # Iterate over each entry. Because of the iteration ordering below, we do
    # all the entries in the first column, then all the entries in the second
    # column, and so on. This gives us the 'correct' skill ordering -
    # specifically, such that all children follow their parents.
    for ($i = 0; $i < $max_columns; $i++) {
        for ($j = 0; $j <= $#{$columns[$i]}; $j++) {
            $name       = $columns[$i][$j][0];
            $nest_level = $columns[$i][$j][1];
            $stats      = $columns[$i][$j][2];

            # First, we want to convert this skill into its full name - to do
            # so, we save off the name at each tree level when it is
            # encountered, and make use of the property that the last
            # encountered skill at a given tree level must be the direct
            # ancestor of the current skill.
            #
            # We also want the 2.2.2.2 form of each name, which we get using
            # the same technique.
            #
            # We're also interested in storing the parent of each node - we'll
            # use it later when we're determining the abbreviations for
            # elements.
            $tree[$nest_level] = $name;
            $full_name = $tree[0];
            $abbr_name = substr($tree[0], 0, 2);

            if ($nest_level > 0) {
                $parent = $tree[0];
                for ($k = 1; $k <= $nest_level; $k++) {
                    $full_name = join('.', $full_name, $tree[$k]);
                    $abbr_name = join('.', $abbr_name, substr($tree[$k], 0, 2));
                    if ($k < $nest_level) {
                        $parent = $full_name;
                    }
                }
            }
            else {
                $parent = "";
            }

            # Put this skill in the children hash entry belonging to its parent.
            push(@{$children{$parent}}, $full_name);

            # Put this skill in the abbreviations hash entry belonging to the
            # 2.2.2.2 name, and to its last name.
            push(@{$abbreviations{$abbr_name}}, $full_name);
            push(@{$abbreviations{$name}}, $full_name);

            # Save off this skill's last name for convenience.
            $last_names{$full_name} = $name;

            # Get the 'chunk name' for this element - this is the first two
            # characters of its final text portion. This is used later when
            # we're sorting out the unique shortnames.
            $chunk_name = substr($name, 0, 2);
            $chunk_names{$full_name} = $chunk_name;

            # Now convert the stats, by simply counting up the occurrences.
            # Note that there's a special case for certain tree roots that have
            # no associated skills (such as people.trading).
            if ($stats eq C_NONE) {
                $con = C_NONE;
                $dex = C_NONE;
                $int = C_NONE;
                $str = C_NONE;
                $wis = C_NONE;
            }
            else {
                $con = (($stats =~ s/$m_con//g) or 0);
                $dex = (($stats =~ s/$m_dex//g) or 0);
                $int = (($stats =~ s/$m_int//g) or 0);
                $str = (($stats =~ s/$m_str//g) or 0);
                $wis = (($stats =~ s/$m_wis//g) or 0);
            }

            print $full_name." (".$parent.") [".$con." ".$dex." ".$int." ".$str." ".$wis."]\n";

            $skills{$full_name} = [$con, $dex, $int, $str, $wis];
        }
    }
}

sub make_unique_shortnames {
    my $parent;
    my $i;
    my $j;
    my $sibling_a;
    my $sibling_b;
    my $len;

    # Iterate over all the parents.
    foreach $parent (keys %children) {
        # Each entry in the children array corresponds to a direct child of
        # this parent. In order for shortnames to be unique, the chunk name for
        # each element must be unique amongst its direct siblings. This is what
        # we enforce here.
        for ($i = 0; $i <= $#{$children{$parent}}; $i++) {
            $sibling_a = $children{$parent}[$i];

            for ($j = $i + 1; $j <= $#{$children{$parent}}; $j++) {
                $sibling_b = $children{$parent}[$j];

                $len = length($chunk_names{$sibling_a});
                while ($chunk_names{$sibling_a} eq $chunk_names{$sibling_b}) {
                    $len++;
                    $chunk_names{$sibling_a} = substr($last_names{$sibling_a}, 0, $len);
                    $chunk_names{$sibling_b} = substr($last_names{$sibling_b}, 0, $len);
                }
            }
        }
    }
}
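
# A worked example with hypothetical names (not taken from the original
# source): if a parent had direct children "magic.methods.mental" and
# "magic.methods.metaphysical", both would initially get the chunk name "me",
# so the loop above lengthens both until they differ ("men" vs "met").
# get_shortname() below then joins the chunk names of every ancestor, so
# "magic.methods.mental" might come out as "ma.me.men", assuming no other
# collisions along the way.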
sub get_shortname {
    my ($full_name) = @_;
    my $shortname;
    my @name_segments;
    my $cur_ancestor;
    my $i;

    # Split the full name up.
    @name_segments = split(/\./, $full_name);

    # Create the shortname as the concatenation of the chunk names of each of
    # this skill's ancestors.
    $cur_ancestor = $name_segments[0];
    $shortname = $chunk_names{$cur_ancestor};

    for ($i = 1; $i <= $#name_segments; $i++) {
        $cur_ancestor = join('.', $cur_ancestor, $name_segments[$i]);
        $shortname = join('.', $shortname, $chunk_names{$cur_ancestor});
    }

    return $shortname;
}

sub dump_results {
    my $skill;
    my $abbreviation;
    my $j;

    # Dump each skill, its unique shortname and its stat dependencies.
    print "SKILLS:\n";
    foreach $skill (sort keys %skills) {
        print $skill." (".&get_shortname($skill)."): ".$skills{$skill}[0]." ".$skills{$skill}[1]." ".$skills{$skill}[2]." ".$skills{$skill}[3]." ".$skills{$skill}[4]."\n";
    }

    # Now dump all the abbreviations, highlighting those which are ambiguous.
    print "\nABBREVIATIONS:\n";
    foreach $abbreviation (sort keys %abbreviations) {
        if ($#{$abbreviations{$abbreviation}} > 0) {
            print "* ";
        }

        print $abbreviation." ->";
        for ($j = 0; $j <= $#{$abbreviations{$abbreviation}}; $j++) {
            print " ".$abbreviations{$abbreviation}[$j];
        }
        print "\n";
    }
}

sub http_login {
    my ($password) = @_;
    my $post_data;
    my $command;

    # Build the POST data
    $post_data = "lgname=".USERNAME."&lgpassword=".uri_escape($password);

    # Execute login
    print "Logging in... ";
    $command = "wget -q --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."?action=login&format=xml\"";
    `$command`;

    open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
    $_ = <RESULTS>;
    close(RESULTS);

    # Check the login was successful
    if (!(/login result=\"Success\"/)) {
        print "FAILED\n";
        print $_;
        die;
    }
    print "OK\n";

    # Save off the tokens
    /lguserid=\"(?<userid>[^\"]+)\".*lgtoken=\"(?<token>[^\"]+)\" cookieprefix=\"(?<cp>[^\"]+)\" sessionid=\"(?<sessionid>[^\"]+)\"/;
    $login_userid = $+{userid};
    $login_token = $+{token};
    $login_cookieprefix = $+{cp};
    $login_sessionid = $+{sessionid};

    print "Userid: ".$login_userid."\n";
    print "Token: ".$login_token."\n";
    print "Cookie prefix: ".$login_cookieprefix."\n";
    print "Session ID: ".$login_sessionid."\n";
}
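
# For reference (the shape is inferred from the regexes above, not an
# authoritative description of the MediaWiki API): a successful login response
# is expected to contain something along the lines of
#
#   <login result="Success" lguserid="..." lgusername="Skillsbot" lgtoken="..." cookieprefix="..." sessionid="..." />
#
# from which the user id, token, cookie prefix and session id are pulled out.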
sub http_logout {
    my ($password) = @_;
    my $post_data;
    my $command;

    # Execute logout
    print "Logging out... ";
    $command = "wget -q --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=logout&format=xml\"";
    `$command`;

    # There aren't any useful results to speak of, and there's little we can do
    # if the logout fails, so just end here.
    print "OK\n";
}

sub remove_existing {
    my ($href) = @_;
    my $key;
    my @queries;
    my $query_num = 0;
    my $keys_in_query = 0;
    my $query;
    my $command;

    # Build up lists of all the titles we want to query.
    foreach $key (sort keys %$href) {
        # Add each element into a query. Note that there's a (high) limit on
        # the number we can query with each http request, hence we may need to
        # send multiple batches.
        if ($keys_in_query == 0) {
            $queries[$query_num] = $key;
        }
        else {
            $queries[$query_num] = join('|', $queries[$query_num], $key);
        }
        $keys_in_query++;

        if ($keys_in_query >= APIHIGHLIMITS_QUERYCAP) {
            $query_num++;
            $keys_in_query = 0;
        }
    }

    # Now send in the queries.
    foreach $query (@queries) {
        print "Querying: ".$query."\n\n";

        $command = "wget -q --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&titles=".$query."\"";
        `$command`;

        open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
        $_ = <RESULTS>;
        close(RESULTS);

        # Parse the results. We're looking for pages that _aren't_ marked as
        # missing. Since those are the pages that already exist, erase them
        # from our local list of pages to deal with.
        while (/title=\"(?<key>[^\"]+)\" \/>/) {
            $key = lc($+{key});
            $_ = $';
            delete $$href{$key};
        }
    }
}

sub create_skill_articles {
    my $skill;
    my $command;
    my $edit_token;
    my $article_text;
    my $post_data;
    my $is_branch = 0;
    my $childlevel = 0;
    my @elements;
    my $child;
    my $child_string;
    my $child_num;

    # Get an edit token
    $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&prop=info&intoken=edit&titles=Main%20Page\"";
    `$command`;

    open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
    $_ = <RESULTS>;
    close(RESULTS);

    # Parse the results to get the edit token.
    if (/edittoken=\"(?<token>[^\"]+)\"/) {
        $edit_token = $+{token};
    }
    else {
        print "Could not find edit token:\n";
        print $_;
        die;
    }

    foreach $skill (sort keys %skills) {
        # Determine whether this skill is a leaf or a branch.
        if (exists $children{$skill}) {
            # Branch
            $is_branch = 1;
            @elements = split(/\./, $skill);
            $childlevel = $#elements + 2;
        }
        else {
            # Leaf
            $is_branch = 0;
        }

        # Build the article data
        if ($is_branch == 1) {
            $article_text = "{{Infonav skill\n".
                            " |shortform=".&get_shortname($skill)."\n".
                            " |con=".$skills{$skill}[0]."\n".
                            " |dex=".$skills{$skill}[1]."\n".
                            " |int=".$skills{$skill}[2]."\n".
                            " |str=".$skills{$skill}[3]."\n".
                            " |wis=".$skills{$skill}[4]."\n".
                            " |childlevel=".$childlevel."\n";

            $child_num = 1;
            for ($child_num = 0; $child_num <= $#{$children{$skill}}; $child_num++) {
                $child = $children{$skill}[$child_num];
                @elements = split(/\./, $child);
                $child_string = " |child".($child_num + 1)."=".$elements[$#elements]."\n";
                $article_text = join('', $article_text, $child_string);
            }

            $article_text = join('', $article_text,
                                 "}}\n".
                                 "\n".
                                 "{{skillsbot-stub}}\n".
                                 "\n".
                                 "[[Category:Skills]]\n");
        }
        else {
            $article_text = "{{Infonav skill\n".
                            " |shortform=".&get_shortname($skill)."\n".
                            " |con=".$skills{$skill}[0]."\n".
                            " |dex=".$skills{$skill}[1]."\n".
                            " |int=".$skills{$skill}[2]."\n".
                            " |str=".$skills{$skill}[3]."\n".
                            " |wis=".$skills{$skill}[4]."\n".
                            "}}\n".
                            "\n".
                            "{{skillsbot-stub}}\n".
                            "\n".
                            "[[Category:Skills]]\n";
        }

        # Build the POST data.
        $post_data = "action=edit".
                     "&format=xml".
                     "&title=".uri_escape(ucfirst($skill)).
                     "&summary=".uri_escape(EDIT_SUMMARY).
                     "&bot".
                     "&createonly".
                     "&text=".uri_escape($article_text).
                     "&token=".uri_escape($edit_token);

        # Execute the command, check it completed OK.
        print "Creating: ".$skill." ... ";
        $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."\"";
        `$command`;

        open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
        $_ = <RESULTS>;
        close(RESULTS);

        # Check the edit was successful
        if (!(/result=\"Success\"/)) {
            print "FAILED\n";
            print $_;
            die;
        }
        print "OK\n";
    }
}
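
# Illustrative only: for a hypothetical leaf skill such as
# "covert.casing.person", the article text built above would look roughly like
#
#   {{Infonav skill
#    |shortform=co.ca.pe
#    |con=0
#    |dex=0
#    |int=3
#    |str=0
#    |wis=2
#   }}
#
#   {{skillsbot-stub}}
#
#   [[Category:Skills]]
#
# where the shortform and stat counts come from the parsed input (the numbers
# shown here are placeholders), and the page title becomes
# "Covert.casing.person" via ucfirst().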
sub create_abbreviations {
    my $abbr;
    my $command;
    my $edit_token;
    my $article_text;
    my $post_data;
    my $is_disambig = 0;
    my $child;
    my $child_string;
    my $child_num;

    # Get an edit token
    $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --save-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." \"".API_URL."?action=query&format=xml&prop=info&intoken=edit&titles=Main%20Page\"";
    `$command`;

    open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
    $_ = <RESULTS>;
    close(RESULTS);

    # Parse the results to get the edit token.
    if (/edittoken=\"(?<token>[^\"]+)\"/) {
        $edit_token = $+{token};
    }
    else {
        print "Could not find edit token:\n";
        print $_;
        die;
    }

    foreach $abbr (sort keys %abbreviations) {
        # Determine whether this is to be a redirect or a disambiguation.
        if ($#{$abbreviations{$abbr}} > 0) {
            # Disambiguation
            $is_disambig = 1;
        }
        else {
            # Redirect
            $is_disambig = 0;
        }

        # Build the article data
        if ($is_disambig == 1) {
            $article_text = "{{disambig}}\n".
                            "\n".
                            "'''".$abbr."''' may refer to:\n";

            for ($child_num = 0; $child_num <= $#{$abbreviations{$abbr}}; $child_num++) {
                $child = $abbreviations{$abbr}[$child_num];
                $child_string = "* [[".$child."]]\n";
                $article_text = join('', $article_text, $child_string);
            }
        }
        else {
            $child = $abbreviations{$abbr}[0];
            $article_text = "#REDIRECT [[".$child."]]\n";
        }

        # Build the POST data.
        $post_data = "action=edit".
                     "&format=xml".
                     "&title=".uri_escape(ucfirst($abbr)).
                     "&summary=".uri_escape(EDIT_SUMMARY).
                     "&bot".
                     "&createonly".
                     "&text=".uri_escape($article_text).
                     "&token=".uri_escape($edit_token);

        # Execute the command, check it completed OK.
        print "Creating: ".$abbr." ... ";
        $command = "wget -q --keep-session-cookies --load-cookies=".COOKIE_FILENAME." --output-document=".RESULT_FILENAME." --post-data=\"".$post_data."\" \"".API_URL."\"";
        `$command`;

        open(RESULTS, RESULT_FILENAME) or die "Could not find ".RESULT_FILENAME."\n";
        $_ = <RESULTS>;
        close(RESULTS);

        # Check the edit was successful
        if (!(/result=\"Success\"/)) {
            print "FAILED\n";
            print $_;
            die;
        }
        print "OK\n";
    }
}
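
# Illustrative only: an unambiguous abbreviation produces a simple redirect
# page - for a hypothetical "co.ca.pe" the article text would be just
#
#   #REDIRECT [[covert.casing.person]]
#
# while an abbreviation shared by several skills (say a last name like
# "points") produces a {{disambig}} page listing each full skill name as a
# bullet, as built in the loop above.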