Okay then. Attached is the new, improved Masterlister program. It should
just compile with "mcs Masterlister.cs", using the MCS compiler from
Mono. Usage is "Masterlister.exe path/to/source/masterlist" - a diff is
produced on STDOUT (with program info on STDERR, a bit like OggEnc).
This program goes through the source file, and for each Site: entry with
an Archive-http or Archive-ftp (HTTP preferred) will download a
directory listing for $MIRRORLOCATION/dists/sid/main/ to a temporary
location. Mirrors are tried only once, and given a 30 second timeout.
The listing file is then parsed using a simple regular expression to
build a hashtable of architectures held on a given mirror. The source
file is re-read, written verbatim to the temporary location, with
Archive-architecture: lines replaced where a replacement is stored in
the hashtable. Finally, diff is called to compare the source and new files.
Also attached is another diff. Due to a pair of stupid 1-character typos on
my previous effort, FTP-only mirrors were being counted as bad, as were
HTTP mirrors whose HREFs didn't end with "/". New statistics are: 330
sites in Mirrors.masterlist, of which 289 are archive mirrors. 254 of
the archive mirrors are polled automatically without issue. Of those
254, only 2 returned bad directory listings (HTTP but not FTP on
ftp.is.co.za appears broken;
ftp.mcc.ac.uk seems to have hiccuped as
it's fine via a web browser)
//
// Masterlister.cs: Reads Mirrors.masterlist file, attempts to contact
// the mirrors, and updates the Archive-architecture
// lines appropriately
//
// Author:
// Jo Shields (directhex@apebox.org)
//
// Licensed under Do What The Fuck You Want To Public License (WTFPL)
// Version 2.
//
using System;
using
System.Collections;
using System.Diagnostics;
using System.IO;
using System.Text.RegularExpressions;
namespace Masterlister
{
class MainClass
{
/// <summary>
/// Program entry point. Validates the single command-line argument (the path
/// of the source masterlist), downloads a directory listing for every archive
/// mirror into a temporary folder, builds the host -> architectures table,
/// writes an updated copy of the masterlist, and finally runs "diff -u"
/// between the original and the updated copy.
/// </summary>
/// <param name="args">Exactly one element: the path of the masterlist to check.</param>
public static void Main(string[] args)
{
    Console.Error.WriteLine( "Masterlister v0.01 by Jo Shields\n================================\n" );
    if( args.Length != 1 )
        ShowUsageThenExit( );
    FileInfo SourceMasterlist = new FileInfo( args[0] );
    if( !SourceMasterlist.Exists )
        ShowUsageThenExit( );

    // GetTempFileName() already returns a full path and creates an empty
    // file with that name; swap the file for a directory of the same name.
    string tempFolder = Path.GetTempFileName( );
    File.Delete( tempFolder );
    Directory.CreateDirectory( tempFolder );
    try
    {
        GetMirrorListings( tempFolder, SourceMasterlist.FullName );
        Hashtable HostedArches = GetHostedArches( tempFolder );
        CreateNewMasterlist( tempFolder, SourceMasterlist.FullName, HostedArches );

        using( Process diffProcess = new Process( ) )
        {
            diffProcess.StartInfo.FileName = "diff";
            // Start diff directly (no shell) so it writes to our own STDOUT.
            diffProcess.StartInfo.UseShellExecute = false;
            // Quote both paths so that spaces in them do not split the arguments.
            diffProcess.StartInfo.Arguments = "-u " +
                QuoteDiffArgument( SourceMasterlist.FullName ) + " " +
                QuoteDiffArgument( Path.Combine( tempFolder, "Mirrors.masterlist" ) );
            diffProcess.Start( );
            diffProcess.WaitForExit( );
        }
    }
    finally
    {
        // Always remove the scratch folder, even when a step above threw.
        if( Directory.Exists( tempFolder ) )
            Directory.Delete( tempFolder, true );
    }
}

// Wraps a path in double quotes (escaping embedded double quotes) so that it
// is passed to the child process as a single command-line argument.
private static string QuoteDiffArgument( string argument )
{
    return "\"" + argument.Replace( "\"", "\\\"" ) + "\"";
}
/// <summary>
/// Writes the usage message to STDERR and terminates the process. This is
/// only called when the command line is invalid or the input file does not
/// exist, so the process exits with a non-zero status to let calling
/// scripts detect the failure.
/// </summary>
private static void ShowUsageThenExit( )
{
    Console.Error.WriteLine( "Usage: masterlister.exe original-list\n\nDiffed list is written to STDOUT,\n program messages to STDERR" );
    Environment.Exit( 1 );
}
/// <summary>
/// Copies the source masterlist to "Mirrors.masterlist" inside
/// <paramref name="tempFolder"/>, replacing the first Archive-architecture
/// line of every entry whose Site has an entry in
/// <paramref name="HostedArches"/>. All other lines are written verbatim.
/// </summary>
/// <param name="tempFolder">Folder that receives the new Mirrors.masterlist.</param>
/// <param name="sourceFile">Path of the original masterlist.</param>
/// <param name="HostedArches">Maps a site name to a string[] of architecture names.</param>
private static void CreateNewMasterlist( string tempFolder, string sourceFile, Hashtable HostedArches )
{
    // Anchored at the start of the (trimmed) line so that prefixed field
    // names such as "X-Archive-architecture:" are not mistaken for the real
    // field and rewritten under a different name.
    Regex HostMatch = new Regex( "^Site: (.*)" );
    Regex ArchMatch = new Regex( "^Archive-architecture: (.*)" );
    string CurrentHost = "";

    // "using" guarantees both files are closed even if reading or writing fails.
    using( StreamReader SourceReader = File.OpenText( sourceFile ) )
    using( StreamWriter TargetWriter = File.CreateText(
        Path.Combine( tempFolder, "Mirrors.masterlist" ) ) )
    {
        string currentLine;
        while( ( currentLine = SourceReader.ReadLine( ) ) != null )
        {
            string Trimmed = currentLine.Trim( );

            // A blank line ends the current entry.
            if( Trimmed == "" )
                CurrentHost = "";

            Match SiteFound = HostMatch.Match( Trimmed );
            if( SiteFound.Success )
                CurrentHost = SiteFound.Groups[1].Value;

            // Look up a replacement only for an architecture line that
            // belongs to a known site.
            string[] Replacement = null;
            if( CurrentHost != "" && ArchMatch.IsMatch( Trimmed ) )
                Replacement = HostedArches[CurrentHost] as string[];

            if( Replacement != null )
            {
                TargetWriter.Write( "Archive-architecture:" );
                foreach( string Arch in Replacement )
                    TargetWriter.Write( " {0}", Arch );
                TargetWriter.WriteLine( );
                // Only the first architecture line of an entry is replaced.
                CurrentHost = "";
            }
            else
                TargetWriter.WriteLine( currentLine );
        }
    }
}
/// <summary>
/// Scans every file in <paramref name="tempFolder"/> (one downloaded
/// directory listing per mirror, named after the mirror's host) for
/// "binary-ARCH" names and returns the architectures found per mirror.
/// Each listing file is deleted after it has been read.
/// </summary>
/// <param name="tempFolder">Folder containing the downloaded listings.</param>
/// <returns>
/// A Hashtable mapping the listing's file name to a string[] of distinct
/// architecture names in order of first appearance. Mirrors with no
/// recognisable architecture are reported on STDERR and left out.
/// </returns>
private static Hashtable GetHostedArches( string tempFolder )
{
    Hashtable Arches = new Hashtable( );
    Regex ArchesMatch = new Regex( "binary-([a-zA-Z0-9-]+)" );

    foreach( FileInfo Mirror in new DirectoryInfo( tempFolder ).GetFiles( ) )
    {
        ArrayList MirrorArches = new ArrayList( );

        // "using" closes the listing even if reading fails part-way.
        using( StreamReader MirrorReader = File.OpenText( Mirror.FullName ) )
        {
            string HtmlLine;
            while( ( HtmlLine = MirrorReader.ReadLine( ) ) != null )
            {
                // A line may name several architectures (some servers emit
                // the whole listing on one line) and usually names each one
                // twice (href and link text), so collect every match and
                // keep only the first occurrence of each name.
                foreach( Match Found in ArchesMatch.Matches( HtmlLine ) )
                {
                    string Arch = Found.Groups[1].Value;
                    if( !MirrorArches.Contains( Arch ) )
                        MirrorArches.Add( Arch );
                }
            }
        }

        if( MirrorArches.Count > 0 )
        {
            Console.Error.WriteLine( "{0} hosts {1} architectures",
                Mirror.Name.PadLeft( 32 ), MirrorArches.Count.ToString( ).PadLeft( 2 ) );
            Arches.Add(
                Mirror.Name, (string[])MirrorArches.ToArray( typeof( string ) ) );
        }
        else
            Console.Error.WriteLine( "{0} has *NO* architectures",
                Mirror.Name.PadLeft( 32 ) );

        // The listing is no longer needed once parsed.
        Mirror.Delete( );
    }
    return Arches;
}
/// <summary>
/// Reads the masterlist entry by entry (entries are separated by blank
/// lines) and, for every entry with a Site and an Archive-http or
/// Archive-ftp path (HTTP preferred), downloads the mirror's
/// dists/sid/main/ directory listing with wget into
/// <paramref name="tempFolder"/>, in a file named after the site.
/// </summary>
/// <param name="tempFolder">Folder that receives one listing file per mirror.</param>
/// <param name="mirrorList">Path of the masterlist to read.</param>
private static void GetMirrorListings( string tempFolder, string mirrorList )
{
    // Anchored at the start of the (trimmed) line so that prefixed fields
    // such as "X-Archive-ftp:" are not treated as the plain field.
    // NOTE(review): presumably the X- prefix marks a disabled field - confirm.
    Regex HostMatch = new Regex( "^Site: (.*)" );
    Regex HttpMatch = new Regex( "^Archive-http: (.*)" );
    Regex FtpMatch = new Regex( "^Archive-ftp: (.*)" );
    string Host = "";
    string HttpDir = "";
    string FtpDir = "";

    using( StreamReader SourceReader = File.OpenText( mirrorList ) )
    {
        string CurrentLine;
        while( ( CurrentLine = SourceReader.ReadLine( ) ) != null )
        {
            string Trimmed = CurrentLine.Trim( );

            Match Found = HostMatch.Match( Trimmed );
            if( Found.Success )
            {
                Host = Found.Groups[1].Value;
                continue;
            }
            Found = HttpMatch.Match( Trimmed );
            if( Found.Success )
            {
                HttpDir = Found.Groups[1].Value;
                continue;
            }
            Found = FtpMatch.Match( Trimmed );
            if( Found.Success )
            {
                FtpDir = Found.Groups[1].Value;
                continue;
            }
            if( Trimmed == "" )
            {
                // Blank line: the entry is complete, poll it and start afresh.
                FetchMirrorListing( tempFolder, Host, HttpDir, FtpDir );
                Host = "";
                HttpDir = "";
                FtpDir = "";
            }
        }
    }

    // The file may not end with a blank line; do not lose the final entry.
    FetchMirrorListing( tempFolder, Host, HttpDir, FtpDir );
}

// Downloads the dists/sid/main/ listing for one masterlist entry. Does
// nothing when no site was seen; reports non-archive and bad mirrors on STDERR.
private static void FetchMirrorListing( string tempFolder, string Host, string HttpDir, string FtpDir )
{
    if( Host == "" )
        return;

    Console.Error.Write( "{0} : ", Host.PadRight( 32 ) );
    string CompleteUrl;
    if( HttpDir != "" )
    {
        CompleteUrl = "http://" + Host + HttpDir + "dists/sid/main/";
        Console.Error.WriteLine( "HTTP Archive" );
    }
    else if( FtpDir != "" )
    {
        CompleteUrl = "ftp://" + Host + FtpDir + "dists/sid/main/";
        Console.Error.WriteLine( "FTP Archive" );
    }
    else
    {
        Console.Error.WriteLine( "-- Not an archive mirror --" );
        return;
    }

    string ListingPath = Path.Combine( tempFolder, Host );
    using( Process wgetMaster = new Process( ) )
    {
        wgetMaster.StartInfo.FileName = "wget";
        wgetMaster.StartInfo.UseShellExecute = false;
        wgetMaster.StartInfo.RedirectStandardError = true;
        // Quote the path and URL so embedded spaces cannot split the arguments.
        wgetMaster.StartInfo.Arguments = "--timeout=30 --no-remove-listing -t 1 -O " +
            QuoteWgetArgument( ListingPath ) + " " + QuoteWgetArgument( CompleteUrl );
        wgetMaster.Start( );
        // wget's chatter is redirected to keep our STDERR clean; it must be
        // drained before waiting, otherwise a full pipe blocks wget forever.
        wgetMaster.StandardError.ReadToEnd( );
        wgetMaster.WaitForExit( );
    }

    // A missing or empty listing means the mirror could not be polled.
    FileInfo downloadedListing = new FileInfo( ListingPath );
    if( !downloadedListing.Exists || downloadedListing.Length == 0 )
    {
        File.Delete( downloadedListing.FullName );
        Console.Error.WriteLine( "\t^^ Bad mirror, excluding from update" );
    }
}

// Wraps a value in double quotes (escaping embedded double quotes) so that it
// is passed to the child process as a single command-line argument.
private static string QuoteWgetArgument( string argument )
{
    return "\"" + argument.Replace( "\"", "\\\"" ) + "\"";
}
}
}
--- /home/directhex/Projects/Masterlister/bin/Debug/Mirrors.masterlist 2006-08-09 22:20:29.000000000 +0100
+++ /tmp/tmp20536772.tmp/Mirrors.masterlist 2006-08-10 01:19:05.000000000 +0100
@@ -3,7 +3,7 @@
Includes:
saens.debian.org ftp.egr.msu.edu raff.debian.org mirrors.kernel.org archive.progeny.com debian.osuosl.org
Type: Push-Primary
Archive-http: /debian/
-Archive-architecture: !m68k !s390
+Archive-architecture: amd64 i386
Archive-ftp: /debian/
Archive-rsync: debian/
Country: US United States
@@ -206,7 +206,7 @@
Alias:
ftp.demon.net
Type: leaf
Archive-ftp: /pub/mirrors/linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Maintainer: uploads@
demon.net, Malcolm Muir
demon.net>
Country: GB Great Britain
Sponsor: Demon Internet Ltd http://www.demon.net/
@@ -214,7 +214,7 @@
Site: ftp.uwa.edu.au
Type: leaf
Archive-ftp: /mirrors/linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sh sparc
NonUS-ftp: /mirrors/linux/debian-non-US/
Mirrors-from: ftp.au.debian.org
Maintainer: David Luyer ucs.uwa.edu.au>
@@ -412,7 +412,7 @@
Country: DE Germany
Location: Clausthal-Zellerfeld, Niedersachsen
Sponsor: Rechenzentrum der TU-Clausthal http://www.rz.tu-clausthal.de/
-Archive-architecture: alpha arm i386 ia64 m68k mips mipsel powerpc sparc
+Archive-architecture: amd64 arm i386 ia64 m68k mips powerpc sparc
Comment: s390, sh, hppa and hurd excluded from main archive;
hppa, mips, mipsel and s390 excluded from CDs, (jigdo available);
s390 and hppa excluded from non-US. Bandwidth limitation: 1 Mbit per
@@ -795,7 +795,7 @@
Site: ftp.arnes.si
Type: leaf
Archive-ftp: /packages/debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 hurd-i386 i386
NonUS-ftp: /packages/debian-non-US/
Mirrors-from: ftp.de.debian.org
Country: SI Slovenia
@@ -838,7 +838,7 @@
Aliases: ftp.proxad.fr
Type: leaf
Archive-ftp: /mirrors/ftp.debian.org/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
NonUS-ftp: /mirrors/nonus.debian.org/
CDImage-ftp: /mirrors/cdimage.debian.org/debian-cd/
WWW-ftp: /mirrors/web.debian.org/
@@ -984,7 +984,7 @@
Country: CA Canada
Location: Vancouver
Sponsor: ID Internet Direct Ltd. http://www.direct.ca/
-Archive-architecture: amd64 i386 ia64 sparc
+Archive-architecture: amd64 i386
Site: ftp.si.debian.org
Alias: ftp.camtp.uni-mb.si
@@ -1077,7 +1077,7 @@
Type: leaf
Alias: ftp.tuwien.ac.at
Archive-ftp: /opsys/linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Archive-http: /opsys/linux/debian/
Archive-rsync: opsys/linux/debian/
NonUS-ftp: /opsys/linux/debian-non-US/
@@ -1126,7 +1126,7 @@
Aliases: ftp.uninett.no
Type: Push-Secondary
Archive-ftp: /debian/
-Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 mips mipsel powerpc s390 sparc
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 powerpc s390 sparc
Archive-http: /debian/
Archive-rsync: debian/
NonUS-ftp: /debian-non-US/
@@ -1661,7 +1661,7 @@
Country: US United States
Location: Minneapolis, Minnesota
Sponsor: Real-Time Enterprises http://www.real-time.com/
-Archive-architecture: i386
+Archive-architecture: alpha amd64 i386 powerpc sparc
Site: mirrors.kernel.org
Aliases: rsync.kernel.org
@@ -1731,7 +1731,7 @@
Site: ftp.eutelia.it
Type: leaf
Archive-ftp: /pub/Debian_Mirror/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Mirrors-from: ftp.de.debian.org
Maintainer: Max Gargani eutelia.com>
Country: IT Italy
@@ -1929,7 +1929,7 @@
Alias: dl.xs4all.nl
Type: leaf
Archive-ftp: /pub/mirror/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Maintainer: unixbeheer@xs4all.nl, info@xs4all.nl
Country: NL Netherlands
Location: AMS-IX, Amsterdam
@@ -2080,7 +2080,7 @@
Alias: slagroom.snt.utwente.nl
Type: Push-Primary
Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha
Archive-http: /debian/
Archive-rsync: debian/
NonUS-ftp: /debian-non-US/
@@ -2105,7 +2105,7 @@
Site: ftp.iinet.net.au
Type: leaf
Archive-ftp: /debian/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Archive-http: /debian/debian/
CDImage-ftp: /debian/debian-cd/
CDImage-http: /debian/debian-cd/
@@ -2208,7 +2208,7 @@
Alias: Hefe.ZEDAT.FU-Berlin.DE
Type: leaf
Archive-ftp: /pub/unix/linux/mirrors/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
NonUS-ftp: /pub/unix/linux/mirrors/debian-non-US/
Mirrors-from: ftp.de.debian.org
Maintainer: ftp-adm@FU-Berlin.DE
@@ -2303,7 +2303,7 @@
Aliases: download.nectec.or.th
Type: leaf
Archive-ftp: /pub/linux-distributions/Debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
NonUS-ftp: /pub/linux-distributions/Debian-non-US/
Mirrors-from: ftp.jp.debian.org
Maintainer: PubNet team of NECTEC
@@ -2442,7 +2442,7 @@
Site: ftp.mpi-sb.mpg.de
Type: leaf
Archive-ftp: /pub/linux/distributions/debian/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
NonUS-ftp: /pub/linux/distributions/debian/non-us/debian-non-US/
CDImage-ftp: /pub/linux/distributions/debian/debian-cd/
Mirrors-from: ftp.debian.org
@@ -2544,7 +2544,7 @@
Country: FI Finland
Location: Jyväskylä
Sponsor: University of Jyväskylä http://www.jyu.fi/
-Archive-architecture: !arm !m68k !mips !mipsel !s390
+Archive-architecture: alpha amd64 hppa hurd-i386 i386 ia64 powerpc sparc
Site: ftp.bittivuoto.net
Type: leaf
@@ -3057,7 +3057,7 @@
Location: Sofia
Sponsor: Lirex Net http://www.lirex.net/
Sponsor: Ludost.net http://www.ludost.net/
-Archive-architecture: amd64 i386
+Archive-architecture: i386
Site: debian.blueyonder.co.uk
Alias: mirror2.blueyonder.co.uk
@@ -3173,7 +3173,7 @@
Site: nisamox.fciencias.unam.mx
Type: leaf
Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Archive-http: /debian/
NonUS-ftp: /debian-non-US/
NonUS-http: /debian-non-US/
@@ -3422,7 +3422,7 @@
Site: ftp.linux.org.tr
Type: leaf
Archive-ftp: /pub/mirrors/debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 i386
CDImage-ftp: /pub/mirrors/debian-cd/
Maintainer: ftp@linux.org.tr
Country: TR Turkey
@@ -3569,7 +3569,7 @@
Site: debian.midco.net
Type: leaf
X-Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Archive-http: /debian/
Archive-rsync: /debian/
X-CDImage-ftp: /pub/iso/
@@ -3759,7 +3759,7 @@
Country: NZ New Zealand
Location: Auckland
Sponsor: Ihug Ltd http://www.ihug.co.nz/
-Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 mips mipsel powerpc s390 sh sparc
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 mips mipsel powerpc s390 sparc
Site: debian.spark.net.gr
Type: leaf
@@ -3948,7 +3948,7 @@
Country: RU Russia
Location: Moscow
Sponsor: Corbina telecom http://www.corbina.ru/
-Archive-architecture: i386 ia64
+Archive-architecture: amd64 i386 ia64
Site: debian.indika.net.id
Type: leaf
@@ -3981,7 +3981,7 @@
Alias: ftp.mines.inpl-nancy.fr
Type: leaf
Archive-ftp: /debian/
-Archive-architecture: !alpha !arm !m68k
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
Archive-http: /debian/
CDImage-ftp: /debian-cd/
CDImage-http: /debian-cd/
@@ -4060,7 +4060,7 @@
Alias: horacio.cica.es
Type: leaf
Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 hurd-i386 i386
CDImage-ftp: /debian-cd/
NonUS-ftp: /debian-non-US/
Mirrors-from: ftp.es.debian.org
@@ -4174,7 +4174,7 @@
Site: ftp.informatik.hu-berlin.de
Type: leaf
Archive-ftp: /pub/Mirrors/ftp.de.debian.org/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
CDImage-ftp: /pub/Mirrors/ftp.de.debian.org/debian-cd/
NonUS-ftp: /pub/Mirrors/ftp.de.debian.org/debian-non-US/
Mirrors-from: ftp.fu-berlin.de
@@ -4187,7 +4187,7 @@
Alias: ftp.ecc.u-tokyo.ac.jp
Type: leaf
Archive-ftp: /DEBIAN/debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 hurd-i386 i386 powerpc
NonUS-ftp: /DEBIAN/debian-non-US/
Mirrors-from: ftp.jp.debian.org
Maintainer: NOGAMI Daisuke niwa.c.u-tokyo.ac.jp>
@@ -4263,7 +4263,7 @@
Site: ftp.debian.ikoula.com
Type: leaf
Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
NonUS-ftp: /debian-non-US/
Mirrors-from: ftp2.fr.debian.org
Maintainer: Florence LIU ikoula.com>
@@ -4352,7 +4352,7 @@
Aliases: rubycon.man.szczecin.pl
Type: leaf
Archive-ftp: /pub/Linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
NonUS-ftp: /pub/Linux/debian-non-US/
Mirrors-from: ftp.de.debian.org
Maintainer: Tomasz Grabowski
@@ -4372,7 +4372,7 @@
Country: BR Brazil
Location: Campinas - SP
Sponsor: LAS-IC-UNICAMP - Systems Administration and Security Laboratory http://www.las.ic.unicamp.br/
-Archive-architecture: amd64 i386
+Archive-architecture: amd64 hurd-i386 i386 powerpc sparc
Site: ftp.gwdg.de
Type: leaf