Another abandoned server code base... this is kind of an ancestor of taskrambler.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

765 lines
21 KiB

#!/usr/bin/python
""" slurpIcalSpec.py -- extract formal view of iCalendar/vCard specs
produces XHTML with various typed links
@@TODO: make each marked up item visible via css
(unless they're all linked).
see also: rfc2html in dev.w3.org @@
"""
__version__ = '$Id: slurpIcalSpec.py,v 1.27 2005/11/09 23:10:49 connolly Exp $'
#see also: changelog at end
import sys
import re
class Usage(Exception):
    # Raised by main() for a missing, non-numeric or unsupported RFC number.
    # The docstring below doubles as the usage message: the __main__ guard
    # prints e.__doc__ verbatim, so its continuation lines are kept
    # flush-left and its wording must not be edited casually.
    """USAGE: python slurpIcalSpec.py NNNN <rfcNNNN.txt >rfcNNNN.html
where NNNN is one of the supported RFCS
(so far: 2425, 2426, and 2445)
"""
# Per-RFC quirks, keyed by RFC number.
# Each value is the 3-tuple (footerStarters, typos, example_tags):
#   footerStarters -- line prefixes that depaginate() treats as page breaks
#   typos          -- (text to find, replacement) pairs applied to every line
#   example_tags   -- subsection tags that htmlTop() writes a CSS rule for

_RFC2445_TYPOS = (
    ('"EVENT"', '"VEVENT"'),  # 4.8.7.3 Last Modified
    (' Purpose This value type',
     ' Purpose: This value type'),
    ('11. Full Copyright Statement',
     '11 Full Copyright Statement'),
    ('3.11 Contact for Further Information:',
     '3.11 Contact for Further Information'),
)

# a ref is split across lines; put it on one line
_RFC2426_TYPOS = (
    ('Hence, this [MIME-',
     'Hence, this [MIME-DIR]'),
    ('DIR] profile is',
     ' profile is'),
)

Specs = {
    2445: (("RFC", "Dawson"), _RFC2445_TYPOS, ('Example',)),
    2426: (("RFC", "Dawson"), _RFC2426_TYPOS, ('Type example',)),
    2425: (("RFC", "Howes"), (), ()),
}
def main(argv):
    """Convert the RFC text on stdin to XHTML on stdout.

    argv is the full command line: argv[0] is the program name and
    argv[1] must be an RFC number that is a key of Specs.

    Raises Usage when the argument count is wrong, the argument is not
    an integer, or the RFC is not listed in Specs.  Raises ValueError
    when stdin yields no text at all.
    """
    if len(argv) != 2:
        raise Usage()
    try:
        rfcnum = int(argv[1])
        pgbrks, typos, exampleTags = Specs[rfcnum]
    except (ValueError, KeyError):
        raise Usage()

    w = sys.stdout.write
    lines = depaginate(sys.stdin, pgbrks, typos)
    sections = bySection(lines)

    # The first section is the RFC's front-matter block (header + title).
    # next() with a default works on Python 2.6+ and 3; the generator's
    # .next() method exists only on Python 2.
    head = next(sections, None)
    if head is None:
        raise ValueError("empty input: no RFC text on stdin")
    #sys.stderr.write("RFC header: %r\n" % (head,))

    # From here on rfcnum is the string scraped from the header, not the
    # integer parsed from the command line.
    title, rfcnum, category, date, authlines = titleEtc(head)
    # w(... + "\n") emits the same bytes as the old print statement.
    w(htmlTop(title, rfcnum, category, date, authlines, exampleTags) + "\n")

    # Two passes over the body: collect the reference labels first so
    # that citations in earlier sections can be linked.
    sections = list(sections)
    refs = findRefs(sections)
    for sec in sections:
        heading = sec[0]
        if "Table of Contents" in heading:
            tocSect(w, sec)
        elif heading[:1].isdigit():  # [:1] is safe on an empty heading
            numSect(w, sec, refs)
        else:
            flowSect(w, sec, refs)
    w(htmlBot() + "\n")
def depaginate(fp, footerStarters, typos):
    """Undo RFC pagination: generate the document's lines one by one.

    fp is read with readline() until it returns an empty value.  Each
    line loses its trailing CR/LF and has every (err, fix) pair in typos
    applied.  A line that is a lone form feed, or that starts with one
    of the footerStarters prefixes, counts as a page break: it is not
    emitted, blank lines held back before it are discarded, one blank
    line is queued in its place, and blank lines that follow it (the top
    of the next page) are skipped.  Blank lines are only emitted just
    before the next non-blank line, so trailing blanks never appear.

    'Each page must be limited to 58 lines followed by a form feed on a
    line by itself.'
    --Instructions to RFC Authors
    Postel & Reynolds Oct 1997
    http://www.ietf.org/rfc/rfc2223
    """
    pending = []          # blank lines held back until more text shows up
    at_page_top = True
    while True:
        raw = fp.readline()
        if not raw:
            return
        text = raw.rstrip("\r\n")
        for wrong, right in typos:
            text = text.replace(wrong, right)

        page_break = text == "\f" or any(
            text.startswith(prefix) for prefix in footerStarters)
        if page_break:
            pending = ['']  # leave a blank line where the pagebreak was
            at_page_top = True
            continue

        if text:
            for blank in pending:
                yield blank
            pending = []
            yield text
            at_page_top = False
        elif not at_page_top:
            pending.append(text)
def bySection(lines):
    """Group a stream of lines into sections; yield each as a list.

    A line opens a new section only when it is non-empty, does not start
    with a space, "Request for Comments" or "Category:", and contains
    none of ":", "<" or "--".  Every other line, blank ones included, is
    appended to the section currently being collected.
    """
    current = []
    for text in lines:
        continuation = (
            not text
            or text.startswith((" ", "Request for Comments", "Category:"))
            or ":" in text
            or "<" in text
            or "--" in text
        )
        if not continuation and current:
            yield current
            current = []
        current.append(text)
    if current:
        yield current
def titleEtc(lines):
    """Pick the document metadata out of the RFC's front-matter lines.

    lines is the first section of the depaginated text: a two-column
    header block, a blank line, then the title.  The left column is
    taken to be the first 40 characters of each header line.

    Returns (title, rfcnum, category, date, authlines), where
      title     -- the lines after the blank line, stripped and joined
      rfcnum    -- last word of the left column of the second line
      category  -- text after ':' in the left column of the third line
      date      -- the last header line, stripped
      authlines -- right column of every header line except the last

    Raises ValueError when there is no blank separator line, or when
    fewer than three header lines come before it.
    """
    sep = None
    for idx, text in enumerate(lines):
        if text.strip() == '':
            sep = idx
            break
    if sep is None:
        # repr() replaces the Python-2-only backtick syntax.
        raise ValueError(
            "no blank line separating header from title: " + repr(lines))
    if sep < 3:
        # lines[1] and lines[2] are read below; fail clearly, not with
        # an IndexError from deep inside the parsing expressions.
        raise ValueError(
            "RFC header has fewer than 3 lines before the title: "
            + repr(lines))

    title = ' '.join([text.strip() for text in lines[sep + 1:]]).strip()
    rfcnum = lines[1][:40].strip().split()[-1]
    category = lines[2][:40].strip().split(':')[1].strip()
    date = lines[sep - 1].strip()
    authlines = [lines[idx][40:].strip() for idx in range(0, sep - 1)]
    return title, rfcnum, category, date, authlines
def findRefs(sections):
    """Return the labels defined in the document's References section.

    sections is a sequence of sections, each a list of lines whose first
    element is the heading.  The first section whose stripped heading
    ends with "References" is used.  Within it, blank lines separate
    entries, and the text between the first '[' and the following ']'
    on an entry's first line is its label; continuation lines of the
    entry are ignored.

    Raises ValueError when no References section exists, or when an
    entry's first line carries no [label].
    """
    for sec in sections:
        if not sec[0].strip().endswith("References"):
            continue
        refs = []
        in_entry = False
        for ln in sec[1:]:
            ln = ln.strip()
            if ln == '':
                in_entry = False
                continue
            if in_entry:
                continue
            start = ln.find('[')
            end = ln.find(']', start + 1)
            if start < 0 or end < 0:
                # Same exception type as the tuple-unpack failure this
                # replaces, but with a message naming the offending line.
                raise ValueError(
                    "reference entry does not begin with a [label]: "
                    + repr(ln))
            #sys.stderr.write("found ref: %s\n" % ln[start + 1:end])
            refs.append(ln[start + 1:end])
            in_entry = True
        return refs
    raise ValueError("no References section found")
def htmlTop(title, rfcnum, category, date, authlines, exampleTags):
    """Return the opening of the XHTML document, up to </address>.

    title, rfcnum, category and date are plain-text strings and
    authlines is a list of plain-text strings; all of them are escaped
    for '&' and '<' before being placed in the markup.  One CSS rule is
    written per entry of exampleTags, for the class name asClass(tag).
    """
    def esc(txt):
        # Same escaping that doChars applies to body text.
        return txt.replace("&", "&amp;").replace("<", "&lt;")

    # Each rule is closed with '}'; without it the style sheet is
    # malformed and any further rule is swallowed by the first one.
    css = "".join([
        "dd.%s { border-style: solid; border-color: #d0dbe7; border-width: 0 0 0 .25em; padding-left: 0.5em; }\n" % asClass(t)
        for t in exampleTags])

    title = esc(title)
    rfcnum = esc(rfcnum)
    html = """
<html xmlns='http://www.w3.org/1999/xhtml'>
<head profile='http://www.w3.org/2003/g/data-view'>
<title>%s</title>
<link rel="transformation"
href="http://www.w3.org/2002/12/cal/webize2445.xsl"/>
<style type="text/css">
%s
</style>
</head>
<body>
<table>
<tr><td>Network Working Group<br />
Request for Comments: %s<br />
Category: %s<br />
</td>
<td>%s</td>
</tr>
<tr><td>&#160;</td><td>%s</td></tr>
</table>
<h1>%s</h1>
<address>
$Revision: 1.27 $ of $Date: 2005/11/09 23:10:49 $
derived from <a href="http://www.ietf.org/rfc/rfc%s.txt">rfc%s.txt</a>
and enhanced
for <a href="http://www.w3.org/2004/01/rdxh/spec">gleaning formal description</a>
using <a href="slurpIcalSpec.py">slurpIcalSpec.py</a>
by <a href="http://www.w3.org/People/Connolly/">Dan Connolly</a>
</address>
""" % (title, css, rfcnum, esc(category),
       '<br />'.join([esc(a) for a in authlines]), esc(date), title,
       rfcnum, rfcnum)
    return html
def htmlBot():
    """Return the closing tags for the <body> and <html> elements."""
    closing_tags = ("</body>", "</html>")
    return "".join(closing_tags)
def flowSect(w, lines, refs):
    """Write an unnumbered section as a <div> of flowed paragraphs.

    w is a write callable, lines[0] is the heading and the remaining
    lines are the body; refs is handed on to flowSectRest, which writes
    the body and closes the <div>.
    """
    heading = lines[0]
    opening = "<div><h2>%s</h2>\n" % (heading,)
    w(opening)
    flowSectRest(w, lines, refs)
def flowSectRest(w, lines, refs):
    """Write lines[1:] as <p> paragraphs, then close the enclosing <div>.

    Runs of non-blank lines form one paragraph each; blank (or
    whitespace-only) lines end the current paragraph.  Each text line
    goes through bodyText so that citations from refs become links.
    """
    in_para = False
    for text in lines[1:]:
        if not text.strip():
            if in_para:
                w("</p>\n")
            in_para = False
            continue
        if not in_para:
            w("<p>\n")
            in_para = True
        bodyText(w, text, refs)
        w("\n")
    if in_para:
        w("</p>\n")
    w("</div>")
def tocSect(w, lines):
    """Write the table of contents as an unbulleted list of links.

    lines[0] is the section heading.  Every other non-blank line becomes
    one <li>: a leading token that starts with a digit is taken as the
    section number (the link target is '#sec' + number), and the entry
    text is cut at its first '.', which drops the dot leaders and the
    page number.
    """
    w("<div><h2>%s</h2>\n" % (lines[0],))
    w("<ul type='none'>\n")
    for raw in lines[1:]:
        entry = raw.strip()
        if not entry:
            continue
        num = ''
        if entry[0].isdigit():
            num, entry = entry.split(None, 1)
        head = entry.partition(".")[0]
        w("<li><a href='#sec%s'>%s %s</a></li>\n" % (num, num, head))
    w("</ul></div>")
def refSect(w, lines):
    """Write the body of the References section as a <dl>.

    lines[0] is the heading (already written by the caller); the other
    lines hold the entries, separated by blank lines.  An entry's first
    line must contain a [label]; the text after the label, plus any
    continuation lines, is passed to refEntry as the description.
    Closes the <dl> and the <div> opened by the caller.

    Raises ValueError when an entry's first line has no [label].
    """
    ref = None
    dd = None          # description of the entry being collected, or None
    in_entry = False
    w("<dl>\n")
    for ln in lines[1:]:
        ln = ln.strip()
        if ln == '':
            if dd:
                refEntry(w, ref, dd)
                # Forget the entry once written.  Without this reset the
                # same entry was written again at every further blank
                # line and once more at the end of the section, giving
                # duplicate <dt> ids.
                dd = None
            in_entry = False
            continue
        if in_entry:
            dd = dd + ln + "\n"
            continue
        start = ln.find('[')
        end = ln.find(']', start + 1)
        if start < 0 or end < 0:
            raise ValueError(
                "reference entry does not begin with a [label]: " + repr(ln))
        ref = ln[start + 1:end]
        dd = ln[end + 1:] + "\n"
        in_entry = True
    if dd:
        refEntry(w, ref, dd)
    w("</dl>\n")
    w("</div>\n")
def refEntry(w, ref, dd):
    r"""write a reference entry

    w is a write callable, ref the entry's label (without brackets) and
    dd its description text.  When dd contains exactly one double-quoted
    span, that span is marked up as the <cite>d title and, if a target
    can be found, linked to it.  The target is, in order of preference:
    the RFC named by the label itself ("RFC nnnn"), the first
    "RFC nnn"/"RFC nnnn" mentioned in dd, or the first http/ftp URL in
    dd.  Otherwise dd is written as plain escaped text.

    (This docstring is raw so that the \n in the expected output below
    reaches doctest as written.)

    >>> import StringIO
    >>> w = StringIO.StringIO()
    >>> refEntry(w.write, 'IMIP', 'Dawson, F., Mansour, S. and S. Silverberg, "iCalendar Message-based Interoperability Protocol (IMIP)", RFC 2447, November 1998.'); w.getvalue()
    "<dt id='ref_IMIP'>[IMIP]</dt>\n<dd>Dawson, F., Mansour, S. and S. Silverberg, <cite><a href='http://www.ietf.org/rfc/rfc2447'>iCalendar Message-based Interoperability Protocol (IMIP)</a></cite>, RFC 2447, November 1998.</dd>\n"
    """
    w("<dt id='%s'>[%s]</dt>\n" % (asID(ref), ref))
    w("<dd>")

    # try to mark up the title
    parts = dd.split('"')
    if len(parts) != 3:
        doChars(w, dd)
        w("</dd>\n")
        return
    before, title, after = parts
    doChars(w, before)
    w("<cite>")

    # try to make it a link
    href = None
    if ref.startswith("RFC "):
        href = rfcAddr(ref.split(' ')[1])
    else:
        m = re.search(r'RFC (\d\d\d\d?)', dd)
        if m:
            href = rfcAddr(m.group(1))
        else:
            # \s rather than a literal space: dd is several lines joined
            # with "\n", and a URL must not run on into the next line.
            m = re.search(r'((http|ftp)://[^\s,]+)', dd)
            if m:
                href = m.group(1)

    if href:
        # The value sits inside a single-quoted attribute.
        href = (href.replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace("'", "&#39;"))
        w("<a href='%s'>" % href)
        doChars(w, title)
        w("</a>")
    else:
        doChars(w, title)
    w("</cite>")
    doChars(w, after)
    w("</dd>\n")
# Address pattern for an RFC on the IETF site; %s is the RFC number.
_RFC_ADDR_TEMPLATE = 'http://www.ietf.org/rfc/rfc%s'


def rfcAddr(num):
    """Return the ietf.org address for RFC number num."""
    return _RFC_ADDR_TEMPLATE % num
def asID(ref):
    """Build the XHTML id for a reference label.

    The id is 'ref_' followed by the label with every space replaced by
    an underscore.

    >>> asID('VCARD')
    'ref_VCARD'
    >>> asID('RFC 1872')
    'ref_RFC_1872'
    """
    words = ref.split(' ')
    return 'ref_' + '_'.join(words)
def numSect(w, lines, refs):
    """Write a numbered section, choosing a renderer from its content.

    lines[0] is "<number> <heading>".  The heading decides first:
    "References" goes to refSect; the copyright, acknowledgement and
    abstract headings are flowed with flowSectRest.  Otherwise the third
    line (lines[2]) is inspected: an indented "Property Name:",
    "Value Name:", "Component Name:", "Parameter Name:" or
    "To: ietf-mime-directory@imc.org" line selects doStructuredSection
    with the matching section type.  Anything else is written verbatim
    inside <pre>.
    """
    parts = lines[0].split(None, 1)
    num = parts[0]
    head = ''
    if len(parts) > 1:   # a bare number with no heading text is tolerated
        head = parts[1]
    w("<div><h2 id='sec%s'>%s %s</h2>\n" % (num, num, head))
    #sys.stderr.write("numSect: %s %s\n" % (num, head))

    # Trailing blanks are ignored when classifying, as findRefs does.
    kind = head.rstrip()

    # The classifying line may be missing in a very short section.  Its
    # exact indentation varies between sections (4.8.3 is indented
    # oddly), so require only that it is indented and compare the text
    # after the indent.
    third = ''
    if len(lines) > 2:
        third = lines[2]
    label = ''
    if third.startswith(" "):
        label = third.lstrip()

    if kind == "References":
        refSect(w, lines)
    elif kind in ('Full Copyright Statement',
                  'Acknowledgements',
                  'Acknowledgments',
                  'Abstract'):
        flowSectRest(w, lines, refs)
    elif label.startswith("Property Name:"):
        doStructuredSection(w, lines, refs, "Property", 'Property')
    elif label.startswith("Value Name:"):
        doStructuredSection(w, lines, refs, "Value")
    elif label.startswith("Component Name:"):
        doStructuredSection(w, lines, refs, "Component", 'Class')
    elif label.startswith("Parameter Name:"):
        doStructuredSection(w, lines, refs, "Parameter", 'Property')
    elif (label.startswith("To:") and
          label[3:].lstrip().startswith("ietf-mime-directory@imc.org")):
        doStructuredSection(w, lines, refs, "Type")
    else:
        w("<pre>")
        for l in lines[1:]:
            bodyText(w, l, refs)
            w("\n")
        w("</pre>\n</div>\n")
def bodyText(w, txt, refs):
    """ write body text, linking refs

    w is a write callable, txt one line of text and refs the list of
    known reference labels.  Every "[label]" whose label is in refs is
    written as a link to that reference's id; all other text, including
    brackets that are not citations, is written escaped and otherwise
    unchanged.

    >>> import StringIO
    >>> w = StringIO.StringIO()
    >>> bodyText(w.write, 'abc [def] ghi', ['def']); w.getvalue()
    "abc <a href='#ref_def'>[def]</a> ghi"
    >>> out = []
    >>> bodyText(out.append, 'a [b] c [def]', ['def']); ''.join(out)
    "a [b] c <a href='#ref_def'>[def]</a>"
    """
    pieces = txt.split('[')
    # Text before the first '[' can never be a citation.
    doChars(w, pieces[0])
    for part in pieces[1:]:
        for ref in refs:
            if part.startswith(ref + ']'):
                w("<a href='#%s'>[%s]</a>" % (asID(ref), ref))
                # Slice rather than split(']'): a later ']' on the same
                # line must not break the unpacking.
                part = part[len(ref) + 1:]
                break
        else:
            # Not a citation: put back the '[' that split() consumed
            # (ABNF such as [optional] relies on it).
            w('[')
        doChars(w, part)
def doChars(w, txt):
    """Write txt through w with '&' and '<' replaced by their entities.

    '&' is replaced first so that the '&' of a generated '&lt;' is not
    escaped a second time.
    """
    escaped = txt
    for raw, entity in (("&", "&amp;"), ("<", "&lt;")):
        escaped = escaped.replace(raw, entity)
    w(escaped)
import string

# ala iana-token = 1*(ALPHA / DIGIT / "-")
# ALPHA is ASCII-only, so use ascii_letters: string.letters varies with
# the locale and does not exist on Python 3.
NAMECHARS = string.ascii_letters + string.digits + '-'

# Labels that open a new subsection inside a structured section.
# doStructuredSection compares the text before the first ':' of a line
# (leading blanks removed) against these.
Tags = (
    'Purpose',
    'Formal Definition',
    'Value Type',
    'Property Parameters',
    'Property Parameter',
    'Conformance',
    'Description',
    'Format Definition',
    'Example',
    'To',
    'Subject',
    'Type name',
    'Type purpose',
    'Type encoding',
    'Type value',
    'Type special notes',
    'Type example',
)
def doStructuredSection(w, lines, refs, secType, rdfClass=None):
    """Write a registration-style section as a <dl> of tagged parts.

    Scanning starts at lines[2].  A line whose text before the first ':'
    (leading blanks removed) is "<secType> Name" or one of Tags opens a
    new part; the text after the ':' becomes the part's first body line.
    Every other line is added to the current part.  Each finished part
    is written by subSect.  Closes the <dl> and the <div> opened by the
    caller.
    """
    w("<dl>\n")
    name_tag = '%s Name' % secType
    tag = ''
    body = []
    for text in lines[2:]:
        hd = None
        if ':' in text:
            hd, rest = text.lstrip().split(":", 1)
        if hd is not None and (hd == name_tag or hd in Tags):
            if body:
                subSect(w, secType, rdfClass, tag, body, refs)
            tag = hd
            body = [rest]
        else:
            body.append(text)
    if body:
        subSect(w, secType, rdfClass, tag, body, refs)
    w("</dl>\n")
    w("</div>\n")
def subSect(w, secType, rdfClass, dt, dd, refs):
    """Write one tagged part of a structured section as <dt>/<dd>.

    w        -- write callable
    secType  -- section type given by the caller ("Property", "Value", ...)
    rdfClass -- 'Class', 'Property' or None; selects how the id of a
                name part is formed
    dt       -- the part's tag (e.g. 'Purpose', 'Value Type')
    dd       -- list of the part's body lines; dd[0] may be rewritten
    refs     -- known reference labels, handed on to bodyText
    """
    if dt.endswith(" Name") or dt == 'Type name':
        # The name part: derive the element id from the declared name.
        name = ''.join(dd).strip()
        # VEVENT is quoted extraneously
        if name[0] == '"':
            name = name[1:-1]
            # NOTE(review): the nesting of this assignment under the
            # quote check is inferred from the comment above; confirm
            # against the upstream file.
            dd[0] = name
        if name.startswith("Any property name with"):
            name = "X-"
        elif rdfClass:
            # initial capital only when the part describes a class
            name = camelCase(name, rdfClass == 'Class')
        else:
            name = secType + "_" + name
        w("<dt id='%s'>%s</dt>\n" % (name, dt))
    else:
        w("<dt>%s</dt>\n" % (dt,))
    w("<dd class='%s'>" % (asClass(dt),))
    if dt == 'Value Type':
        # The first line names the value type, optionally followed by a
        # sentence; split at the first '.' to separate the two.
        rest = dd[0]
        rel='value-type'
        if '.' in rest:
            name, rest = rest.split('.', 1)
            name = name.strip()
            rest = '.' + rest
        else:
            name = rest.strip()
            rest = ''
        if name.startswith("The default"):
            # Prose form: the type name is the last word; the words
            # before it are written ahead of the link.
            rel='default-value-type'
            txt = name
            name = txt.split()[-1]
            w(txt[:-len(name)])
        if 'separated' in rest:
            rel = 'list-of'
        w("<a rel='%s' href='#Value_%s'>%s</a> <pre> %s\n"
          % (rel, name, name, rest))
        for l in dd[1:]:
            # tokenRefs writes everything up to and including the last
            # recognised token and returns the unwritten remainder.
            l = tokenRefs(w, l, 'allowed-type',
                          {'DATE': 'Value_DATE',
                           'DATE-TIME': 'Value_DATE-TIME',
                           'PERIOD': 'Value_PERIOD',
                           'BINARY': 'Value_BINARY'})
            bodyText(w, l, refs)
            w("\n")
    else:
        # Pick the token table (if any) whose words are turned into
        # links within this part's text.
        tokens = None
        rel = None
        if secType == "Property" and \
           (dt == 'Conformance' or dt == 'Description'):
            rel = 'applies-to'
            tokens = {'VEVENT': 'Vevent',
                      'VTODO': 'Vtodo',
                      'VJOURNAL': 'Vjournal',
                      'VFREEBUSY': 'Vfreebusy',
                      'VTIMEZONE': 'Vtimezone',
                      'VALARM': 'Valarm'
                      }
        elif dt == 'Description' and secType == "Component":
            rel = 'def'
            tokens = {
                'STANDARD': 'standard',
                'DAYLIGHT': 'daylight',
                }
        elif dt == 'Description' and secType == "Value":
            rel = 'def'
            tokens = {
                'FREQ': 'freq',
                'UNTIL': 'until',
                'COUNT': 'count',
                'INTERVAL': 'interval',
                'BYSECOND': 'bysecond',
                'BYMINUTE': 'byminute',
                'BYHOUR': 'byhour',
                'BYDAY': 'byday',
                'BYMONTHDAY': 'bymonthday',
                'BYYEARDAY': 'byyearday',
                'BYWEEKNO': 'byweekno',
                'BYMONTH': 'bymonth',
                'BYSETPOS': 'bysetpos',
                'WKST': 'wkst',
                }
        w("<pre> ")
        for l in dd:
            if tokens: l = tokenRefs(w, l, rel, tokens)
            bodyText(w, l, refs)
            w("\n")
    # Both branches leave a <pre> open; close it and the <dd>.
    w("</pre>\n</dd>\n")
def asClass(t):
    """Return heading tag t as a class name: t with its spaces removed."""
    pieces = t.split(" ")
    return ''.join(pieces)
def tokenRefs(w, l, rel, tokens):
    """Write line l with every known token turned into a link.

    w is a write callable, l one line of text, rel the value for the
    links' rel attribute and tokens a non-empty dict mapping token text
    to link target.  Text up to and including the last token found is
    written (escaped) through w; the unwritten remainder of the line is
    returned for the caller to write.

    When rel is 'def', the first link to each target within this call
    also carries id=target; seen is local to the call, so that is the
    first occurrence per line, not per document.
    """
    # Longest alternatives first: the regex takes the first alternative
    # that matches, so 'DATE' must not be tried before 'DATE-TIME', nor
    # 'BYMONTH' before 'BYMONTHDAY'.  Keys are escaped so that they
    # match literally.
    alternatives = sorted(tokens.keys(), key=len, reverse=True)
    pat = re.compile('|'.join([re.escape(alt) for alt in alternatives]))
    seen = {}
    while l:
        m = pat.search(l)
        if not m:
            break
        doChars(w, l[:m.start()])
        t = m.group(0)
        target = tokens[t]
        if rel == 'def' and target not in seen:
            w('<a id="%s" rel="%s" href="#%s">%s</a>' % (target, rel,
                                                         target, t))
            seen[target] = 1
        else:
            w('<a rel="%s" href="#%s">%s</a>' % (rel, target, t))
        l = l[m.end():]
    return l
def camelCase(n, initialCap=0):
    """Turn a hyphenated name into a camelCase identifier.

    n is split at '-', each word is lower-cased, and every word after
    the first gets an upper-case initial.  With a true initialCap the
    first word is capitalised as well.  Empty words (from a leading,
    trailing or doubled '-') contribute nothing.
    """
    # A real list, not map(): the result is indexed and sliced below,
    # which a Python 3 map object does not support.
    words = [word.lower() for word in n.split('-')]

    def ucfirst(word):
        # [:1] instead of [0] so that an empty word does not raise.
        return word[:1].upper() + word[1:]

    if initialCap:
        return ''.join([ucfirst(word) for word in words])
    return words[0] + ''.join([ucfirst(word) for word in words[1:]])
def _test():
    """Run doctest.testmod() with its default arguments."""
    from doctest import testmod
    testmod()
# Script entry point: "--test" anywhere on the command line runs the
# doctests; otherwise the arguments go to main().
if __name__ == '__main__':
    if '--test' in sys.argv:
        _test()
    else:
        try:
            main(sys.argv)
        except Usage as e:
            # The usage text is the exception class's docstring.
            # "except ... as" and an explicit write replace syntax that
            # only Python 2 accepts.
            sys.stderr.write(e.__doc__ + "\n")
            # Report the failure to the caller (e.g. make), which would
            # otherwise carry on with an empty output file.
            sys.exit(2)
# $Log: slurpIcalSpec.py,v $
# Revision 1.27 2005/11/09 23:10:49 connolly
# - changed the way duration values are modelled
# The iCalendar DURATION value type is actually more than just a
# XMLSchema.duration; it also has a RELATED parameter.
# So for
# TRIGGER;VALUE=DURATION;RELATED=START:-PT15M
# we'll write
# { ?E cal:trigger [ rdf:value "-PT15M"^^xsdt:duration;
# cal:related "START"] }
#
# - fixed test data to have rdf:datatype on integer
# values, to match the schema (which matches the RFC)
#
# - fixed schema to show DATE-TIME properties (dtstart, ...)
# as DatatypeProperties
# (there are little/no tests for PERIOD; beware)
#
# - scraped more details about property parameters (e.g. partstat, cn,
# cutype, ...) and rrule parts (freq, interval, ...) from the RFC so
# that they show up as links in the hypertext version and as RDF
# properties in the schema. likewise timezone components (standard,
# daylight)
# - side effect: added some whitespace in rfc2445.html
#
# - demoted x- properties
# - removed x- properties from .rdf versions of test data
# this allows the round-trip tests to pass
# - fromIcal.py doesn't output them unless you give the --x option
#
# - added Makefile support for consistency checking with pellet
#
# - demoted blank line diagnostic in fromIcal.py to a comment
#
# - silenced some left-over debug diagnostics in slurpIcalSpec.py
#
# - fixed test/test-created.rdf; added it to fromIcalTest.py list
#
# Revision 1.26 2005/07/22 21:14:32 connolly
# remove : from iCalendar heading
#
# Revision 1.25 2005/07/22 21:00:00 connolly
# - added support for RFC2425, which has
# - numbered Abstract and TOC
# - examples that start in column 1
#
# Revision 1.24 2005/07/22 20:42:12 connolly
# - handle VCARD structured section tags
# - working on example extraction; started with CSS style
# - no bullets on TOC items; just the numbers
# - handle a ref split across lines in VCARD as a couple typos
#
# Revision 1.23 2005/07/22 19:51:28 connolly
# - parameterize RFC-specific bits so it works for RFC2426 also
# - factor out typo handling
# - take RFC number on command line; write diagnostic for incorrect usage
#
# Revision 1.22 2005/07/22 19:28:18 connolly
# - mark up titles in bibliography; make links to RFCs
# - render copyright, acks sections flowed rather than pre
# - fix extra . at end of section ID
#
# Revision 1.21 2005/07/22 18:49:58 connolly
# - handle references in 2 passes
# - 1st pass to find ref labels in refs section
# - 2nd pass to format references from the body and the bibliography
# - added some unit tests and a --test option
# - handle unnumbered overview section in TOC
#
# Revision 1.20 2004/02/29 14:52:00 connolly
# new grddl names
#
# Revision 1.19 2004/02/12 06:31:23 connolly
# fix EVENT to VEVENT typo
#
# Revision 1.18 2004/02/08 03:30:54 connolly
# allow or odd indentation of 4.3.8 Integer
#
# Revision 1.17 2004/02/08 00:06:03 connolly
# find domain info in Descriptions of properties as well as Conformance
#
# Revision 1.16 2004/02/07 06:30:12 connolly
# take out broken conformance links to Vcalendar
#
# Revision 1.15 2004/02/07 06:02:02 connolly
# - links from property conformance subsections to components
#
# Revision 1.14 2004/02/07 05:31:21 connolly
# - handle Property Name: Any ... X-
# - add purposes to formal schema as rdfs:comment
#
# Revision 1.13 2004/02/07 05:21:33 connolly
# - simplify subSect
# - fix a typo in RFC 2445
#
# Revision 1.12 2004/02/07 04:55:50 connolly
# - refactored doStructuredSection to collect dd lines
# - use doChars() to fix a bug noted in a comment
# - removed deblank (dead code)
# - removed list(lines) (debugging code)
#
# Revision 1.11 2004/02/07 04:30:41 connolly
# - use generators for depagination, section splitting
# - cite RFC guidelines RFC
# - factor out some hard-coded strings
#
# Revision 1.10 2004/02/07 02:39:10 connolly
# doStructuredSection was getting out of hand;
# refactored it before working on Conformance section
#
# Revision 1.9 2004/02/07 00:04:37 connolly
# find more value type info; turn into allowed-type links
#
# Revision 1.8 2004/02/01 07:43:16 connolly
# recognize (though do not fully handle) value types with defaults
#
# Revision 1.7 2004/02/01 06:55:11 connolly
# add provenance in address element
#
# Revision 1.6 2004/01/30 01:15:39 connolly
# fixed case/hypenation
#
# Revision 1.5 2004/01/29 19:40:47 connolly
# added profile for GRDDL
#
# Revision 1.4 2004/01/29 16:49:32 connolly
# first steps towards gleaning a schema from RFC2445 via XHTML, XSLT
#
# Revision 1.3 2004/01/28 10:29:46 connolly
# handle (some cases) of prose in the Value Type field
# handle more section types
#
# Revision 1.2 2004/01/28 10:02:03 connolly
# groks quite a bit more structure
#
# Revision 1.1 2004/01/28 08:54:24 connolly
# produces pretty reasonable XHTML
#