head 1.9; access; symbols pkgsrc-2014Q1:1.8.0.8 pkgsrc-2014Q1-base:1.8 pkgsrc-2013Q4:1.8.0.6 pkgsrc-2013Q4-base:1.8 pkgsrc-2013Q3:1.8.0.4 pkgsrc-2013Q3-base:1.8 pkgsrc-2013Q2:1.8.0.2 pkgsrc-2013Q2-base:1.8 pkgsrc-2013Q1:1.7.0.16 pkgsrc-2013Q1-base:1.7 pkgsrc-2012Q4:1.7.0.14 pkgsrc-2012Q4-base:1.7 pkgsrc-2012Q3:1.7.0.12 pkgsrc-2012Q3-base:1.7 pkgsrc-2012Q2:1.7.0.10 pkgsrc-2012Q2-base:1.7 pkgsrc-2012Q1:1.7.0.8 pkgsrc-2012Q1-base:1.7 pkgsrc-2011Q4:1.7.0.6 pkgsrc-2011Q4-base:1.7 pkgsrc-2011Q3:1.7.0.4 pkgsrc-2011Q3-base:1.7 pkgsrc-2011Q2:1.7.0.2 pkgsrc-2011Q2-base:1.7 pkgsrc-2011Q1:1.6.0.20 pkgsrc-2011Q1-base:1.6 pkgsrc-2010Q4:1.6.0.18 pkgsrc-2010Q4-base:1.6 pkgsrc-2010Q3:1.6.0.16 pkgsrc-2010Q3-base:1.6 pkgsrc-2010Q2:1.6.0.14 pkgsrc-2010Q2-base:1.6 pkgsrc-2010Q1:1.6.0.12 pkgsrc-2010Q1-base:1.6 pkgsrc-2009Q4:1.6.0.10 pkgsrc-2009Q4-base:1.6 pkgsrc-2009Q3:1.6.0.8 pkgsrc-2009Q3-base:1.6 pkgsrc-2009Q2:1.6.0.6 pkgsrc-2009Q2-base:1.6 pkgsrc-2009Q1:1.6.0.4 pkgsrc-2009Q1-base:1.6 pkgsrc-2008Q4:1.6.0.2 pkgsrc-2008Q4-base:1.6 pkgsrc-2008Q3:1.5.0.4 pkgsrc-2008Q3-base:1.5 cube-native-xorg:1.5.0.2 cube-native-xorg-base:1.5 pkgsrc-2008Q2:1.4.0.4 pkgsrc-2008Q2-base:1.4 cwrapper:1.4.0.2 pkgsrc-2008Q1:1.3.0.2 pkgsrc-2008Q1-base:1.3 pkgsrc-2007Q4:1.2.0.6 pkgsrc-2007Q4-base:1.2 pkgsrc-2007Q3:1.2.0.4 pkgsrc-2007Q3-base:1.2 pkgsrc-2007Q2:1.2.0.2 pkgsrc-2007Q2-base:1.2 pkgsrc-2007Q1:1.1.0.2 pkgsrc-2007Q1-base:1.1; locks; strict; comment @# @; 1.9 date 2014.04.20.01.44.53; author schmonz; state dead; branches; next 1.8; commitid AwhHRKspzeLRumxx; 1.8 date 2013.06.06.01.52.01; author schmonz; state Exp; branches; next 1.7; commitid F22QdNvs3NC3LuSw; 1.7 date 2011.06.09.06.21.38; author schmonz; state Exp; branches; next 1.6; 1.6 date 2008.10.19.18.39.56; author schmonz; state Exp; branches; next 1.5; 1.5 date 2008.07.31.02.47.45; author schmonz; state Exp; branches; next 1.4; 1.4 date 2008.06.01.04.52.15; author schmonz; state Exp; branches; next 1.3; 1.3 date 2008.02.19.10.31.54; author schmonz; state Exp; branches; next 1.2; 1.2 date 2007.06.08.11.39.02; author wiz; state Exp; branches; next 1.1; 1.1 date 2007.01.19.00.55.06; author schmonz; state Exp; branches; next ; desc @@ 1.9 log @Update to 2014.4.5 (new upstream and versioning). From the git log: * Make html2text.py compatible with python 3.*. * PEP8ize the script. * Switch tests to unittest (w/generated test cases). * Remove .editorconfig. * Remove deprecated function has_key. * Handle invalid literals for start attribute. * Add bodywidth parameter instead of module-wide BODY_WIDTH. @ text @$NetBSD: patch-aa,v 1.8 2013/06/06 01:52:01 schmonz Exp $ Small cleanup patch from Debian. --- html2text.py.orig 2012-01-07 15:00:40.000000000 +0000 +++ html2text.py @@@@ -479,6 +479,7 @@@@ class HTML2Text(HTMLParser.HTMLParser): if has_key(attrs, 'src'): attrs['href'] = attrs['src'] alt = attrs.get('alt', '') + alt = re.sub('\n', ' ', alt) if self.inline_links: self.o("![") self.o(alt) @@@@ -512,7 +513,7 @@@@ class HTML2Text(HTMLParser.HTMLParser): list_style = tag numbering_start = list_numbering_start(attrs) self.list.append({'name':list_style, 'num':numbering_start}) - else: + elif self.list: if self.list: self.list.pop() self.lastWasList = True else: @ 1.8 log @Update to 3.200.3. No changelog provided. Summarized highlights from the git log: * allow module to be imported * set UTF-8 as default encoding * better handling of Google Docs HTML * better handling of more edge-case inputs * nitpicky bugfixes to whitespace, emphasis, etc. * new config options @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.7 2011/06/09 06:21:38 schmonz Exp $ @ 1.7 log @Update to 3.02. From the changelog: 3.02: * Use optparse for parsing and checking arguments * Encode all output as UTF-8 * Accept optional encoding for local file or URL * Use chardet for guessing local file character sets * Fix double-newlines inside code blocks 3.01: * Fix bug with unknown entities 3.0: * Add forward compatibility with Python3 2.40: * Update from sgmllib to HTMLParser so that valid XHTML doesn't fail @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.6 2008/10/19 18:39:56 schmonz Exp $ d5 1 a5 1 --- html2text.py.orig 2011-03-08 23:35:25.000000000 +0000 d7 1 a7 1 @@@@ -305,6 +305,7 @@@@ class _html2text(HTMLParser.HTMLParser): d12 7 a18 7 i = self.previousIndex(attrs) if i is not None: attrs = self.a[i] @@@@ -325,7 +326,7 @@@@ class _html2text(HTMLParser.HTMLParser): if tag in ["ol", "ul"]: if start: self.list.append({'name':tag, 'num':0}) d22 2 a23 2 self.p() @ 1.6 log @Update to 2.34. From the changelog: 2008-10-09: 2.34. elim extra \ns (tx Keith Bussell) 2008-09-19: 2.33. add support for abbr (tx Nathan Youngman) 2008-07-31: 2.32. fix parsing bug with fastcompany (tx Elias Soong) @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.5 2008/07/31 02:47:45 schmonz Exp $ d3 3 a5 1 --- html2text.py.orig 2008-10-19 14:36:07.000000000 -0400 d7 2 a8 2 @@@@ -282,6 +282,7 @@@@ class _html2text(sgmllib.SGMLParser): if attrs.has_key('src'): d15 1 a15 1 @@@@ -302,7 +303,7 @@@@ class _html2text(sgmllib.SGMLParser): @ 1.5 log @Update to 2.31: * fix unicode support (tx John Chapman) @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.4 2008/06/01 04:52:15 schmonz Exp $ d3 1 a3 1 --- html2text.py.orig 2008-07-30 22:37:59.000000000 -0400 d5 1 a5 1 @@@@ -266,6 +266,7 @@@@ class _html2text(sgmllib.SGMLParser): d13 1 a13 1 @@@@ -286,7 +287,7 @@@@ class _html2text(sgmllib.SGMLParser): @ 1.4 log @Update to 2.3. From the changelog: - add SKIP_INTERNAL_LINKS (tx Christian Siefkes) - prelim JS support, various fixes, improved performances (tx Johannes Fitz) @ text @d1 1 a1 1 $NetBSD$ d3 1 a3 1 --- html2text.py.orig 2008-06-01 00:37:59.000000000 -0400 d5 1 a5 10 @@@@ -155,7 +155,7 @@@@ class _html2text(sgmllib.SGMLParser): self.lastWasNL = 0 def outtextf(self, s): - if type(s) is type(''): s = codecs.utf_8_decode(s)[0] + if type(s) is type(''): s = codecs.utf_8_decode(s, "replace")[0] self.outtext += s def close(self): @@@@ -267,6 +267,7 @@@@ class _html2text(sgmllib.SGMLParser): d13 1 a13 1 @@@@ -287,7 +288,7 @@@@ class _html2text(sgmllib.SGMLParser): @ 1.3 log @Update to 2.29. From the changelog: * fix degenerate sites (cough 9rules) that don't close head tags * fix crash when feedparser wasn't available (tx Johann Burkard) @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.2 2007/06/08 11:39:02 wiz Exp $ d3 1 a3 1 --- html2text.py.orig 2008-02-13 16:24:14.000000000 -0500 d5 26 a30 26 @@@@ -150,7 +150,7 @@@@ class _html2text(sgmllib.SGMLParser): self.lastWasNL = 0 def outtextf(self, s): - if type(s) is type(''): s = codecs.utf_8_decode(s)[0] + if type(s) is type(''): s = codecs.utf_8_decode(s, "replace")[0] self.outtext += s def close(self): @@@@ -262,6 +262,7 @@@@ class _html2text(sgmllib.SGMLParser): if attrs.has_key('src'): attrs['href'] = attrs['src'] alt = attrs.get('alt', '') + alt = re.sub('\n', ' ', alt) i = self.previousIndex(attrs) if i is not None: attrs = self.a[i] @@@@ -282,7 +283,7 @@@@ class _html2text(sgmllib.SGMLParser): if tag in ["ol", "ul"]: if start: self.list.append({'name':tag, 'num':0}) - else: + elif self.list: if self.list: self.list.pop() self.p() @ 1.2 log @Update to 2.28. Simplify package. Changes: 2007-04-12: 2.28. fix tables (tx Pete Savage) 2007-04-09: 2.27. fix line breaks (tx Danny O'Brien) 2007-02-23: 2.26. input unicode better (tx John Cavanaugh for the push) @ text @d1 1 a1 1 $NetBSD: patch-aa,v 1.1 2007/01/19 00:55:06 schmonz Exp $ d3 1 a3 1 --- html2text.py.orig 2007-01-18 19:06:49.000000000 -0500 d14 1 a14 1 @@@@ -259,6 +259,7 @@@@ class _html2text(sgmllib.SGMLParser): d22 1 a22 1 @@@@ -279,7 +280,7 @@@@ class _html2text(sgmllib.SGMLParser): @ 1.1 log @Apply small cleanup patch from Debian. Bump PKGREVISION. @ text @d1 1 a1 1 $NetBSD$ d3 2 a4 2 --- html2text-2.25.py.orig 2007-01-18 19:06:49.000000000 -0500 +++ html2text-2.25.py @