Logo Search packages:      
Sourcecode: hellanzb version File versions  Download package

def Hellanzb::NZBLeecher::ArticleDecoder::parseArticleData (   segment,
  justExtractFilename = False 
)

Clean the specified segment's articleData and extract the article's filename from it.
If justExtractFilename is False, also decode the articleData to the segment's
destination.

Definition at line 165 of file ArticleDecoder.py.

def parseArticleData(segment, justExtractFilename = False):
    """ Clean the specified segment's articleData, and get the article's filename from the
    articleData. If not justExtractFilename, also decode the articleData to the segment's
    destination.

    The articleData lines are scanned for a yencode header (=ybegin / =ypart / =yend) or
    a uuencode header (begin <mode> <filename>). While scanning, lines inside the encoded
    data that start with '..' have the leading dot removed in place.

    Parameters:
      segment             -- the segment to parse. Its articleData is cleaned and
                             rewritten in place; yBegin, yEnd, ySize and yCrc are set on
                             it when the matching yencode fields are present, and
                             segment.nzbFile receives the filename (via setRealFileName)
                             and, if still unset, ySize
      justExtractFilename -- when true, return right after the header scan without
                             decoding anything

    Returns None when justExtractFilename is true; otherwise the encoding type returned
    by decodeSegmentToFile, after which segment.articleData is reset to ''.

    Raises FatalError if segment.articleData is None, if the =ybegin line lacks any of
    the line/size/name fields, or if a uuencode begin line carries no filename.
    """
    if segment.articleData is None:
        raise FatalError('Could not getFilenameFromArticleData')

    # First, clean it
    stripArticleData(segment.articleData)

    encodingType = UNKNOWN
    withinData = False
    for index, line in enumerate(segment.articleData):

        if withinData:
            # un-double-dot any lines :\
            if line[:2] == '..':
                line = line[1:]
                segment.articleData[index] = line

        # After stripping the articleData, we should find a yencode header, uuencode
        # header, or a uuencode part header (an empty line)
        if not withinData and line.startswith('=ybegin'):
            # Parse the =ybegin line. Be explicit about the length of =ybegin fields we're
            # splitting, incase badly named filenames destroy the regexp Example:
            # =ybegin part=1 line=128 size=71492 name=--=GRUB=-- Puker_S1_D1.par2
            splits = 3
            if line.find(' part=') > -1:
                splits += 1
            if line.find(' total=') > -1:
                splits += 1
            ybegin = ySplit(line, splits)

            if not ('line' in ybegin and 'size' in ybegin and 'name' in ybegin):
                # FIXME: show filename information
                raise FatalError('* Invalid =ybegin line in part %d!' % segment.number)

            setRealFileName(segment.nzbFile, ybegin['name'],
                            settingSegmentNumber = segment.number)
            if segment.nzbFile.ySize is None:
                segment.nzbFile.ySize = yInt(ybegin['size'],
                                             '* Invalid =ybegin line in part %d!' % segment.number)

            encodingType = YENCODE

        elif not withinData and line.startswith('=ypart'):
            # ybegin doesn't ensure a ypart on the next line
            withinData = True

            ypart = ySplit(line)
            if 'begin' in ypart:
                segment.yBegin = yInt(ypart['begin'])
            if 'end' in ypart:
                segment.yEnd = yInt(ypart['end'])

            # Just incase a bad post doesn't include a begin header, ensure
            # the correct encodingType
            encodingType = YENCODE

        elif withinData and line.startswith('=yend'):
            yend = ySplit(line)
            if 'size' in yend:
                segment.ySize = yInt(yend['size'])
            # Store the CRC upper-cased and left-padded with zeros to 8 characters. The
            # per-part pcrc32 wins; the whole-file crc32 is only used for part 1 (or when
            # no part field is given)
            if 'pcrc32' in yend:
                segment.yCrc = '0' * (8 - len(yend['pcrc32'])) + yend['pcrc32'].upper()
            elif 'crc32' in yend and yend.get('part', '1') == '1':
                segment.yCrc = '0' * (8 - len(yend['crc32'])) + yend['crc32'].upper()

        elif not withinData and line.startswith('begin '):
            # Expected form: begin <mode> <filename>. Check the field count before
            # indexing so a begin line without a filename raises FatalError instead of
            # an IndexError
            fields = line.rstrip().split(' ', 2)
            if len(fields) < 3 or not fields[2]:
                # FIXME: show filename information
                raise FatalError('* Invalid begin line in part %d!' % segment.number)
            filename = fields[2]
            setRealFileName(segment.nzbFile, filename,
                            settingSegmentNumber = segment.number)
            encodingType = UUENCODE
            withinData = True

        elif not withinData and encodingType == YENCODE:
            # Found ybegin, but no ypart. withinData should have started on the previous
            # line -- so instead we have to process the current line
            withinData = True

            # un-double-dot any lines :\
            if line[:2] == '..':
                line = line[1:]
                segment.articleData[index] = line

        elif not withinData and segment.number == 1:
            # Assume segment #1 has a valid header -- continue until we find it. I've seen
            # some UUEncoded archives start like this:
            #
            # 222 423850423 <PLSmfijf.803495116$Es4.92395@feung.shui.beek.dk> body
            # BSD.ARCHIVE HERE IT IS
            # begin 644 bsd-archive.part45.rar
            # MJ"D+D:J6@1L'J0[O;JXTO/V`HR]4JO:/Q\J$M79S9("@]^]MFIGW/\`VJJC_
            #
            # (and of course, only segment #1 actually contains a filename). The UUDecode
            # function will also quietly ignore the first couple of lines if they are
            # garbage (can't decode)
            continue

        elif not withinData:
            # Assume this is a subsequent uuencode segment
            withinData = True
            encodingType = UUENCODE

    # FIXME: could put this check even higher up
    if justExtractFilename:
        return

    encodingType = decodeSegmentToFile(segment, encodingType)
    del segment.articleData
    segment.articleData = '' # We often check it for is None
    return encodingType

# The same function is also exposed under the name decodeArticleData
decodeArticleData = parseArticleData

def setRealFileName(nzbFile, filename, forceChange = False, settingSegmentNumber = None):


Generated by  Doxygen 1.6.0   Back to index