fb:porticula NoPaste
Wortzaehler
| Uploader: |  grindstone | 
| Datum/Zeit: | 19.08.2014 12:52:09 | 
#define NULL 0
#define recordLen SizeOf(tNode)
Const skipChars = 32
Const treeSubNodeCount = 256 - skipChars
Const printInterval = 750
Enum EState
    undefined
    word
End Enum
Type tNode
    subNodes(1 To treeSubNodeCount) As tNode Ptr
    count As UInteger = 0
End Type
Declare Function isSplittingChar (char As UByte) As Integer
Declare Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer)
Declare Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String)
Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
Declare Sub deallocateTree (tree() As tNode Ptr)
Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr)
Dim As UByte char, c
Dim As EState state = EState.undefined
Dim As UInteger numWords = 0
Dim As UInteger currentWordCharCount = 0, totalCharCount = 0
Dim As String buffer = ""
Dim As Double tStart, tEnd
Dim As String text
Dim As Integer anfang, ende
Dim tree(1 To treeSubNodeCount) As tNode Ptr
For i As Integer = LBound(tree) To UBound(tree)
    tree(i) = CAllocate(recordLen)
Next i
tStart = TIMER
'Open ExePath + "/debian-reference.de.txt" For Binary As #1
Open ExePath + "/debianreferenz.txt" For Binary As #1
text = Input (Lof(1),1) 'datei einlesen
Close 1
Do 'text abarbeiten
    Do 'nächsten wortanfang suchen
        anfang += 1
    Loop Until (isSplittingChar(text[anfang]) = 0) Or (anfang > Len(text))
    ende = anfang
    Do 'nächstes wortende suchen
        ende += 1
    Loop Until (isSplittingChar(text[ende]) <> 0)  Or (ende >= Len(text))
    buffer = Mid(text,anfang + 1, ende - anfang) 'wort aus text holen
    totalCharCount += ende - anfang
    If buffer = "" Then
        Exit Do
    EndIf
    anfang = ende 'zeiger für nächste suche setzen
    numWords += 1
    putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0)
Loop Until ende >= Len(text)
text = "" 'speicher freigeben
tEnd = TIMER
Locate 1,1
Print "Datei eingelesen in ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Print
Dim As Double averageLength = 0
If (numWords > 0) Then
    averageLength = (totalCharCount / CDbl(numWords))
End If
Print "Es wurden " & numWords & " Woerter gezaehlt.    "
Print "Die durchschnittliche Wortlaenge betrug ";
Print Using "##.### "; averageLength;
Print "Buchstaben."
Print
'Ganzen Baum ausgeben:
'Print "Haeufigkeiten:"
'For i As Integer = 1 To treeSubNodeCount
'    traverseTree(tree(i), i, Chr(skipChars+i))
'Next i
'Print
Print "Gezielter Lookup von Woertern:"
Dim lookupWords(1 To ...) As String = { "Shell", "Editor", "der", "die", "das" }
tStart = Timer
For i As Integer = LBound(lookupWords) To UBound(lookupWords)
    Print "Wie oft kommt " & chr(34) & lookupWords(i) & chr(34) & " vor? " & lookupWordCount(tree(), lookupWords(i)) & "x"
Next i
tEnd = Timer
Print
Print "Das Nachschlagen der Haeufigkeiten dauerte ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Print
Print "Abbau des Baums aus dem Speicher...  ";
deallocateTree(tree())
Print "Fertig."
Print
Print "Druecken Sie eine beliebige Taste zum Beenden."
GetKey
End 0
'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende
'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein
'Zeilenumbruch oder ein Komma).
Function isSplittingChar (char As UByte) As Integer
    '                               ,              .              :              ;              /
    Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or (char = 47))
End Function
Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer)
    Dim As UByte c = buffer[index]
    If ((index+1) >= Len(buffer)) Then
        ' Wort endet hier in diesem Knoten
        node->count += 1
    Else
        c = buffer[index+1]
        If (node->subNodes(c-skipChars) = NULL) Then
            node->subNodes(c-skipChars) = CAllocate(recordLen)
        End if
        putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1)
    End If
End Sub
Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String)
    If (node = NULL) Then Return
    If (node->count > 0) Then
        Print path & " => " & node->count & " x"
    End If
    For i As Integer = 1 To treeSubNodeCount
        If (node->subNodes(i) <> NULL) Then
            traverseTree (node->subNodes(i), i, path + Chr(i+skipChars))
        End If
    Next i
End Sub
Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
    If (Len(word) < 1) Then Return 0
    Return lookupCharacterCount(tree(word[0]-skipChars), word, 0)
End Function
Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
    If ((index+1) >= Len(word)) Then
        Return node->count
    Else
        Dim As UByte nextChar = word[index+1]
        Return lookupCharacterCount (node->subNodes(nextChar-skipChars), word, index+1)
    End If
End Function
Sub deallocateTree (tree() As tNode Ptr)
    For i As Integer = 1 To treeSubNodeCount
        deallocateNodeAndSubNodes(tree(i))
        DeAllocate tree(i)
    Next i
End Sub
Sub deallocateNodeAndSubNodes (node As tNode Ptr)
    If (node = NULL) Then Return
    deallocateTree(node->subNodes())
End Sub
	


			FreeBASIC-Nachrichten jetzt auch über Twitter erhalten. Follow us!



