Buchempfehlung
Mikrocomputertechnik mit Controllern der Atmel AVR-RISC-Familie
Mikrocomputertechnik mit Controllern der Atmel AVR-RISC-Familie
Umfassend, aber leicht verständlich führt dieses Buch in die Programmierung von ATMEL AVR Mikrocontrollern ein. [Mehr Infos...]
FreeBASIC-Chat
Es sind Benutzer im FreeBASIC-Chat online.
(Stand:  )
FreeBASIC bei Twitter
Twitter FreeBASIC-Nachrichten jetzt auch über Twitter erhalten. Follow us!

fb:porticula NoPaste

Info
Info / Hilfe
Liste
Übersicht / Liste
Neu
Datei hochladen
Suche
Quellcode suchen
Download
Dateidownload

Wortzaehler

Uploader:Mitgliedgrindstone
Datum/Zeit:19.08.2014 12:52:09

#define NULL 0
#define recordLen SizeOf(tNode)

Const skipChars = 32
Const treeSubNodeCount = 256 - skipChars
Const printInterval = 750

Enum EState
    undefined
    word
End Enum

Type tNode
    subNodes(1 To treeSubNodeCount) As tNode Ptr
    count As UInteger = 0
End Type

Declare Function isSplittingChar (char As UByte) As Integer
Declare Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer)
Declare Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String)
Declare Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
Declare Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
Declare Sub deallocateTree (tree() As tNode Ptr)
Declare Sub deallocateNodeAndSubNodes (node As tNode Ptr)


Dim As UByte char, c
Dim As EState state = EState.undefined
Dim As UInteger numWords = 0
Dim As UInteger currentWordCharCount = 0, totalCharCount = 0
Dim As String buffer = ""
Dim As Double tStart, tEnd
Dim As String text
Dim As Integer anfang, ende

Dim tree(1 To treeSubNodeCount) As tNode Ptr

For i As Integer = LBound(tree) To UBound(tree)
    tree(i) = CAllocate(recordLen)
Next i

tStart = TIMER
'Open ExePath + "/debian-reference.de.txt" For Binary As #1
Open ExePath + "/debianreferenz.txt" For Binary As #1
text = Input (Lof(1),1) 'datei einlesen
Close 1

Do 'text abarbeiten
    Do 'nächsten wortanfang suchen
        anfang += 1
    Loop Until (isSplittingChar(text[anfang]) = 0) Or (anfang > Len(text))
    ende = anfang
    Do 'nächstes wortende suchen
        ende += 1
    Loop Until (isSplittingChar(text[ende]) <> 0)  Or (ende >= Len(text))
    buffer = Mid(text,anfang + 1, ende - anfang) 'wort aus text holen
    totalCharCount += ende - anfang

    If buffer = "" Then
        Exit Do
    EndIf
    anfang = ende 'zeiger für nächste suche setzen
    numWords += 1
    putCharsIntoTree(tree(buffer[0]-skipChars), buffer, 0)

Loop Until ende >= Len(text)
text = "" 'speicher freigeben
tEnd = TIMER

Locate 1,1
Print "Datei eingelesen in ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."
Print

Dim As Double averageLength = 0
If (numWords > 0) Then
    averageLength = (totalCharCount / CDbl(numWords))
End If


Print "Es wurden " & numWords & " Woerter gezaehlt.    "
Print "Die durchschnittliche Wortlaenge betrug ";
Print Using "##.### "; averageLength;
Print "Buchstaben."
Print

'Ganzen Baum ausgeben:
'Print "Haeufigkeiten:"
'For i As Integer = 1 To treeSubNodeCount
'    traverseTree(tree(i), i, Chr(skipChars+i))
'Next i
'Print

Print "Gezielter Lookup von Woertern:"

Dim lookupWords(1 To ...) As String = { "Shell", "Editor", "der", "die", "das" }

tStart = Timer
For i As Integer = LBound(lookupWords) To UBound(lookupWords)
    Print "Wie oft kommt " & chr(34) & lookupWords(i) & chr(34) & " vor? " & lookupWordCount(tree(), lookupWords(i)) & "x"
Next i
tEnd = Timer
Print
Print "Das Nachschlagen der Haeufigkeiten dauerte ";
Print Using "##.###"; (tEnd-tStart);
Print " Sekunden."

Print
Print "Abbau des Baums aus dem Speicher...  ";
deallocateTree(tree())
Print "Fertig."

Print
Print "Druecken Sie eine beliebige Taste zum Beenden."



GetKey
End 0


'Ermittelt, ob das uebergebene Zeichen eines ist, das das Ende
'eines Wortes kennzeichnet (zum Beispiel ein Leerzeichen, ein
'Zeilenumbruch oder ein Komma).
Function isSplittingChar (char As UByte) As Integer
    '                               ,              .              :              ;              /
    Return ((char < 33) Or (char = 44) Or (char = 46) Or (char = 58) Or (char = 59) Or (char = 47))
End Function

Sub putCharsIntoTree (node As tNode Ptr, buffer As String, index As Integer)
    Dim As UByte c = buffer[index]
    If ((index+1) >= Len(buffer)) Then
        ' Wort endet hier in diesem Knoten
        node->count += 1
    Else
        c = buffer[index+1]
        If (node->subNodes(c-skipChars) = NULL) Then
            node->subNodes(c-skipChars) = CAllocate(recordLen)
        End if
        putCharsIntoTree(node->subNodes(c-skipChars), buffer, index+1)
    End If
End Sub

Sub traverseTree (node As tNode Ptr, index As Ubyte, path As String)
    If (node = NULL) Then Return
    If (node->count > 0) Then
        Print path & " => " & node->count & " x"
    End If
    For i As Integer = 1 To treeSubNodeCount
        If (node->subNodes(i) <> NULL) Then
            traverseTree (node->subNodes(i), i, path + Chr(i+skipChars))
        End If
    Next i
End Sub

Function lookupWordCount (tree() As tNode Ptr, word As String) As Integer
    If (Len(word) < 1) Then Return 0
    Return lookupCharacterCount(tree(word[0]-skipChars), word, 0)
End Function

Function lookupCharacterCount (node As tNode Ptr, word As String, index As Integer) As Integer
    If ((index+1) >= Len(word)) Then
        Return node->count
    Else
        Dim As UByte nextChar = word[index+1]
        Return lookupCharacterCount (node->subNodes(nextChar-skipChars), word, index+1)
    End If
End Function

Sub deallocateTree (tree() As tNode Ptr)
    For i As Integer = 1 To treeSubNodeCount
        deallocateNodeAndSubNodes(tree(i))
        DeAllocate tree(i)
    Next i
End Sub

Sub deallocateNodeAndSubNodes (node As tNode Ptr)
    If (node = NULL) Then Return
    deallocateTree(node->subNodes())
End Sub