0

I need to write a program in assembly language that finds, in the given files, the words that start with a letter from a given interval, and builds a list of those words. Words must not repeat in the list; each word must be accompanied by a number showing how many times it occurred, and the list must be sorted alphabetically. Then I have to make another list, sorted by repetition count in descending order.

I made a program that writes every word starting with a letter from the interval to a file.

Now I want to check immediately whether the word is already in the list: if it is, add one to its repetition count; if the word is alphabetically greater, add it to the text file.

The problem is that I need to add lines in the middle of the text file or completely replace some bytes. I know I can use int 21h ah=42h to move the file pointer, but how can I delete part of the file or replace it?

Can you give me some suggestions on how I can make this work, or suggest another algorithm that could work?

EDIT: It's 8086 assembly language. The code I have already written:

.model small
.stack 100H
.data
ri db 2, 0, 2 dup (0)                 ; letter interval: [ri+0] = first letter, [ri+1] = last letter
dFail dw ?                            ; handle of the input file currently being read
skbuf db 255, 0, 255 dup (0)          ; not referenced by the code in this file

buff_dydis db 0h                      ; command-tail characters that are still unprocessed
pask_vieta db 0h                      ; command-tail offset at which the previous file name ended
eilp_vieta db 0h                      ; number of bytes returned by the last read of the input file
failo_pabaiga db 0h                   ; not referenced by the code in this file
rez db "rez.txt", 0h                  ; ASCIIZ name of the result file
rFail dw ?                            ; handle of the result file
; help message
help db "Iveskite raidzdiu intervala taokiu formatu *-*, tada iveskite failu pavadinimus ar direktorijas$"

failaspav db 255, 0, 255 dup(0)       ; ASCIIZ name of the current input file (written from offset 0)
failas db 255, 0, 255 dup (0)         ; input chunk buffer; the code reads up to 255 bytes into it
; Current word, space padded. The code blanks and writes 30 bytes starting at
; zodis1, so the buffer must really be 30 bytes long; with the former 27
; spaces + '$' the last two bytes spilled into zodis2.
zodis1 db 30 dup (' '), '$'
zodis2 db 30 dup(0)                   ; word field read back from the result file
skaicius db "  1$"                    ; count field written for a newly added word
passk db 3, 0, 3                      ; 3-byte buffer for a count field read from the result file
didsk db 20h, 20h, 31h                ; presumably the largest count seen so far ("  1" initially) - see didesnis_sk
enteris db 0Dh, 0Ah                   ; line end; the code writes only the first byte of it

; error messages
klaid1 db "Neivesti duomenys i parametru eilute$"
klaid2 db "Failo sukurimo klaida $"
klaid3 db "Failo atidarymo klaida $"
klaid4 db "Skaitymo is failo klaida $"
klaid5 db "Rasymo i faila klaida $"
klaid6 db "Klaida uzdarant faila $"
klaid7 db "Neivesti failu pavadinimai $"

.code

start:
; Program entry: set up DS, validate the command tail and handle "/?".
; ES is not modified here; the command tail is read through it (length byte
; at es:[80h], text from es:[81h] on).
mov dx, @data
mov ds, dx
;--------------------------------------------------------------------------------
xor ch, ch                                        ; CX = command-tail length
mov cl, es:[0080h]
cmp cx, 0h
je klaida1                                        ; nothing typed after the program name
mov dx, cx                                        ; keep the length, LOOP below consumes CX
mov bx, 82h
;--------------------------------------------------------------------------------
Ieskok:
cmp word ptr es:[bx], '?/'                        ; the two bytes "/?" at es:[bx]?
Je Yra                                            ; yes - print the help text and exit
inc bx
loop Ieskok
; The length test must come after the help scan: a tail of exactly " /?" is
; three characters long and used to be reported as "no file names".
cmp dx, 3h                                        ; only three characters => no file names given
je klaida7
jmp Nera                                          ; no help request, carry on

Yra:
mov ah, 9
mov dx, offset help                               ; print the help message
int 21h
jmp pabaiga

Nera:
;--------------------------------------------------------------------------------
; Create the result file (int 21h, ah=3Ch, attributes 0) and keep its handle.
mov dx, offset rez                                ; DS:DX -> "rez.txt"
xor cx, cx                                        ; file attributes = 0
mov ah, 3Ch
int 21h
jc klaida2                                        ; carry set => creation failed
mov rFail, ax                                     ; remember the handle
jmp toliau
;--------------------------------------------------------------------------------   
; Error exits 1, 2 and 7: print the matching '$'-terminated message, then quit.
klaida1:                                          ; no command-line parameters
mov dx, offset klaid1
mov ah, 09h
int 21h
jmp pabaiga
klaida2:                                          ; result file could not be created
mov dx, offset klaid2
mov ah, 09h
int 21h
jmp pabaiga

klaida7:                                          ; no file names on the command line
mov dx, offset klaid7
mov ah, 09h
int 21h
jmp pabaiga
;--------------------------------------------------------------------------------     
toliau: 
; Parse the fixed-position part of the command tail: the letter interval at
; es:[82h] / es:[84h], then start collecting the first file name at es:[86h].
; Leaves SI = 86h (read position in the tail) and DI = 0 (write index into
; failaspav) for fail_at.
;command-tail length
mov cl, es:[80h]                                    ;es:[80h] holds the length of the command tail
sub cl, 5d                                        ;drop 5: the leading characters holding the spaces and the letter interval
mov buff_dydis, cl                                ;save what is left of the tail
;letter interval
mov al, es:[82h]                                    ;first letter of the interval
mov [ri+0], al
mov al, es:[84h]                                    ;second letter of the interval
mov [ri+1], al  
;text files
mov si, 86h
;push si
xor di, di                                        ;about to work on the first file
jmp fail_at
;--------------------------------------------------------------------------------   
;==============================================================================
;==============================================================================
;============================================================================== 
naujas_failas:                                        ; entered for the second and every later file
; Reset per-file state, then fall through into fail_at with
; SI = position after the previous file name + 1 and DI = 0.
mov al, pask_vieta                                ; where the previous file name ended
xor ah, ah
mov si, ax
inc si                                            ; step over the separating space
push si                                           ; SI is reused as an index below
mov dFail, 0                                      ; forget the old input handle

xor si, si                                        ; clear the first 30 bytes of the file-name buffer
mov cx, 30d
nulinti1:
mov byte ptr [failaspav+si], 0h
inc si
loop nulinti1

xor si, si                                        ; clear 255 bytes of the input buffer
mov cx, 255d
nulinti2:
mov byte ptr [failas+si], 0h
inc si
loop nulinti2

xor si, si                                        ; blank 30 bytes of the word buffer
mov cx, 30d
spacinti:
mov byte ptr [zodis1+si], 20h
inc si
loop spacinti

dec buff_dydis                                    ; one tail character (the space) was skipped

pop si
xor di, di                                        ; file name is written from index 0
fail_at:                                               ; copy one file name from the command tail into failaspav
mov dl, es:[si]                                  ; next character of the tail
test dl, dl                                      ; 0 ends the name
jz darbas
cmp dl, 20h                                      ; so does a space
je darbas
cmp dl, 13d                                      ; and a carriage return
je darbas
mov [failaspav+di], dl                           ; ordinary character: store it
inc di
inc si
jmp fail_at                                      ; repeat until 0, space or CR

darbas:
; A complete file name is in failaspav: update the command-tail bookkeeping
; and open the file read-only.
mov ax, di
sub buff_dydis, al                                ; the name's characters have been consumed
mov ax, si
mov pask_vieta, al                                ; tail position where this name ended

; open the file
mov dx, offset failaspav
mov ax, 3D00h                                     ; ah=3Dh open, al=0 read-only
int 21h
jc klaida3                                        ; could not open
mov dFail, ax                                     ; keep the input handle
jmp pirmeil                                       ; go read the first chunk
;--------------------------------------------------------------------------------
; error 3: the input file could not be opened
klaida3:
mov dx, offset klaid3
mov ah, 09h
int 21h
jmp pabaiga
;--------------------------------------------------------------------------------   
;===============================================================================================================
;===============================================================================================================     
skaitomeil:
; Called when the current chunk is exhausted. If the last read did not fill
; the whole 255-byte buffer the file is finished; otherwise clear the buffers
; and fall through into pirmeil to read the next chunk.
cmp eilp_vieta, 255
je toliau3
jmp fail_uz
toliau3:
xor si, si                                        ; clear the input buffer
mov cx, 255d
nulinti3:
mov byte ptr [failas+si], 0h
inc si
loop nulinti3

xor si, si                                        ; blank the word buffer
mov cx, 30d
spacinti1:
mov byte ptr [zodis1+si], 20h
inc si
loop spacinti1

pirmeil:
; Read the next chunk (up to 255 bytes) of the input file into failas.
; On success with at least one byte read, continues at ieskom_zodzio with
; SI = 0 (read index into failas) and DI = 0 (write index into zodis1).
mov ah, 3Fh
mov bx, dFail
mov cx, 255d                                        ;ask for 255 bytes         
mov dx, offset failas                              ;destination buffer
int 21h
jc klaida4                                        
mov eilp_vieta, al                                ;remember how many bytes were read (low byte only)
cmp ax, 0h
jne testi1                                        ;something was read: process it
jmp fail_uz                                       ;nothing read: close this input file

testi1:
xor si, si
xor di, di                            
jmp ieskom_zodzio                                   ;start extracting the first word

;================================================================================================================

kitas_zodis:
; Advance to the next word of the current chunk. SI indexes the delimiter
; that ended the previous word. If SI has reached the number of bytes read,
; a new chunk is fetched via skaitomeil; otherwise the delimiter is skipped,
; zodis1 is blanked and control falls through into ieskom_zodzio with DI = 0.
xor ax, ax
mov al, eilp_vieta                                ;AX = number of bytes in the current chunk
cmp si, ax
jl toliau2
jmp skaitomeil
toliau2:
xor di, di                                        ;DI = index of the new word's first letter
inc si
                                                    ;SI = where we stopped in the input buffer, now past the delimiter
mov cx, 30d
spacinti2:
mov al, 20h
mov [zodis1+di], al
inc di
loop spacinti2
xor di, di


ieskom_zodzio:
; Copy characters from failas[SI] to zodis1[DI] until a delimiter (space,
; CR, LF or 0) is found, then go to tikrinam with SI on the delimiter.
; At most 27 characters are stored: that is the number of word characters
; zodis1 is declared with, and an unbounded copy would run over the
; variables that follow it (zodis2, skaicius, passk, ...). The rest of an
; over-long word is skipped.
mov al, [failas+si]
cmp al, 20h                                       ; space ends the word
je tikrinam
cmp al, 0Dh                                       ; carriage return ends the word
je tikrinam
cmp al, 0Ah                                       ; line feed ends the word
je tikrinam
cmp al, 0h                                        ; 0 = end of the data in the buffer
je tikrinam
cmp di, 27d                                       ; word buffer already full?
jae per_ilgas                                     ; yes: drop this character
mov [zodis1+di], al
inc di
per_ilgas:
inc si                                            ; next input character
jmp ieskom_zodzio

;--------------------------------------------------------------------------------
;klaida4
klaida4:                                          ; error 4: reading from a file failed
mov dx, offset klaid4
mov ah, 09h
int 21h
jmp pabaiga
;--------------------------------------------------------------------------------



tikrinam:
; A word is in zodis1. Keep it only if its first character lies inside the
; interval [ri+0, ri+1] (signed byte comparison); otherwise move on to the
; next word. For a kept word, rewind the result file to offset 0 and fall
; through into tik_ar_yra.
mov bl, [ri+0]                                    ;first letter of the interval
mov bh, [ri+1]                                    ;second letter of the interval
cmp [zodis1+0], bl                                ;is the word's first letter below the interval?
jl kitas_zodis                                    ;below: take the next word       
cmp [zodis1+0], bh                                ;not below: is it above the interval's upper letter?
jg kitas_zodis                                    ;above: take the next word

mov ah, 42h                                      ;seek to the start of the result file
mov bx, rFail
mov al, 00h
mov dx, 0h
mov cx, 0h
int 21h

tik_ar_yra:
; Scan the result file record by record for the word in zodis1.
; A record, as written by prid_i_viet, is 30 word bytes + 3 count bytes +
; 1 line-end byte.
;   found     -> sudeti      (record position left on the stack)
;   not found -> prid_i_viet (end-of-file position left on the stack)
; Both targets start with "pop cx / pop dx", so the high word of the
; position is pushed last.
;
; Bytes cannot be inserted into the middle of a DOS file, so an unknown word
; is always appended; overwriting the record that compared smaller (the old
; behaviour) destroyed it. Ordering has to be produced by a separate pass.

mov ah, 42h                                      ; query the current position (seek +0 from here)
mov bx, rFail
mov al, 01h
mov dx, 0h
mov cx, 0h
int 21h                                          ; returns the position in DX:AX
push ax                                          ; low word  -> popped into DX
push dx                                          ; high word -> popped into CX

mov bx, rFail                                    ; read the word field of this record
mov dx, offset zodis2
mov ah, 3Fh
mov cx, 30d
int 21h
jc klaida4                                       ; read failed
cmp ax, 0h
je prid_i_viet                                   ; nothing read: end of list, append the word

; Compare the whole stored word, not just its first letter. Only the 27
; character positions zodis1 is declared with are compared. BX is used as
; the index because SI still holds the position in the input buffer.
xor bx, bx
lyginam:
mov al, [zodis1+bx]
cmp al, [zodis2+bx]
jne kitas_irasas
inc bx
cmp bx, 27d
jb lyginam
jmp sudeti                                       ; identical: increment this record's count

kitas_irasas:
pop cx                                           ; drop the saved position so the stack
pop dx                                           ; does not grow on every record
mov ah, 42h                                      ; skip the rest of the record:
mov al, 01h                                      ; 3 count bytes + 1 line-end byte
mov bx, rFail
xor cx, cx
mov dx, 4d
int 21h
jmp tik_ar_yra


prid_i_viet:
; Write a new record (word, count "  1", line-end byte) at the file position
; saved on the stack, then continue with the next word or the next chunk.
pop cx                                            ; saved position -> CX:DX
pop dx
mov bx, rFail
mov ax, 4200h                                     ; seek from the start of the file
int 21h

mov dx, offset zodis1                             ; the word: 30 bytes
mov cx, 30d
mov bx, rFail
mov ah, 40h
int 21h
jc klaida5

mov dx, offset skaicius                           ; 3-character count field, preset to 1
mov cx, 3d
mov bx, rFail
mov ah, 40h
int 21h
jc klaida5

mov dx, offset enteris                            ; one line-end byte
mov cx, 1d
mov bx, rFail
mov ah, 40h
int 21h
jc klaida5

cmp [failas+si], 0h                               ; did the word end on the buffer's 0 terminator?
je sk_eil                                         ; yes: this chunk is finished
jmp kitas_zodis                                   ; no: go on with the next word

sk_eil:
jmp skaitomeil
;--------------------------------------------------------------------------------
;klaida 5
klaida5:                                          ; error 5: writing to the result file failed
mov dx, offset klaid5
mov ah, 09h
int 21h
jmp pabaiga

klaida42:                                         ; read error, reachable by a short jump from sudeti
mov dx, offset klaid4
mov ah, 09h
int 21h
jmp pabaiga
;--------------------------------------------------------------------------------   

sudeti:
; Reached from tik_ar_yra when the current word is already listed.
; Reads the record's 3-character count field into passk (right aligned
; decimal, padded with spaces on the left), adds one to it and continues at
; irasyti_i_faila, which writes it back.
; NOTE(review): a hundreds digit of '9' is incremented to ':' - counts above
; 999 are not handled.
mov bx, rFail
mov dx, offset passk                              ; read the count field
mov ah, 3Fh
mov cx, 3d
int 21h
jc klaida42
xor ax, ax
koks_sk:
mov al, [passk+2]
cmp al, 39h                                       ; units digit = '9'?
je kita_des                                       ; yes: carry into the tens
inc al
mov [passk+2], al                                 ; no: just bump the units
jmp irasyti_i_faila

kita_des:
mov al, 30h
mov [passk+2], al                                 ; units become '0'
mov al, [passk+1]
cmp al, 39h                                       ; tens digit = '9'?
je kitas_sim                                      ; yes: carry into the hundreds
cmp al, 20h                                       ; is there a tens digit at all?
je pirma_des                                      ; no: it becomes '1'
inc al
mov [passk+1], al
jmp irasyti_i_faila

pirma_des:                                        ; first time the count reaches the tens
mov al, 31h
mov [passk+1], al
jmp irasyti_i_faila

kitas_sim:
mov al, 30h
mov [passk+1], al                                 ; tens become '0'
mov al, [passk+0]
cmp al, 20h                                       ; is there a hundreds digit at all?
je pirmas_simtas                                  ; no: it becomes '1'
inc al
mov [passk+0], al
jmp irasyti_i_faila

pirmas_simtas:                                    ; first time the count reaches the hundreds
mov al, 31h
mov [passk+0], al                                 ; hundreds digit lives at passk+0; writing passk+1
                                                  ; turned 99 into " 10" instead of "100"
jmp irasyti_i_faila

irasyti_i_faila:
; Write the incremented count in passk back over the count field of the
; record that sudeti just read, then update didsk and go on to the next word.
;
; sudeti has just read the 3 count bytes, so the file pointer is directly
; behind them. Stepping back 3 bytes relative to the current position lands
; on the field without relying on the position words on the stack (which
; are only popped to keep the stack balanced).
pop cx
pop dx
mov ah, 42h
mov al, 01h                                       ; seek relative to the current position
mov bx, rFail
mov cx, 0FFFFh
mov dx, 0FFFDh                                    ; CX:DX = -3
int 21h

mov ah, 40h                                       ; the 3 count characters
mov bx, rFail
mov cx, 3d
mov dx, offset passk
int 21h
jc klaida52
mov ah, 40h                                       ; the line-end byte that follows them
mov bx, rFail
mov cx, 1d
mov dx, offset enteris
int 21h
jc klaida52

; Keep the largest count in didsk. Both fields are 3 right-aligned ASCII
; characters padded with spaces, and space (20h) sorts below every digit, so
; an unsigned byte-by-byte comparison orders them numerically. (The old code
; compared the low byte of the file position with didsk's first byte.)
mov al, [passk+0]
cmp al, [didsk+0]
jne palyginta
mov al, [passk+1]
cmp al, [didsk+1]
jne palyginta
mov al, [passk+2]
cmp al, [didsk+2]
palyginta:
ja didesnis_sk
jmp kitas_zodis

didesnis_sk:                                      ; new maximum: copy all three characters
mov al, [passk+0]
mov [didsk+0], al
mov al, [passk+1]
mov [didsk+1], al
mov al, [passk+2]
mov [didsk+2], al
jmp kitas_zodis


;=======================================================================================================

; Relay for the write-error handler: irasyti_i_faila reaches it with a
; conditional jump and it forwards to klaida5 with an unconditional one.
klaida52:
jmp klaida5

;===============================================================================================================
;===============================================================================================================



fail_uz:
; Close the current input file, then either start on the next file name or,
; when the command tail is used up, close the result file.
mov bx, dFail
mov ah, 3Eh
int 21h
jc klaida6



; any file names left on the command line?
cmp buff_dydis, 0h
je failo_uzdarymas
jmp naujas_failas

;==============================================================================
;==============================================================================
;==============================================================================          


failo_uzdarymas:
; All input files are done: close the result file and leave.
mov bx, rFail
mov ah, 3Eh
int 21h
jc klaida6
jmp pabaiga



klaida6:                                          ; error 6: closing a file failed
mov dx, offset klaid6
mov ah, 09h
int 21h
jmp pabaiga

pabaiga:
; Common exit point: terminate the program with return code 0.
mov ax, 4C00h                                     ; ah=4Ch terminate, al=00h return code
int 21h

end start
tadman
  • 208,517
  • 23
  • 234
  • 262
pavel
  • 1
  • 1
  • "I need to make a program..." is something you generally need to do before you ask a question here. We can help you with broken code, or code that you're really stuck on trying to finish, but we're not here to write code for you based on a long specification. If you need mentoring or coaching try services like [Codementor](https://www.codementor.io), [Savvy](https://www.savvy.is), [Hackhands](https://hackhands.com), or [airpair](https://www.airpair.com). – tadman Nov 23 '17 at 21:44
  • 2
    Keep your partial result in memory and only write it out at the end? – Jester Nov 23 '17 at 21:44
  • 1
    Also "assembly" is a very broad thing: The architecture and operating systems play a huge role here and you've tagged neither of them. – tadman Nov 23 '17 at 21:45
  • @Jester at the beginning I don't know how many files and words I'll have, and the numbers can be really huge, so saving everything in memory would be problematic. – pavel Nov 23 '17 at 22:09
  • 1
    @tadman I add the code I have written. It's 8086 assembly. – pavel Nov 23 '17 at 22:09
  • If you insist on using a file, you will have to do some clever things or else it will be horribly slow. It's not exactly clear what you want to store in the file and what operations and constraints you have. You can preallocate space for the worst-case count (e.g. 64 bit binary or 20 digit text should be plenty) so you never need to move stuff around for that reason. – Jester Nov 23 '17 at 22:15
  • A well educated person can have about 80k words active vocabulary (English), unless you process really lot of weird files including Shakespeare and stuff, you will probably not break through that limit. Average word length is about 5.1 char, that's about 400k of raw byte data, plus counters... which makes this task not a very good candidate for 16b DOS. Maybe using something similar to LZW to compress the tree would bring down the raw size and also resolve problems with alphabetical sorting, but overall it sounds like 640k may be not enough. But using partial memory buffer "until it fills"... – Ped7g Nov 24 '17 at 00:24
  • About insertion/deletion of file content.. yeah, that's a tricky one, you may want to read a thing or two about databases, those guys have to deal with it quite often... quite often allocating fixed reserve of bytes, or having complex indexing schemes, or file chunks, resembling custom file systems inside single file, etc.. And writing that in 16b real mode is pure masochism, why not at least 32b? And some reasonable memory buffer, with mere ~64MiB you would be probably OK to just make some quick a stupid solution, it would still beat 100x anything smart done over 640k using files. – Ped7g Nov 24 '17 at 00:29
  • 1
    If this is some non-PhD school project (must be, otherwise 16b doesn't make sense, PhD should do some practical real stuff), get through those numbers one more time to evaluate it properly, and show it to the lector, this task doesn't make much sense, it's huge effort spend in plain stupid way, as you can do this in much more simpler and performant way with 32b x86, and mind you, the 32b is already obsolete because of x86-64. The 16b is not just obsolete, it's dead, buried, forgotten and completely useless except to have some fun with 13h VGA graphic mode and 256B intros (and bootloaders). – Ped7g Nov 24 '17 at 00:33
  • Then again maybe it was meant to work only with limited amount of words (i.e. to fit into ~500kiB of free DOS memory.. or even just within single 64kiB segment). Rather make sure the guy putting up this task is dead serious, because it sounds like it was not meant to be that hard-core... or he had some kind of compression scheme and advanced tree structure in mind. – Ped7g Nov 24 '17 at 00:38
  • @Ped7g: Related: [Memory-constrained external sorting of strings, with duplicates combined&counted, on a critical server (billions of filenames)](https://stackoverflow.com/questions/32535222/memory-constrained-external-sorting-of-strings-with-duplicates-combinedcounted). If you really can't keep all your strings +counters in memory at once, then you're facing the same problem and should look at the algorithms and data structures I suggested in that article. Take your pick along the spectrum from easy-to-implement vs. very-space-efficient (like a Radix Trie to compress common prefixes) – Peter Cordes Nov 24 '17 at 01:26
  • Of course, if you can use [big unreal mode](http://wiki.osdev.org/Unreal_Mode) (32-bit addressing modes with a segment limit of 4GiB), you can run in 16-bit mode without having a problem with memory constraints for this problem. – Peter Cordes Nov 24 '17 at 01:29
  • What limits can you assume? Are 32-bit duplicate-counters enough, or do you need to support theoretically unlimited counts with 64-bit or arbitrary-precision integers? Can a single word be longer than 64kiB? Can a single word be larger than 1MiB or 4GiB? Do you have any performance requirement or goal for this? (And if so, why are you implementing it in 8086 assembly with DOS system calls?) Are you going to run it on a real 8086, in 16-bit mode on a modern CPU like Haswell, or in a simulator? Can you use 386 and newer instructions like `movzx`? – Peter Cordes Nov 24 '17 at 01:40

0 Answers0